浏览代码

ActionTuple default is now np.array, not None

/develop/action-spec-gym
Andrew Cohen 4 年前
当前提交
453a2bba
共有 5 个文件被更改，包括 47 次插入和 37 次删除
  1. 55
      ml-agents-envs/mlagents_envs/base_env.py
  2. 2
      ml-agents-envs/mlagents_envs/tests/test_steps.py
  3. 4
      ml-agents/mlagents/trainers/env_manager.py
  4. 10
      ml-agents/mlagents/trainers/policy/policy.py
  5. 13
      ml-agents/mlagents/trainers/tests/simple_test_envs.py

55
ml-agents-envs/mlagents_envs/base_env.py


)
class ActionTuple(NamedTuple):
class ActionTuple:
A NamedTuple whose fields correspond to actions of different types.
Continuous and discrete actions are numpy arrays.
An object whose fields correspond to actions of different types.
Continuous and discrete actions are numpy arrays of type float32 and
int32, respectively and are type checked on construction.
Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively.
continuous: np.ndarray # dims (n_agents, continuous_size)
discrete: np.ndarray # dims (n_agents, discrete_size)
def __init__(self, continuous: np.ndarray, discrete: np.ndarray):
if continuous.dtype != np.float32:
continuous = continuous.astype(np.float32, copy=False)
self._continuous = continuous
if discrete.dtype != np.int32:
discrete = discrete.astype(np.int32, copy=False)
self._discrete = discrete
@property
def continuous(self) -> np.ndarray:
return self._continuous
@property
def discrete(self) -> np.ndarray:
return self._discrete
class ActionSpec(NamedTuple):

for a number of agents.
:param n_agents: The number of agents that will have actions generated
"""
continuous: np.ndarray = None
discrete: np.ndarray = None
if self.continuous_size > 0:
continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
if self.discrete_size > 0:
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return ActionTuple(continuous, discrete)
def random_action(self, n_agents: int) -> ActionTuple:

:param n_agents: The number of agents that will have actions generated
"""
continuous: np.ndarray = None
discrete: np.ndarray = None
if self.continuous_size > 0:
continuous = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
).astype(np.float32)
continuous = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
)
discrete = np.array([])
if self.discrete_size > 0:
discrete = np.column_stack(
[

for the correct number of agents and ensures the type.
"""
_expected_shape = (n_agents, self.continuous_size)
if self.continuous_size > 0 and actions.continuous.shape != _expected_shape:
if actions.continuous.shape != _expected_shape:
if actions.continuous.dtype != np.float32:
actions.continuous = actions.continuous.astype(np.float32)
if self.discrete_size > 0 and actions.discrete.shape != _expected_shape:
if actions.discrete.shape != _expected_shape:
if actions.discrete.dtype != np.int32:
actions.discrete = actions.discrete.astype(np.int32)
return actions
@staticmethod

2
ml-agents-envs/mlagents_envs/tests/test_steps.py


specs = ActionSpec.create_continuous(action_len)
zero_action = specs.empty_action(4).continuous
assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
print(specs.random_action(4))
print(random_action)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

4
ml-agents/mlagents/trainers/env_manager.py


def action_buffers_from_numpy_dict(
action_dict: Dict[str, np.ndarray]
) -> ActionTuple:
continuous: np.ndarray = None
discrete: np.ndarray = None
continuous: np.ndarray = np.array([], dtype=np.float32)
discrete: np.ndarray = np.array([], dtype=np.int32)
if "continuous_action" in action_dict:
continuous = action_dict["continuous_action"]
if "discrete_action" in action_dict:

10
ml-agents/mlagents/trainers/policy/policy.py


:return: Dict of action type to np.ndarray
"""
act_dict: Dict[str, np.ndarray] = {}
action_buffer = self.behavior_spec.action_spec.empty_action(num_agents)
if action_buffer.continuous is not None:
act_dict["continuous_action"] = action_buffer.continuous
if action_buffer.discrete is not None:
act_dict["discrete_action"] = action_buffer.discrete
action_tuple = self.behavior_spec.action_spec.empty_action(num_agents)
if self.behavior_spec.action_spec.continuous_size > 0:
act_dict["continuous_action"] = action_tuple.continuous
if self.behavior_spec.action_spec.discrete_size > 0:
act_dict["discrete_action"] = action_tuple.discrete
return act_dict
def save_previous_action(

13
ml-agents/mlagents/trainers/tests/simple_test_envs.py


else:
action_spec = ActionSpec.create_continuous(action_size)
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.action_spec = action_spec
self.action_size = action_size
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

def _take_action(self, name: str) -> bool:
deltas = []
_act = self.action[name]
if _act.discrete is not None:
if self.action_spec.discrete_size > 0:
if _act.continuous is not None:
if self.action_spec.continuous_size > 0:
for _cont in _act.continuous[0]:
deltas.append(_cont)
for i, _delta in enumerate(deltas):

for name in self.names:
if self.discrete:
self.action[name] = ActionTuple(
[[]], np.array([[1]] if self.goal[name] > 0 else [[0]])
np.array([], dtype=np.float32),
np.array(
[[1]] if self.goal[name] > 0 else [[0]], dtype=np.int32
),
np.array([[float(self.goal[name])]]), [[]]
np.array([[float(self.goal[name])]], dtype=np.float32),
np.array([], dtype=np.int32),
)
self.step()
正在加载...
取消
保存