浏览代码

Rename ActionBuffers to ActionTuple

/develop/action-spec-gym
Andrew Cohen 4 年前
当前提交
157f9e77
共有 4 个文件被更改，包括 30 次插入和 30 次删除
  1. 38
      ml-agents-envs/mlagents_envs/base_env.py
  2. 10
      ml-agents-envs/mlagents_envs/environment.py
  3. 6
      ml-agents/mlagents/trainers/env_manager.py
  4. 6
      ml-agents/mlagents/trainers/tests/simple_test_envs.py

38
ml-agents-envs/mlagents_envs/base_env.py


)
class ActionBuffers(NamedTuple):
class ActionTuple(NamedTuple):
continuous: np.ndarray # dims (n_agents, cont_size)
discrete: np.ndarray # dims (n_agents, disc_size)
continuous: np.ndarray # dims (n_agents, continuous_size)
discrete: np.ndarray # dims (n_agents, discrete_size)
class ActionSpec(NamedTuple):

"""
return len(self.discrete_branches)
def empty_action(self, n_agents: int) -> ActionBuffers:
def empty_action(self, n_agents: int) -> ActionTuple:
Generates ActionBuffers corresponding to an empty action (all zeros)
Generates ActionTuple corresponding to an empty action (all zeros)
for a number of agents.
:param n_agents: The number of agents that will have actions generated
"""

if self.discrete_size > 0:
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return ActionBuffers(continuous, discrete)
return ActionTuple(continuous, discrete)
def random_action(self, n_agents: int) -> ActionBuffers:
def random_action(self, n_agents: int) -> ActionTuple:
Generates ActionBuffers corresponding to a random action (either discrete
Generates ActionTuple corresponding to a random action (either discrete
or continuous) for a number of agents.
:param n_agents: The number of agents that will have actions generated
"""

for i in range(self.discrete_size)
]
)
return ActionBuffers(continuous, discrete)
return ActionTuple(continuous, discrete)
self, actions: ActionBuffers, n_agents: int, name: str
) -> ActionBuffers:
self, actions: ActionTuple, n_agents: int, name: str
) -> ActionTuple:
"""
Validates that action has the correct action dim
for the correct number of agents and ensures the type.

f"{_expected_shape} for (<number of agents>, <action size>) but "
f"received input of dimension {actions.continuous.shape}"
)
if actions.continuous.dtype != np.float32:
actions.continuous = actions.continuous.astype(np.float32)
if actions.continuous.dtype != np.float32:
actions.continuous = actions.continuous.astype(np.float32)
_expected_shape = (n_agents, self.discrete_size)
if self.discrete_size > 0 and actions.discrete.shape != _expected_shape:

f"received input of dimension {actions.discrete.shape}"
)
if actions.discrete.dtype != np.int32:
actions.discrete = actions.discrete.astype(np.int32)
if actions.discrete.dtype != np.int32:
actions.discrete = actions.discrete.astype(np.int32)
return actions
@staticmethod

"""
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: ActionBuffers) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
:param action: ActionBuffers tuple of continuous and/or discrete action
:param action: ActionTuple tuple of continuous and/or discrete action
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionBuffers
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

:param action: ActionBuffers tuple of continuous and/or discrete action
:param action: ActionTuple tuple of continuous and/or discrete action
"""
@abstractmethod

10
ml-agents-envs/mlagents_envs/environment.py


DecisionSteps,
TerminalSteps,
BehaviorSpec,
ActionBuffers,
ActionTuple,
BehaviorName,
AgentId,
BehaviorMapping,

self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self._env_specs: Dict[str, BehaviorSpec] = {}
self._env_actions: Dict[str, ActionBuffers] = {}
self._env_actions: Dict[str, ActionTuple] = {}
self._is_first_message = True
self._update_behavior_specs(aca_output)

f"agent group in the environment"
)
def set_actions(self, behavior_name: BehaviorName, action: ActionBuffers) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
return

self._env_actions[behavior_name] = action
def set_action_for_agent(
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionBuffers
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:

@timed
def _generate_step_input(
self, vector_action: Dict[str, ActionBuffers]
self, vector_action: Dict[str, ActionTuple]
) -> UnityInputProto:
rl_in = UnityRLInputProto()
for b in vector_action:

6
ml-agents/mlagents/trainers/env_manager.py


TerminalSteps,
BehaviorSpec,
BehaviorName,
ActionBuffers,
ActionTuple,
)
from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats

@staticmethod
def action_buffers_from_numpy_dict(
action_dict: Dict[str, np.ndarray]
) -> ActionBuffers:
) -> ActionTuple:
continuous: np.ndarray = None
discrete: np.ndarray = None
if "continuous_action" in action_dict:

return ActionBuffers(continuous, discrete)
return ActionTuple(continuous, discrete)

6
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
ActionBuffers,
ActionTuple,
BaseEnv,
BehaviorSpec,
DecisionSteps,

for _ in range(self.n_demos):
for name in self.names:
if self.discrete:
self.action[name] = ActionBuffers(
self.action[name] = ActionTuple(
self.action[name] = ActionBuffers(
self.action[name] = ActionTuple(
np.array([[float(self.goal[name])]]), [[]]
)
self.step()
正在加载...
取消
保存