
Merge branch 'develop-action-buffer' into develop-hybrid-actions-singleton

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit 95566e44
9 changed files with 89 additions and 65 deletions
  1. Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo.meta (2 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (84 changes)
  3. ml-agents-envs/mlagents_envs/environment.py (15 changes)
  4. ml-agents-envs/mlagents_envs/tests/test_steps.py (2 changes)
  5. ml-agents/mlagents/trainers/env_manager.py (10 changes)
  6. ml-agents/mlagents/trainers/policy/policy.py (10 changes)
  7. ml-agents/mlagents/trainers/tests/simple_test_envs.py (19 changes)
  8. ml-agents/mlagents/trainers/torch/utils.py (6 changes)
  9. ml-agents/mlagents/trainers/trajectory.py (6 changes)

Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo.meta (2 changes)


guid: 7f11f35191533404c9957443a681aaee
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
11400002: Assets/ML-Agents/Examples/Pushblock/Demos/ExpertPush.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:

ml-agents-envs/mlagents_envs/base_env.py (84 changes)


)
- class ActionBuffers(NamedTuple):
+ class ActionTuple:
- A NamedTuple whose fields correspond to actions of different types.
- Continuous and discrete actions are numpy arrays.
+ An object whose fields correspond to actions of different types.
+ Continuous and discrete actions are numpy arrays of type float32 and
+ int32, respectively and are type checked on construction.
+ Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
+ respectively.
- continuous: np.ndarray # dims (n_agents, cont_size)
- discrete: np.ndarray # dims (n_agents, disc_size)
+ def __init__(self, continuous: np.ndarray, discrete: np.ndarray):
+ if continuous.dtype != np.float32:
+ continuous = continuous.astype(np.float32, copy=False)
+ self._continuous = continuous
+ if discrete.dtype != np.int32:
+ discrete = discrete.astype(np.int32, copy=False)
+ self._discrete = discrete
+ @property
+ def continuous(self) -> np.ndarray:
+ return self._continuous
+ @property
+ def discrete(self) -> np.ndarray:
+ return self._discrete
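For orientation, here is a minimal usage sketch (not part of the diff) of what the new ActionTuple constructor above does: arrays with the wrong dtype are coerced to float32/int32 on construction rather than rejected. The shapes and values are illustrative.

import numpy as np
from mlagents_envs.base_env import ActionTuple

# Illustrative only: 2 agents, 3 continuous actions, 1 discrete branch.
continuous = np.zeros((2, 3), dtype=np.float64)  # "wrong" dtype on purpose
discrete = np.zeros((2, 1), dtype=np.int64)      # "wrong" dtype on purpose
action = ActionTuple(continuous, discrete)
assert action.continuous.dtype == np.float32     # coerced on construction
assert action.discrete.dtype == np.int32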
class ActionSpec(NamedTuple):

"""
return len(self.discrete_branches)
- def empty_action(self, n_agents: int) -> ActionBuffers:
+ def empty_action(self, n_agents: int) -> ActionTuple:
- Generates ActionBuffers corresponding to an empty action (all zeros)
+ Generates ActionTuple corresponding to an empty action (all zeros)
- continuous: np.ndarray = None
- discrete: np.ndarray = None
- if self.continuous_size > 0:
- continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
- if self.discrete_size > 0:
- discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
- return ActionBuffers(continuous, discrete)
+ continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
+ discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
+ return ActionTuple(continuous, discrete)
- def random_action(self, n_agents: int) -> ActionBuffers:
+ def random_action(self, n_agents: int) -> ActionTuple:
- Generates ActionBuffers corresponding to a random action (either discrete
+ Generates ActionTuple corresponding to a random action (either discrete
- continuous: np.ndarray = None
- discrete: np.ndarray = None
- if self.continuous_size > 0:
- continuous = np.random.uniform(
- low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
- ).astype(np.float32)
+ continuous = np.random.uniform(
+ low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
+ )
+ discrete = np.array([])
if self.discrete_size > 0:
discrete = np.column_stack(
[

for i in range(self.discrete_size)
]
)
- return ActionBuffers(continuous, discrete)
+ return ActionTuple(continuous, discrete)
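As a quick sketch of the new return type (mirroring the test in ml-agents-envs/mlagents_envs/tests/test_steps.py further down): empty_action and random_action now both return an ActionTuple, and the constructor guarantees float32/int32 fields. This is illustrative, not part of the diff.

import numpy as np
from mlagents_envs.base_env import ActionSpec

spec = ActionSpec.create_continuous(3)   # 3 continuous actions, no discrete branches
zeros = spec.empty_action(4)             # ActionTuple for 4 agents
assert zeros.continuous.shape == (4, 3) and zeros.continuous.dtype == np.float32
rand = spec.random_action(4)
assert rand.continuous.dtype == np.float32       # coerced by the ActionTuple constructor
assert np.min(rand.continuous) >= -1.0 and np.max(rand.continuous) <= 1.0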
- self, actions: ActionBuffers, n_agents: int, name: str
- ) -> ActionBuffers:
+ self, actions: ActionTuple, n_agents: int, name: str
+ ) -> ActionTuple:
- if self.continuous_size > 0 and actions.continuous.shape != _expected_shape:
+ if actions.continuous.shape != _expected_shape:
- if actions.continuous.dtype != np.float32:
- actions.continuous = actions.continuous.astype(np.float32)
- if self.discrete_size > 0 and actions.discrete.shape != _expected_shape:
+ if actions.discrete.shape != _expected_shape:
- if actions.discrete.dtype != np.int32:
- actions.discrete = actions.discrete.astype(np.int32)
return actions
@staticmethod

"""
@abstractmethod
- def set_actions(self, behavior_name: BehaviorName, action: ActionBuffers) -> None:
+ def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
- :param action: ActionBuffers tuple of continuous and/or discrete action
+ :param action: ActionTuple tuple of continuous and/or discrete action.
+ Actions are np.arrays with dimensions (n_agents, continuous_size) and
+ (n_agents, discrete_size), respectively.
- self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionBuffers
+ self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

- :param action: ActionBuffers tuple of continuous and/or discrete action
+ :param action: ActionTuple tuple of continuous and/or discrete action
+ Actions are np.arrays with dimensions (1, continuous_size) and
+ (1, discrete_size), respectively. Note, this initial dimensions of 1 is because
+ this action is meant for a single agent.
"""
@abstractmethod
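A hedged end-to-end sketch of how a caller would now drive set_actions with the new type. Connecting with file_name=None (attach to a running Editor) and picking the first behavior are illustrative choices, and the action_spec field name follows the BehaviorSpec construction shown in simple_test_envs.py below; none of this is prescribed by the diff itself.

from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)   # attach to a running Editor; illustrative
env.reset()
behavior_name = list(env.behavior_specs)[0]
action_spec = env.behavior_specs[behavior_name].action_spec
for _ in range(10):
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    # One ActionTuple covers every agent that requested a decision this step.
    action = action_spec.random_action(len(decision_steps))
    env.set_actions(behavior_name, action)
    env.step()
env.close()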

ml-agents-envs/mlagents_envs/environment.py (15 changes)


DecisionSteps,
TerminalSteps,
BehaviorSpec,
- ActionBuffers,
+ ActionTuple,
BehaviorName,
AgentId,
BehaviorMapping,

self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self._env_specs: Dict[str, BehaviorSpec] = {}
- self._env_actions: Dict[str, ActionBuffers] = {}
+ self._env_actions: Dict[str, ActionTuple] = {}
self._is_first_message = True
self._update_behavior_specs(aca_output)

f"agent group in the environment"
)
- def set_actions(self, behavior_name: BehaviorName, action: ActionBuffers) -> None:
+ def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
return

self._env_actions[behavior_name] = action
def set_action_for_agent(
- self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionBuffers
+ self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:

agent_id
)
) from ie
- self._env_actions[behavior_name][index] = action
+ if action_spec.continuous_size > 0:
+ self._env_actions[behavior_name].continuous[index] = action.continuous[0]
+ if action_spec.discrete_size > 0:
+ self._env_actions[behavior_name].discrete[index] = action.discrete[0]
def get_steps(
self, behavior_name: BehaviorName

@timed
def _generate_step_input(
- self, vector_action: Dict[str, ActionBuffers]
+ self, vector_action: Dict[str, ActionTuple]
) -> UnityInputProto:
rl_in = UnityRLInputProto()
for b in vector_action:

ml-agents-envs/mlagents_envs/tests/test_steps.py (2 changes)


specs = ActionSpec.create_continuous(action_len)
zero_action = specs.empty_action(4).continuous
assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
print(specs.random_action(4))
random_action = specs.random_action(4).continuous
print(random_action)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

ml-agents/mlagents/trainers/env_manager.py (10 changes)


TerminalSteps,
BehaviorSpec,
BehaviorName,
- ActionBuffers,
+ ActionTuple,
)
from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats

@staticmethod
def action_buffers_from_numpy_dict(
action_dict: Dict[str, np.ndarray]
- ) -> ActionBuffers:
- continuous: np.ndarray = None
- discrete: np.ndarray = None
+ ) -> ActionTuple:
+ continuous: np.ndarray = np.array([], dtype=np.float32)
+ discrete: np.ndarray = np.array([], dtype=np.int32)
- return ActionBuffers(continuous, discrete)
+ return ActionTuple(continuous, discrete)
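For context, a hypothetical standalone version of the helper above: it converts the policy's dict of numpy arrays into the new ActionTuple, falling back to empty arrays when an action type is absent. The "continuous_action"/"discrete_action" keys match the policy.py hunk below; the function name and body here are a sketch, not the exact upstream code.

import numpy as np
from mlagents_envs.base_env import ActionTuple

def action_tuple_from_numpy_dict(action_dict):
    # Hypothetical sketch of the static helper shown in the hunk above.
    continuous = action_dict.get("continuous_action", np.array([], dtype=np.float32))
    discrete = action_dict.get("discrete_action", np.array([], dtype=np.int32))
    return ActionTuple(continuous, discrete)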

ml-agents/mlagents/trainers/policy/policy.py (10 changes)


:return: Dict of action type to np.ndarray
"""
act_dict: Dict[str, np.ndarray] = {}
- action_buffer = self.behavior_spec.action_spec.empty_action(num_agents)
- if action_buffer.continuous is not None:
- act_dict["continuous_action"] = action_buffer.continuous
- if action_buffer.discrete is not None:
- act_dict["discrete_action"] = action_buffer.discrete
+ action_tuple = self.behavior_spec.action_spec.empty_action(num_agents)
+ if self.behavior_spec.action_spec.continuous_size > 0:
+ act_dict["continuous_action"] = action_tuple.continuous
+ if self.behavior_spec.action_spec.discrete_size > 0:
+ act_dict["discrete_action"] = action_tuple.discrete
return act_dict
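Illustrative shapes for the fake-action dict built above, assuming a hybrid spec with two continuous actions and one three-way discrete branch; the ActionSpec(continuous_size, discrete_branches) constructor is assumed from mlagents_envs.base_env, not shown in this diff.

from mlagents_envs.base_env import ActionSpec

spec = ActionSpec(continuous_size=2, discrete_branches=(3,))
empty = spec.empty_action(4)             # ActionTuple for 4 agents
act_dict = {}
if spec.continuous_size > 0:
    act_dict["continuous_action"] = empty.continuous  # shape (4, 2), float32
if spec.discrete_size > 0:
    act_dict["discrete_action"] = empty.discrete      # shape (4, 1), int32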
def save_previous_action(

ml-agents/mlagents/trainers/tests/simple_test_envs.py (19 changes)


from mlagents_envs.base_env import (
ActionSpec,
- ActionBuffers,
+ ActionTuple,
BaseEnv,
BehaviorSpec,
DecisionSteps,

) # to set the goals/positions
self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}
self.step_count: Dict[str, float] = {}

def _take_action(self, name: str) -> bool:
deltas = []
_act = self.action[name]
- if _act.discrete is not None:
+ if self.action_spec.discrete_size > 0:
- if _act.continuous is not None:
+ if self.action_spec.continuous_size > 0:
for _cont in _act.continuous[0]:
deltas.append(_cont)
for i, _delta in enumerate(deltas):

for _ in range(self.n_demos):
for name in self.names:
if self.discrete:
- self.action[name] = ActionBuffers(
- [[]], np.array([[1]] if self.goal[name] > 0 else [[0]])
+ self.action[name] = ActionTuple(
+ np.array([], dtype=np.float32),
+ np.array(
+ [[1]] if self.goal[name] > 0 else [[0]], dtype=np.int32
+ ),
- self.action[name] = ActionBuffers(
- np.array([[float(self.goal[name])]]), [[]]
+ self.action[name] = ActionTuple(
+ np.array([[float(self.goal[name])]], dtype=np.float32),
+ np.array([], dtype=np.int32),
)
self.step()
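A small standalone sketch of the single-agent ActionTuple values the demo generator above builds; the goal value is illustrative.

import numpy as np
from mlagents_envs.base_env import ActionTuple

goal = 1.0
# Discrete branch: one agent, one branch, action index 1 if the goal is positive.
discrete_demo = ActionTuple(
    np.array([], dtype=np.float32),
    np.array([[1]] if goal > 0 else [[0]], dtype=np.int32),
)
# Continuous: one agent, one continuous action equal to the goal.
continuous_demo = ActionTuple(
    np.array([[float(goal)]], dtype=np.float32),
    np.array([], dtype=np.int32),
)
assert discrete_demo.discrete.shape == (1, 1)
assert continuous_demo.continuous.shape == (1, 1)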

ml-agents/mlagents/trainers/torch/utils.py (6 changes)


discrete log probs of individual actions as well as all the log probs for an entire branch.
Utility functions provide numpy <=> tensor conversions to be used by the optimizers.
:param continuous_tensor: Torch tensor corresponding to log probs of continuous actions
- :param discrete_list: List of Torch tensors each corresponding to log probs of discrete actions
+ :param discrete_list: List of Torch tensors each corresponding to log probs of the discrete actions that were
+ sampled.
- a discrete action branch
+ a discrete action branch, even the discrete actions that were not sampled. all_discrete_list is a list of Tensors,
+ each Tensor corresponds to one discrete branch log probabilities.
"""
continuous_tensor: torch.Tensor

ml-agents/mlagents/trainers/trajectory.py (6 changes)


action_shape = None
for act_type, act_array in exp.action.items():
agent_buffer_trajectory[act_type].append(act_array)
action_shape = act_array.shape # TODO Better way to make mask
for log_type, log_array in exp.action_probs.items():
agent_buffer_trajectory[log_type].append(log_array)

else:
# This should never be needed unless the environment somehow doesn't supply the
# action mask in a discrete space.
if "discrete_action" in exp.action:
action_shape = exp.action["discrete_action"].shape
else:
action_shape = exp.action["continuous_action"].shape
agent_buffer_trajectory["action_mask"].append(
np.ones(action_shape, dtype=np.float32), padding_value=1
)
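A minimal sketch (with hypothetical data) of the fallback above: when the environment supplies no action mask for a discrete space, an all-ones mask is created with the same shape as the stored action.

import numpy as np

exp_action = {"discrete_action": np.array([[1, 0]], dtype=np.int32)}  # hypothetical
if "discrete_action" in exp_action:
    action_shape = exp_action["discrete_action"].shape
else:
    action_shape = exp_action["continuous_action"].shape
action_mask = np.ones(action_shape, dtype=np.float32)  # mask of all ones
assert action_mask.shape == (1, 2)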
