
Merge branch 'develop-hybrid-action-staging' into develop-hybrid-actions-singleton

/develop/actionmodel-csharp
Andrew Cohen · 4 years ago
Current commit: 886883b3
5 files changed, 15 insertions(+), 16 deletions(-)
  1. ml-agents-envs/mlagents_envs/tests/test_envs.py (4 changes)
  2. ml-agents/mlagents/trainers/policy/policy.py (2 changes)
  3. ml-agents/mlagents/trainers/simple_env_manager.py (3 changes)
  4. ml-agents/mlagents/trainers/tests/mock_brain.py (18 changes)
  5. ml-agents/mlagents/trainers/torch/components/bc/module.py (4 changes)

ml-agents-envs/mlagents_envs/tests/test_envs.py (4 changes)


  decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
  n_agents = len(decision_steps)
  _empty_act = spec.action_spec.empty_action(n_agents)
- next_action = ActionTuple()
- next_action.add_continuous(_empty_act.continuous - 1)
- next_action.add_discrete(_empty_act.discrete - 1)
+ next_action = ActionTuple(_empty_act.continuous - 1, _empty_act.discrete - 1)
  env.set_actions("RealFakeBrain", next_action)
  env.step()
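For context on the API exercised here, a minimal sketch of constructing an ActionTuple and sending it to an environment; the agent count and action sizes are made-up values, and the env calls are commented out since they need a live environment.

import numpy as np
from mlagents_envs.base_env import ActionTuple

# Hypothetical sizes for illustration: 3 agents, 2 continuous dims, 1 discrete branch.
n_agents = 3
continuous = np.zeros((n_agents, 2), dtype=np.float32)
discrete = np.zeros((n_agents, 1), dtype=np.int32)

# The constructor takes the continuous and discrete arrays directly,
# replacing the add_continuous()/add_discrete() calls removed above.
next_action = ActionTuple(continuous, discrete)
# env.set_actions("RealFakeBrain", next_action)
# env.step()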

ml-agents/mlagents/trainers/policy/policy.py (2 changes)


  def save_previous_action(
      self, agent_ids: List[str], action_tuple: ActionTuple
  ) -> None:
-     # if action_dict is None or "discrete_action" not in action_dict:
-     #     return
      for index, agent_id in enumerate(agent_ids):
          self.previous_action_dict[agent_id] = action_tuple.discrete[index, :]
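Roughly, the method keeps the last discrete action per agent id. A self-contained sketch of the same bookkeeping; the agent ids and action values are invented for illustration:

import numpy as np
from mlagents_envs.base_env import ActionTuple

agent_ids = ["agent-0", "agent-1"]                              # hypothetical ids
actions = ActionTuple(discrete=np.array([[1], [3]], dtype=np.int32))

previous_action_dict = {}
for index, agent_id in enumerate(agent_ids):
    # Row i of the batched discrete array belongs to the i-th agent id,
    # so each agent stores only its own discrete action vector.
    previous_action_dict[agent_id] = actions.discrete[index, :]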

ml-agents/mlagents/trainers/simple_env_manager.py (3 changes)


  self.previous_all_action_info = all_action_info
  for brain_name, action_info in all_action_info.items():
-     _action = EnvManager.action_tuple_from_numpy_dict(action_info.action)
-     self.env.set_actions(brain_name, _action)
+     self.env.set_actions(brain_name, action_info.action)
  self.env.step()
  all_step_result = self._generate_all_results()
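A simplified sketch of the stepping loop this hunk belongs to, assuming each ActionInfo already carries an ActionTuple in its action field; the function name step_once is invented:

def step_once(env, all_action_info):
    # With the policy emitting ActionTuple objects directly, no
    # dict-to-tuple conversion is needed before set_actions().
    for brain_name, action_info in all_action_info.items():
        env.set_actions(brain_name, action_info.action)
    env.step()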

ml-agents/mlagents/trainers/tests/mock_brain.py (18 changes)


  import numpy as np
  from mlagents.trainers.buffer import AgentBuffer
+ from mlagents.trainers.torch.action_log_probs import LogProbsTuple
  from mlagents.trainers.trajectory import Trajectory, AgentExperience
  from mlagents_envs.base_env import (
      DecisionSteps,

+     ActionTuple,
  )

  steps_list = []
  action_size = action_spec.discrete_size + action_spec.continuous_size
- action_probs = {
-     "action_probs": np.ones(
-         int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
-         dtype=np.float32,
-     )
- }
+ prob_ones = np.ones(
+     int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
+     dtype=np.float32,
+ )
  for _i in range(length - 1):
      obs = []
      for _shape in observation_shapes:

      if action_spec.is_continuous():
-         action = {"continuous_action": np.zeros(action_size, dtype=np.float32)}
+         action = ActionTuple(continuous=np.zeros(action_size, dtype=np.float32))
+         action_probs = LogProbsTuple(continuous=prob_ones)
      else:
-         action = {"discrete_action": np.zeros(action_size, dtype=np.float32)}
+         action = ActionTuple(discrete=np.zeros(action_size, dtype=np.float32))
+         action_probs = LogProbsTuple(discrete=prob_ones)
      action_pre = np.zeros(action_size, dtype=np.float32)
      action_mask = (
          [
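To make the new test helper's intent concrete, a small sketch of pairing a zeroed ActionTuple with an all-ones LogProbsTuple for one fake step, mirroring the hunk above; the sizes and branch layout are assumptions for illustration.

import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

continuous_size = 2              # hypothetical sizes for the sketch
discrete_branches = (3, 2)

prob_ones = np.ones(
    int(np.sum(discrete_branches)) + continuous_size, dtype=np.float32
)
# Continuous case: zero action plus placeholder log-probs, as in the mock.
action = ActionTuple(continuous=np.zeros(continuous_size, dtype=np.float32))
action_probs = LogProbsTuple(continuous=prob_ones)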

ml-agents/mlagents/trainers/torch/components/bc/module.py (4 changes)


      expert_actions.discrete_tensor,
      self.policy.behavior_spec.action_spec.discrete_branches,
  )
  log_prob_branches = ModelUtils.break_into_branches(
      log_probs.all_discrete_tensor,
      self.policy.behavior_spec.action_spec.discrete_branches,

  act_masks = None
  expert_actions = AgentAction.from_dict(mini_batch_demo)
  if self.policy.behavior_spec.action_spec.discrete_size > 0:
      act_masks = ModelUtils.list_to_tensor(
          np.ones(
              (

  else:
      vis_obs = []
- selected_actions, log_probs, _, _, _ = self.policy.sample_actions(
+ selected_actions, log_probs, _, _ = self.policy.sample_actions(
      vec_obs,
      vis_obs,
      masks=act_masks,
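For readers unfamiliar with the branch splitting used above, a conceptual sketch of what break_into_branches amounts to, re-implemented here with plain torch.split on made-up sizes rather than by calling the trainer utility:

import torch

discrete_branches = (3, 2)                              # hypothetical branch sizes
all_discrete = torch.zeros(4, sum(discrete_branches))   # batch of 4 concatenated values

# Split the concatenated per-branch values back into one tensor per branch,
# which is what the BC module does before comparing against expert actions.
branches = torch.split(all_discrete, list(discrete_branches), dim=1)
assert [b.shape[1] for b in branches] == [3, 2]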
