
tf tests except gail pass

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit fc3027ac
4 files changed: 38 insertions, 28 deletions
1. ml-agents/mlagents/trainers/agent_processor.py (9 changes)
2. ml-agents/mlagents/trainers/policy/policy.py (14 changes)
3. ml-agents/mlagents/trainers/policy/tf_policy.py (4 changes)
4. ml-agents/mlagents/trainers/tests/simple_test_envs.py (39 changes)

ml-agents/mlagents/trainers/agent_processor.py (9 changes)


  interrupted = step.interrupted if terminated else False
  # Add the outputs of the last eval
  action = stored_take_action_outputs["action"][idx]
- #if self.policy.use_continuous_act:
- #    action_pre = stored_take_action_outputs["pre_action"][idx]
- #else:
- #    action_pre = None
- action_pre = None
+ if self.policy.use_continuous_act:
+     action_pre = stored_take_action_outputs["pre_action"][idx]
+ else:
+     action_pre = None
  action_probs = stored_take_action_outputs["log_probs"][idx]
  action_mask = stored_decision_step.action_mask
  prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
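The re-enabled branch stores a pre-squash action only for continuous policies. A minimal sketch of the idea, assuming a tanh-squashed Gaussian policy; the function and variable names below are illustrative, not the trainer's actual sampling code:

    import numpy as np

    # Hypothetical illustration: a continuous policy samples an unbounded
    # "pre" action, then squashes it into [-1, 1] before sending it to the
    # environment. Both values are kept so log-probabilities can be
    # recovered exactly later; discrete policies have no raw value,
    # hence action_pre stays None for them.
    def sample_squashed_action(mean: np.ndarray, std: np.ndarray):
        pre_action = mean + std * np.random.standard_normal(mean.shape)
        action = np.tanh(pre_action)
        return action, pre_action

    action, pre_action = sample_squashed_action(np.zeros(2), 0.5 * np.ones(2))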

ml-agents/mlagents/trainers/policy/policy.py (14 changes)


  self.trainer_settings = trainer_settings
  self.network_settings: NetworkSettings = trainer_settings.network_settings
  self.seed = seed
- # For hybrid
- #self.act_size = (
- #    list(behavior_spec.discrete_action_branches)
- #    if behavior_spec.is_action_discrete()
- #    else [behavior_spec.action_size]
- #)
+ # For mixed action spaces
+ self.act_size = (
+     list(behavior_spec.discrete_action_branches)
+     if behavior_spec.is_action_discrete()
+     else [behavior_spec.action_size]
+ )
  self.vec_obs_size = sum(
      shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
  )
- #self.use_continuous_act = behavior_spec.is_action_continuous()
+ self.use_continuous_act = behavior_spec.is_action_continuous()
  self.num_branches = self.behavior_spec.action_size
  self.previous_action_dict: Dict[str, np.array] = {}
  self.memory_dict: Dict[str, np.ndarray] = {}
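For reference, the restored act_size expression behaves roughly as follows. This is a standalone sketch with stand-in values; the branch tuple and boolean here are assumptions for the example, not the real BehaviorSpec API:

    # Discrete case: one entry per branch, e.g. two branches with 3 and 2 options.
    discrete_action_branches = (3, 2)
    is_discrete = True
    act_size = list(discrete_action_branches) if is_discrete else [4]
    assert act_size == [3, 2]

    # Continuous case: a single-element list holding the action dimension.
    is_discrete = False
    act_size = list(discrete_action_branches) if is_discrete else [4]
    assert act_size == [4]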

ml-agents/mlagents/trainers/policy/tf_policy.py (4 changes)


      reparameterize,
      condition_sigma_on_obs,
  )
+ if self.continuous_act_size > 0 and len(self.discrete_act_size) > 0:
+     raise UnityPolicyException(
+         "Tensorflow does not support mixed action spaces. Please run with --torch."
+     )
  # for ghost trainer save/load snapshots
  self.assign_phs: List[tf.Tensor] = []
  self.assign_ops: List[tf.Operation] = []
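The new guard fails fast when a behavior declares both action kinds. A self-contained sketch of the same pattern, using stand-in sizes rather than the real TFPolicy attributes:

    class UnityPolicyException(Exception):
        """Stand-in for the trainer's exception type."""

    def check_action_space(continuous_act_size: int, discrete_act_size: tuple) -> None:
        # Mirrors the guard above: reject specs that declare both continuous
        # and discrete actions, since the TF code path cannot train them.
        if continuous_act_size > 0 and len(discrete_act_size) > 0:
            raise UnityPolicyException(
                "Tensorflow does not support mixed action spaces. Please run with --torch."
            )

    check_action_space(2, ())      # fine: purely continuous
    check_action_space(0, (3, 2))  # fine: purely discrete
    # check_action_space(2, (3,))  # would raise UnityPolicyException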

ml-agents/mlagents/trainers/tests/simple_test_envs.py (39 changes)


  self.vis_obs_size = vis_obs_size
  self.vec_obs_size = vec_obs_size
- action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
- self.behavior_spec = BehaviorSpec(
-     self._make_obs_spec(),
-     action_type,
-     tuple(2 for _ in range(action_size)) if use_discrete else action_size,
- )
+ if use_discrete:
+     self.behavior_spec = BehaviorSpec(
+         self._make_obs_spec(), 0, tuple(2 for _ in range(action_size))
+     )
+ else:
+     self.behavior_spec = BehaviorSpec(
+         self._make_obs_spec(), action_size, tuple()
+     )
  self.action_size = action_size
  self.names = brain_names
  self.positions: Dict[str, List[float]] = {}
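The replacement drops ActionType in favor of positional (continuous size, discrete branches) arguments. A hedged sketch of that shape, using a NamedTuple stand-in rather than the real BehaviorSpec class:

    from typing import NamedTuple, Tuple

    class BehaviorSpecSketch(NamedTuple):
        observation_shapes: list
        continuous_action_size: int                 # 0 when the behavior is discrete
        discrete_action_branches: Tuple[int, ...]   # () when continuous

    action_size = 2
    discrete_spec = BehaviorSpecSketch([(4,)], 0, tuple(2 for _ in range(action_size)))
    continuous_spec = BehaviorSpecSketch([(4,)], action_size, tuple())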

  )
  super().__init__(
      brain_names,
      True,
      action_size=discrete_action_size,  # This is needed for env to generate masks correctly
  )
  self.behavior_spec = BehaviorSpec(
-     self._make_obs_spec(), continuous_action_size, tuple(2 for _ in range(discrete_action_size))
+     self._make_obs_spec(),
+     continuous_action_size,
+     tuple(2 for _ in range(discrete_action_size)),
  )
  self.continuous_action_size = continuous_action_size
  self.discrete_action_size = discrete_action_size
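Unlike the discrete-or-continuous specs above, the hybrid environment populates both fields at once. Continuing the same illustrative sketch (assumed shapes, not the real class):

    # Both parts populated: one continuous dimension plus one two-way
    # discrete branch per discrete action.
    continuous_action_size = 1
    discrete_action_size = 1
    hybrid_spec = (
        [(4,)],                                         # observation shapes
        continuous_action_size,                         # continuous part
        tuple(2 for _ in range(discrete_action_size)),  # discrete branches
    )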

  all_done = cont_done and disc_done
  if all_done:
      reward = 0
-     for _pos in self.continuous_env.positions[name] + self.discrete_env.positions[name]:
+     for _pos in (
+         self.continuous_env.positions[name]
+         + self.discrete_env.positions[name]
+     ):
-         self.continuous_env.positions[name] + self.discrete_env.positions[name]
+         self.continuous_env.positions[name]
+         + self.discrete_env.positions[name]
- self.step_result[name] = self._make_batched_step(
-     name, all_done, reward
- )
+ self.step_result[name] = self._make_batched_step(name, all_done, reward)

  def reset(self) -> None:  # type: ignore
      super().reset()

  self.discrete_env.goal = self.goal

  def set_actions(self, behavior_name: BehaviorName, action) -> None:
-     #print(action, self.goal[behavior_name])
-     continuous_action = action[:, :self.continuous_action_size]
-     discrete_action = action[:, self.continuous_action_size:]
+     # print(action, self.goal[behavior_name])
+     continuous_action = action[:, : self.continuous_action_size]
+     discrete_action = action[:, self.continuous_action_size :]
      self.continuous_env.set_actions(behavior_name, continuous_action)
      self.discrete_env.set_actions(behavior_name, discrete_action)
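The reformatted slicing splits one batched hybrid action into its two halves. A small runnable example of the same split, with made-up numbers:

    import numpy as np

    continuous_action_size = 2
    # One batched hybrid action: two continuous values, then two discrete choices.
    action = np.array([[0.3, -0.7, 1.0, 0.0]])

    continuous_action = action[:, :continuous_action_size]  # [[ 0.3 -0.7]]
    discrete_action = action[:, continuous_action_size:]    # [[ 1.  0.]]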
