
test_trajectory fixed

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit
cd73cce2
7 changed files, with 16 additions and 14 deletions
  1. ml-agents/mlagents/trainers/agent_processor.py (2 changes)
  2. ml-agents/mlagents/trainers/policy/tf_policy.py (3 changes)
  3. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (10 changes)
  4. ml-agents/mlagents/trainers/tests/mock_brain.py (8 changes)
  5. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (2 changes)
  6. ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)
  7. ml-agents/mlagents/trainers/trajectory.py (1 change)

ml-agents/mlagents/trainers/agent_processor.py (2 changes)


action_probs[prob_type] = prob_array[idx]
action_mask = stored_decision_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id]) # [0, :]
prev_action = self.policy.retrieve_previous_action([global_id])
experience = AgentExperience(
obs=obs,
reward=step.reward,
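
The hunk above packs the previous action looked up for a global agent id into the experience record. A minimal sketch of that pattern, assuming prev_action is a dict keyed by action type; the class and variable names below are illustrative stand-ins, not the ml-agents API:

    from dataclasses import dataclass
    from typing import Dict
    import numpy as np

    @dataclass
    class ExperienceSketch:
        obs: np.ndarray
        reward: float
        prev_action: Dict[str, np.ndarray]  # e.g. {"discrete_action": ...}

    # Previous actions looked up per agent id, mirroring retrieve_previous_action([global_id]).
    prev_actions_by_agent = {"agent-0": {"discrete_action": np.zeros(2, dtype=np.int32)}}

    exp = ExperienceSketch(
        obs=np.zeros(8, dtype=np.float32),
        reward=0.0,
        prev_action=prev_actions_by_agent["agent-0"],
    )
    print(list(exp.prev_action.keys()))  # ['discrete_action']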

ml-agents/mlagents/trainers/policy/tf_policy.py (3 changes)


if not self.use_continuous_act:
feed_dict[self.prev_action] = self.retrieve_previous_action(
global_agent_ids
)
)["discrete_action"]
feed_dict[self.memory_in] = self.retrieve_memories(global_agent_ids)
feed_dict = self.fill_eval_dict(feed_dict, decision_requests)
run_out = self._execute_model(feed_dict, self.inference_dict)
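
Here the policy feeds only the discrete component of the retrieved previous actions into its placeholder. A minimal sketch of that selection, using a plain dict in place of the TF placeholders; the key names are hypothetical:

    import numpy as np

    def build_feed_dict(prev_actions, use_continuous_act):
        feed_dict = {}
        if not use_continuous_act:
            # Only the discrete component is fed to the prev_action placeholder.
            feed_dict["prev_action"] = prev_actions["discrete_action"]
        return feed_dict

    prev = {"discrete_action": np.array([[1, 0]], dtype=np.int32)}
    print(build_feed_dict(prev, use_continuous_act=False))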

ml-agents/mlagents/trainers/ppo/optimizer_tf.py (10 changes)


else:
if self.policy.use_continuous_act: # For hybrid action buffer support
feed_dict[self.policy.output] = mini_batch["continuous_action"]
if self.policy.use_recurrent:
feed_dict[self.policy.prev_action] = mini_batch[
"prev_continuous_action"
]
if self.policy.use_recurrent:
feed_dict[self.policy.prev_action] = mini_batch["prev_action"]
if self.policy.use_recurrent:
feed_dict[self.policy.prev_action] = mini_batch[
"prev_discrete_action"
]
feed_dict[self.policy.action_masks] = mini_batch["action_mask"]
if "vector_obs" in mini_batch:
feed_dict[self.policy.vector_in] = mini_batch["vector_obs"]
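
This hunk switches the optimizer to mini-batch keys named per action type. A minimal sketch of the branching, assuming the batch keys shown in the diff and using string stand-ins for the TF placeholders:

    import numpy as np

    def feed_prev_actions(mini_batch, use_continuous_act, use_recurrent):
        feed_dict = {}
        if use_continuous_act:
            feed_dict["output"] = mini_batch["continuous_action"]
            if use_recurrent:
                feed_dict["prev_action"] = mini_batch["prev_continuous_action"]
        else:
            if use_recurrent:
                feed_dict["prev_action"] = mini_batch["prev_discrete_action"]
            feed_dict["action_masks"] = mini_batch["action_mask"]
        return feed_dict

    batch = {
        "prev_discrete_action": np.ones((4, 2), dtype=np.float32),
        "action_mask": np.ones((4, 3), dtype=np.float32),
    }
    print(sorted(feed_prev_actions(batch, use_continuous_act=False, use_recurrent=True)))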

ml-agents/mlagents/trainers/tests/mock_brain.py (8 changes)


else None
)
if action_spec.is_continuous():
prev_action = {
"prev_continuous_action": np.ones(action_size, dtype=np.float32)
}
prev_action = {"continuous_action": np.ones(action_size, dtype=np.float32)}
prev_action = {
"prev_discrete_action": np.ones(action_size, dtype=np.float32)
}
prev_action = {"discrete_action": np.ones(action_size, dtype=np.float32)}
max_step = False
memory = np.ones(memory_size, dtype=np.float32)
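
The test helper now fabricates prev_action as a dict keyed by action type. A minimal sketch of that helper, with a hypothetical function name:

    import numpy as np

    def make_prev_action(is_continuous, action_size):
        if is_continuous:
            return {"prev_continuous_action": np.ones(action_size, dtype=np.float32)}
        return {"prev_discrete_action": np.ones(action_size, dtype=np.float32)}

    print(make_prev_action(is_continuous=False, action_size=2))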

ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (2 changes)


_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("use_discrete", [True])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
new_network_settings = attr.evolve(
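
Narrowing the parametrize list to [True] leaves only the discrete variant of the recurrent test. A minimal sketch of how pytest expands the decorator:

    import pytest

    @pytest.mark.parametrize("use_discrete", [True])
    def test_recurrent_case(use_discrete):
        assert use_discrete is True  # only the discrete variant is generated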

ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)


"masks",
"done",
"actions_pre",
"actions",
"continuous_action",
"prev_action",
"prev_continuous_action",
"environment_rewards",
]
wanted_keys = set(wanted_keys)
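
The expected buffer keys now use the per-type names. A minimal sketch of the key check, assuming the test compares the converted trajectory's keys against wanted_keys (the list is abbreviated here, and a plain dict stands in for the real buffer):

    wanted_keys = {
        "masks",
        "done",
        "continuous_action",
        "prev_continuous_action",
        "environment_rewards",
    }
    buffer = {key: [] for key in wanted_keys}  # stand-in for the converted trajectory
    assert set(buffer.keys()) == wanted_keys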

ml-agents/mlagents/trainers/trajectory.py (1 change)


np.ones(action_shape, dtype=np.float32), padding_value=1
)
# agent_buffer_trajectory["prev_action"].append(exp.prev_action)
for act_type, act_array in exp.prev_action.items():
agent_buffer_trajectory["prev_" + act_type].append(act_array)
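
The loop fans the per-type prev_action dict out into prev_-prefixed buffer keys. A minimal, self-contained version of the same pattern, using defaultdict in place of the real AgentBuffer:

    from collections import defaultdict
    import numpy as np

    agent_buffer_trajectory = defaultdict(list)
    prev_action = {
        "continuous_action": np.zeros(3, dtype=np.float32),
        "discrete_action": np.zeros(2, dtype=np.int32),
    }
    for act_type, act_array in prev_action.items():
        agent_buffer_trajectory["prev_" + act_type].append(act_array)

    print(sorted(agent_buffer_trajectory))  # ['prev_continuous_action', 'prev_discrete_action']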
