浏览代码

Remove random normal epsilon (#3039)

/develop
GitHub 5 年前
当前提交
3b4b0d55
共有 4 个文件被更改，包括 1 次插入和 14 次删除
  1. 3
      ml-agents/mlagents/trainers/ppo/policy.py
  2. 4
      ml-agents/mlagents/trainers/ppo/trainer.py
  3. 3
      ml-agents/mlagents/trainers/tests/mock_brain.py
  4. 5
      ml-agents/mlagents/trainers/tests/test_sac.py

3
ml-agents/mlagents/trainers/ppo/policy.py


feed_dict[self.model.epsilon] = epsilon
feed_dict = self.fill_eval_dict(feed_dict, brain_info)
run_out = self._execute_model(feed_dict, self.inference_dict)
if self.use_continuous_act:
run_out["random_normal_epsilon"] = epsilon
return run_out
@timed

if self.use_continuous_act:
feed_dict[model.output_pre] = mini_batch["actions_pre"]
feed_dict[model.epsilon] = mini_batch["random_normal_epsilon"]
else:
feed_dict[model.action_holder] = mini_batch["actions"]
if self.use_recurrent:

4
ml-agents/mlagents/trainers/ppo/trainer.py


self.processing_buffer[agent_id]["actions_pre"].append(
actions_pre[agent_idx]
)
epsilons = take_action_outputs["random_normal_epsilon"]
self.processing_buffer[agent_id]["random_normal_epsilon"].append(
epsilons[agent_idx]
)
a_dist = take_action_outputs["log_probs"]
# value is a dictionary from name of reward to value estimate of the value head
self.processing_buffer[agent_id]["actions"].append(actions[agent_idx])

3
ml-agents/mlagents/trainers/tests/mock_brain.py


buffer[0]["actions_pre"].append(
np.ones(buffer[0]["actions"][0].shape, dtype=np.float32)
)
buffer[0]["random_normal_epsilon"].append(
np.ones(buffer[0]["actions"][0].shape, dtype=np.float32)
)
buffer[0]["action_mask"].append(
np.ones(np.sum(brain_params.vector_action_space_size), dtype=np.float32)
)

5
ml-agents/mlagents/trainers/tests/test_sac.py


# Test update, while removing PPO-specific buffer elements.
update_buffer = mb.simulate_rollout(
env,
policy,
BUFFER_INIT_SAMPLES,
exclude_key_list=["advantages", "actions_pre", "random_normal_epsilon"],
env, policy, BUFFER_INIT_SAMPLES, exclude_key_list=["advantages", "actions_pre"]
)
# Mock out reward signal eval

正在加载...
取消
保存