
[bug-fix] Fix continuous LSTMs and add test (#3521)

Branch: asymm-envs
GitHub, 5 years ago
Current commit
b2cc1c25
2 files changed, 37 insertions(+), 26 deletions(-)
1. ml-agents/mlagents/trainers/common/tf_optimizer.py (10 changes)
2. ml-agents/mlagents/trainers/tests/test_ppo.py (53 changes)

ml-agents/mlagents/trainers/common/tf_optimizer.py (10 changes)


                 _obs = batch["visual_obs%d" % i]
                 feed_dict[self.policy.visual_in[i]] = _obs
         if self.policy.use_recurrent:
-            feed_dict[self.policy.memory_in] = [np.zeros((self.policy.m_size))]
-            feed_dict[self.memory_in] = [np.zeros((self.m_size))]
+            feed_dict[self.policy.memory_in] = [
+                np.zeros((self.policy.m_size), dtype=np.float32)
+            ]
+            feed_dict[self.memory_in] = [np.zeros((self.m_size), dtype=np.float32)]
         if self.policy.prev_action is not None:
             feed_dict[self.policy.prev_action] = batch["prev_action"]
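Side note on the hunk above (commentary, not part of the commit): the zero-filled initial memories now carry an explicit dtype=np.float32, presumably so the feed values match the graph's float32 memory placeholders, since np.zeros defaults to float64. A minimal sketch of the idea; m_size here is an illustrative value, not one taken from the repository:

    import numpy as np

    m_size = 256  # illustrative memory size, not a value from the commit
    # np.zeros defaults to float64; the recurrent memory placeholders in the
    # graph are float32, so the initial memory is created with matching dtype.
    initial_memory = [np.zeros((m_size,), dtype=np.float32)]
    assert initial_memory[0].dtype == np.float32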

             )
-            prev_action = batch["actions"][-1]
+            prev_action = (
+                batch["actions"][-1] if not self.policy.use_continuous_act else None
+            )
         else:
             value_estimates = self.sess.run(self.value_heads, feed_dict)
             prev_action = None
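This hunk appears to be the core of the continuous-LSTM fix: batch["actions"][-1] is only a valid prev_action for discrete-action policies, since continuous-control policies have no previous-action input, so the recurrent path now feeds None in the continuous case. A minimal sketch of the guard, using a hypothetical stub in place of the real policy object:

    import numpy as np

    class StubPolicy:
        # Hypothetical stand-in for the real policy: continuous-control
        # policies expose no previous-action input to feed.
        use_continuous_act = True

    policy = StubPolicy()
    batch = {"actions": np.array([[0.1, -0.2], [0.3, 0.4]], dtype=np.float32)}
    # Only discrete-action policies carry the last action forward; for
    # continuous actions the correct prev_action is None.
    prev_action = (
        batch["actions"][-1] if not policy.use_continuous_act else None
    )
    assert prev_action is None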

ml-agents/mlagents/trainers/tests/test_ppo.py (53 changes)


     return optimizer
 
 
+def _create_fake_trajectory(use_discrete, use_visual, time_horizon):
+    if use_discrete:
+        act_space = DISCRETE_ACTION_SPACE
+    else:
+        act_space = VECTOR_ACTION_SPACE
+    if use_visual:
+        num_vis_obs = 1
+        vec_obs_size = 0
+    else:
+        num_vis_obs = 0
+        vec_obs_size = VECTOR_OBS_SPACE
+    trajectory = make_fake_trajectory(
+        length=time_horizon,
+        max_step_complete=True,
+        vec_obs_size=vec_obs_size,
+        num_vis_obs=num_vis_obs,
+        action_space=act_space,
+    )
+    return trajectory
+
+
 @pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
 @pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
 @pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])

     )
-@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
-@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
-def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config):
+@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
+@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
+@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
+def test_ppo_get_value_estimates(dummy_config, rnn, visual, discrete):
-    brain_params = BrainParameters(
-        brain_name="test_brain",
-        vector_observation_space_size=1,
-        camera_resolutions=[],
-        vector_action_space_size=[2],
-        vector_action_descriptions=[],
-        vector_action_space_type=0,
-    )
-    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
-    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
-    policy = NNPolicy(
-        0, brain_params, dummy_config, False, False, create_tf_graph=False
+    optimizer = _create_ppo_optimizer_ops_mock(
+        dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
     )
-    optimizer = PPOOptimizer(policy, dummy_config)
-    trajectory = make_fake_trajectory(
-        length=time_horizon,
-        max_step_complete=True,
-        vec_obs_size=1,
-        num_vis_obs=0,
-        action_space=[2],
-    )
+    trajectory = _create_fake_trajectory(discrete, visual, time_horizon)
     run_out, final_value_out = optimizer.get_trajectory_value_estimates(
         trajectory.to_agentbuffer(), trajectory.next_obs, done=False
     )
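With the rewrite above, the value-estimate test runs under every combination of the three stacked parametrize decorators, eight cases in total, which now includes the continuous-plus-recurrent path this commit fixes. A generic sketch of how stacked pytest.mark.parametrize decorators multiply out (illustrative example, not the project's test):

    import pytest

    @pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
    @pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
    def test_combinations(discrete, rnn):
        # pytest expands stacked parametrize decorators into the cartesian
        # product: four cases here, e.g. test_combinations[no_rnn-continuous].
        assert isinstance(discrete, bool) and isinstance(rnn, bool)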
