
Fix PPO value tests

/develop-newnormalization
Ervin Teng, 5 years ago
Commit 83126bb2
1 file changed, 27 insertions(+), 17 deletions(-)
ml-agents/mlagents/trainers/tests/test_ppo.py (44 changed lines)


@@ ... @@
 from mlagents.trainers.ppo.models import PPOModel
 from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
 from mlagents.trainers.ppo.policy import PPOPolicy
+from mlagents.trainers.trajectory import trajectory_to_agentbuffer
 from mlagents.envs.brain import BrainParameters
 from mlagents.envs.environment import UnityEnvironment
 from mlagents.envs.mock_communicator import MockCommunicator
@@ ... @@
-@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
-def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config):
+def test_ppo_get_value_estimates(dummy_config):
     tf.reset_default_graph()
-    mock_communicator.return_value = MockCommunicator(
-        discrete_action=False, visual_inputs=0
-    )
+    brain_params = BrainParameters(
+        brain_name="test_brain",
+        vector_observation_space_size=1,
+        camera_resolutions=[],
+        vector_action_space_size=[2],
+        vector_action_descriptions=[],
+        vector_action_space_type=0,
+    )
-    env = UnityEnvironment(" ")
-    brain_infos = env.reset()
-    brain_info = brain_infos[env.external_brain_names[0]]
-    trainer_parameters = dummy_config
-    model_path = env.external_brain_names[0]
-    trainer_parameters["model_path"] = model_path
-    trainer_parameters["keep_checkpoints"] = 3
-    policy = PPOPolicy(
-        0, env.brains[env.external_brain_names[0]], trainer_parameters, False, False
-    )
+    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
+    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
+    policy = PPOPolicy(0, brain_params, dummy_config, False, False)
+    time_horizon = 15
+    trajectory = make_fake_trajectory(
+        length=time_horizon,
+        max_step_complete=True,
+        vec_obs_size=1,
+        num_vis_obs=0,
+        action_space=2,
+    )
-    run_out = policy.get_value_estimates(brain_info, 0, done=False)
+    run_out = policy.get_value_estimates(trajectory.next_obs, "test_agent", done=False)
-    run_out = policy.get_value_estimates(brain_info, 0, done=True)
+    run_out = policy.get_value_estimates(trajectory.next_obs, "test_agent", done=True)
     for key, val in run_out.items():
         assert type(key) is str
         assert val == 0.0

-    run_out = policy.get_value_estimates(brain_info, 0, done=True)
+    run_out = policy.get_value_estimates(trajectory.next_obs, "test_agent", done=True)
-    env.close()
+    agentbuffer = trajectory_to_agentbuffer(trajectory)
+    batched_values = policy.get_batched_value_estimates(agentbuffer)
+    for values in batched_values.values():
+        assert len(values) == 15

 def test_ppo_model_cc_vector():

@@ ... @@
     trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)
     time_horizon = 15
     trajectory = make_fake_trajectory(
-        length=time_horizon + 1,
+        length=time_horizon,
         max_step_complete=True,
         vec_obs_size=1,
         num_vis_obs=0,
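
For readers skimming the diff: the assertions in the rewritten test amount to a shape contract on the two value-estimate calls. get_value_estimates returns a dict keyed by reward-signal name, with values asserted to be 0.0 when done=True, and get_batched_value_estimates returns one sequence of per-step values per reward signal, with an entry for each of the trajectory's 15 steps. The self-contained sketch below only illustrates those expected shapes; the "extrinsic" key and the numeric values are placeholders invented for this sketch, not taken from ml-agents.

# Shape illustration only -- the dict keys and values below are made up
# stand-ins for what the ml-agents policy would return.
single_step_estimates = {"extrinsic": 0.37}    # get_value_estimates(next_obs, agent_id, done=False)
terminal_estimates = {"extrinsic": 0.0}        # get_value_estimates(next_obs, agent_id, done=True)
batched_estimates = {"extrinsic": [0.1] * 15}  # get_batched_value_estimates(agentbuffer)

assert all(isinstance(key, str) for key in single_step_estimates)
assert all(value == 0.0 for value in terminal_estimates.values())
assert all(len(values) == 15 for values in batched_estimates.values())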
