
preliminary action model tests

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Commit 272affe0
6 files changed, 59 insertions(+), 148 deletions(-)
  1. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (2 changes)
  2. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (2 changes)
  3. ml-agents/mlagents/trainers/tests/torch/test_networks.py (46 changes)
  4. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (112 changes)
  5. ml-agents/mlagents/trainers/tests/torch/test_utils.py (43 changes)
  6. ml-agents/mlagents/trainers/torch/action_model.py (2 changes)
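Across these files, the boolean use_discrete flag on the test environments is replaced by an action_sizes tuple. Judging from the parametrizations below ((0, 1) stands in for use_discrete=True, (1, 0) for use_discrete=False), the tuple reads as (continuous_size, discrete_size); a minimal sketch of the migration under that assumption:

    # Old API: a boolean chose between one discrete and one continuous action.
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=True)

    # New API: both action-space sizes in one tuple; the (continuous_size,
    # discrete_size) ordering is inferred from the test parameters, not
    # stated anywhere in this diff.
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=(0, 1))  # discrete-only
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=(1, 0))  # continuous-only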

ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (2 changes)


      _check_environment_trains(env, {BRAIN_NAME: config})

- @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ @pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
  def test_2d_sac(action_sizes):
      env = SimpleEnvironment(
          [BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8

ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (2 changes)


@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_env_endtoend(num_envs):
def simple_env_factory(worker_id, config):
env = SimpleEnvironment(["1D"], use_discrete=True)
env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
return env
env_manager = SubprocessEnvManager(
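The closure-based factory above is the pattern SubprocessEnvManager uses to spawn one environment per worker process. A hedged usage sketch (the EngineConfig argument and the parameter order are assumptions, not shown in this diff):

    env_manager = SubprocessEnvManager(
        simple_env_factory, EngineConfig.default_config(), num_envs
    )
    # Each worker invokes simple_env_factory(worker_id, config) in its own
    # process to build an independent SimpleEnvironment.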

ml-agents/mlagents/trainers/tests/torch/test_networks.py (46 changes)


    assert _out[0] == pytest.approx(1.0, abs=0.1)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_actor(use_discrete):
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    act_size = [2]
    if use_discrete:
        masks = torch.ones((1, 1))
        action_spec = ActionSpec.create_discrete(tuple(act_size))
    else:
        masks = None
        action_spec = ActionSpec.create_continuous(act_size[0])
    actor = SimpleActor(obs_shapes, network_settings, action_spec)
    # Test get_dists
    sample_obs = torch.ones((1, obs_size))
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        if use_discrete:
            assert isinstance(dist, CategoricalDistInstance)
        else:
            assert isinstance(dist, GaussianDistInstance)
    # Test sample_actions
    actions = actor.sample_action(dists)
    for act in actions:
        if use_discrete:
            assert act.shape == (1, 1)
        else:
            assert act.shape == (1, act_size[0])
    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks
    )
    for act in actions:
        # This is different from above for ONNX export
        if use_discrete:
            assert act.shape == tuple(act_size)
        else:
            assert act.shape == (act_size[0], 1)
    assert mem_size == 0
    assert is_cont == int(not use_discrete)
    assert act_size_vec == torch.tensor(act_size)


@pytest.mark.parametrize("ac_type", [SharedActorCritic, SeparateActorCritic])
@pytest.mark.parametrize("lstm", [True, False])
def test_actor_critic(ac_type, lstm):
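The two branches of test_simple_actor build their specs through ActionSpec factory helpers from mlagents_envs. A small sketch of what they produce (the attribute names are my reading of the mlagents_envs.base_env API and should be treated as assumptions):

    from mlagents_envs.base_env import ActionSpec

    # One discrete branch of size 2, as in tuple(act_size) above.
    disc_spec = ActionSpec.create_discrete((2,))
    # Two continuous dimensions, as in act_size[0] above.
    cont_spec = ActionSpec.create_continuous(2)

    assert disc_spec.discrete_branches == (2,)  # tuple of branch sizes
    assert cont_spec.continuous_size == 2       # number of continuous dims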

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (112 changes)


  SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_simple_ppo(use_discrete):
-     env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_simple_ppo(action_sizes):
+     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_2d_ppo(use_discrete):
+ @pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
+ def test_2d_ppo(action_sizes):
-         [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
+         [BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8
      )
      new_hyperparams = attr.evolve(
          PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640

      check_environment_trains(env, {BRAIN_NAME: config})

- @pytest.mark.parametrize("use_discrete", [True, False])
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
- def test_visual_ppo(num_visual, use_discrete):
+ def test_visual_ppo(num_visual, action_sizes):
-         use_discrete=use_discrete,
+         action_sizes=action_sizes,
          num_visual=num_visual,
          num_vector=0,
          step_size=0.2,

  def test_visual_advanced_ppo(vis_encode_type, num_visual):
      env = SimpleEnvironment(
          [BRAIN_NAME],
-         use_discrete=True,
+         action_sizes=True,
          num_visual=num_visual,
          num_vector=0,
          step_size=0.5,

      check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_recurrent_ppo(use_discrete):
-     env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_recurrent_ppo(action_sizes):
+     env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
      new_network_settings = attr.evolve(
          PPO_TORCH_CONFIG.network_settings,
          memory=NetworkSettings.MemorySettings(memory_size=16),

      check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_simple_sac(use_discrete):
-     env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_simple_sac(action_sizes):
+     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_2d_sac(use_discrete):
+ @pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
+ def test_2d_sac(action_sizes):
-         [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
+         [BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8
      )
      new_hyperparams = attr.evolve(
          SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000

      check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)

- @pytest.mark.parametrize("use_discrete", [True, False])
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
- def test_visual_sac(num_visual, use_discrete):
+ def test_visual_sac(num_visual, action_sizes):
-         use_discrete=use_discrete,
+         action_sizes=action_sizes,
          num_visual=num_visual,
          num_vector=0,
          step_size=0.2,

  def test_visual_advanced_sac(vis_encode_type, num_visual):
      env = SimpleEnvironment(
          [BRAIN_NAME],
-         use_discrete=True,
+         action_sizes=True,
          num_visual=num_visual,
          num_vector=0,
          step_size=0.5,

      check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_recurrent_sac(use_discrete):
-     step_size = 0.2 if use_discrete else 0.5
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_recurrent_sac(action_sizes):
+     step_size = 0.2 if action_sizes else 0.5
-         [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
+         [BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
      )
      new_networksettings = attr.evolve(
          SAC_TORCH_CONFIG.network_settings,

      check_environment_trains(env, {BRAIN_NAME: config})

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_simple_ghost(use_discrete):
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_simple_ghost(action_sizes):
-         [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
+         [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
      )
      self_play_settings = SelfPlaySettings(
          play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

      )

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_simple_asymm_ghost(use_discrete):
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_simple_asymm_ghost(action_sizes):
-         [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
+         [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
      )
      self_play_settings = SelfPlaySettings(
          play_against_latest_model_ratio=1.0,

      check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_simple_asymm_ghost_fails(use_discrete):
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_simple_asymm_ghost_fails(action_sizes):
-         [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
+         [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
      )
      # This config should fail because the team that is not learning when both have reached
      # max step should be executing the initial, untrained policy.

  @pytest.fixture(scope="session")
  def simple_record(tmpdir_factory):
-     def record_demo(use_discrete, num_visual=0, num_vector=1):
+     def record_demo(action_sizes, num_visual=0, num_vector=1):
-             use_discrete=use_discrete,
+             action_sizes=action_sizes,
              num_visual=num_visual,
              num_vector=num_vector,
              n_demos=100,

          agent_info_protos = env.demonstration_protos[BRAIN_NAME]
          meta_data_proto = DemonstrationMetaProto()
          brain_param_proto = BrainParametersProto(
-             vector_action_size=[2] if use_discrete else [1],
+             vector_action_size=[2] if action_sizes else [1],
-             vector_action_space_type=discrete if use_discrete else continuous,
+             vector_action_space_type=discrete if action_sizes else continuous,
-         action_type = "Discrete" if use_discrete else "Continuous"
+         action_type = "Discrete" if action_sizes else "Continuous"
          demo_path_name = "1DTest" + action_type + ".demo"
          demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
          write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
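One wrinkle in the new record_demo: action_sizes is now a tuple, and any non-empty tuple is truthy in Python, so branches like `[2] if action_sizes else [1]` always take the first arm regardless of action type. A tuple-aware check would be needed; a hypothetical helper (the (continuous_size, discrete_size) ordering is an assumption inferred from the parametrizations):

    def uses_discrete(action_sizes):
        # Hypothetical helper, not part of this commit. Assumes the tuple is
        # (continuous_size, discrete_size), so (0, 1) means discrete-only.
        continuous_size, discrete_size = action_sizes
        return discrete_size > 0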

@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

      check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_gail_visual_ppo(simple_record, use_discrete):
-     demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_gail_visual_ppo(simple_record, action_sizes):
+     demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
-         use_discrete=use_discrete,
+         action_sizes=action_sizes,
          step_size=0.2,
      )
      bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

      check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

- @pytest.mark.parametrize("use_discrete", [True, False])
- def test_gail_visual_sac(simple_record, use_discrete):
-     demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
+ @pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
+ def test_gail_visual_sac(simple_record, action_sizes):
+     demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
-         use_discrete=use_discrete,
+         action_sizes=action_sizes,
          step_size=0.2,
      )
      bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

ml-agents/mlagents/trainers/tests/torch/test_utils.py (43 changes)


    assert torch.equal(res, exp)


def test_get_probs_and_entropy():
    # Test continuous
    # Add two dists to the list. This isn't done in the code but we'd like to support it.
    dist_list = [
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
    ]
    action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list
    )
    for lp in log_probs:
        assert lp.shape == (1, 2)
    assert entropies.shape == (1, 2, 2)
    assert all_probs == []
    for log_prob in log_probs:
        # Log prob of standard normal at 0
        for lp in log_prob.flatten():
            assert lp == pytest.approx(-0.919, abs=0.01)
    for ent in entropies.flatten():
        # Entropy of the standard normal
        assert ent == pytest.approx(1.42, abs=0.01)

    # Test discrete
    # Add two dists to the list.
    act_size = 2
    test_prob = torch.tensor(
        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
    )  # High prob for first action
    dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
    action_list = [torch.tensor([0]), torch.tensor([1])]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list
    )
    for all_prob in all_probs:
        assert all_prob.shape == (1, act_size)
    assert entropies.shape == (1, len(dist_list))
    # Make sure the first action has higher probability than the others.
    assert log_probs[0] > log_probs[1]
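The magic constants asserted in the continuous half above are standard-normal values; a quick check in plain Python:

    import math
    # Log-density of N(0, 1) at its mean: -0.5 * ln(2 * pi) ≈ -0.919
    print(-0.5 * math.log(2 * math.pi))          # -0.9189...
    # Differential entropy of N(0, 1): 0.5 * ln(2 * pi * e) ≈ 1.42
    print(0.5 * math.log(2 * math.pi * math.e))  # 1.4189...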
def test_masked_mean():
    test_input = torch.tensor([1, 2, 3, 4, 5])
    masks = torch.ones_like(test_input).bool()

ml-agents/mlagents/trainers/torch/action_model.py (2 changes)


      def _sample_action(self, dists: DistInstances) -> AgentAction:
          """
-         Samples actions from list of distribution instances
+         Samples actions from a DistInstances tuple
          """
          continuous_action: Optional[torch.Tensor] = None
          discrete_action: Optional[List[torch.Tensor]] = None
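The updated docstring refers to a DistInstances tuple rather than a plain list of distributions. Its definition is outside this hunk; from the two Optional locals initialized above, a plausible shape (an assumption, not confirmed by this diff) is:

    from typing import List, NamedTuple, Optional

    from mlagents.trainers.torch.distributions import (
        DistInstance,
        DiscreteDistInstance,
    )

    class DistInstances(NamedTuple):
        # Assumed fields: one continuous distribution instance plus a list of
        # per-branch discrete distribution instances; either may be None.
        continuous: Optional[DistInstance]
        discrete: Optional[List[DiscreteDistInstance]]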
