浏览代码

gail and bc tests

/test-recurrent-gail
Andrew Cohen 5 年前
当前提交
7aaf1fb6
共有 4 个文件被更改,包括 604 次插入20 次删除
  1. 281
      demos/1DTestContinuous.demo
  2. 280
      demos/1DTestDiscrete.demo
  3. 4
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  4. 59
      ml-agents/mlagents/trainers/tests/test_simple_rl.py

281
demos/1DTestContinuous.demo


*0:1D@
=o��P�j
"
��
#6�
=o��P�j
"
��
��
=o��P�j
"
��
��!
=sh�?@P�j
"
��
��C�
=o��P�j
"
�?
��
=o��P�j
"
�?
>(�>
=o��P�j
"
�?
��(?
=o��P�j
"
�?
��?
=o��P�j
"
�?
�L?
=o��P�j
"
�?
��<
=o��P�j
"
�?
�n ?!
=sh�?@P�j
"
�?
ӽ?
=o��P�j
"
�?
�� ?
=o��P�j
"
�?
�b#?
=o��P�j
"
�?
t;?
=o��P�j
"
�?
�%?!
=sh�?@P�j
"
�?
�?
=o��P�j
"
��
"��>
=o��P�j
"
��
�V�
=o��P�j
"
��
�2�
=o��P�j
"
��
��
=o��P�j
"
��
pJ�
=o��P�j
"
��
��!
=sh�?@P�j
"
��
g�e�
=o��P�j
"
�?
�\K�
=o��P�j
"
�?
ГV?
=o��P�j
"
�?
t a?
=o��P�j
"
�?
�8?
=o��P�j
"
�?
M�?
=o��P�j
"
�?
���=
=o��P�j
"
�?
�?!
=sh�?@P�j
"
�?
�!7?
=o��P�j
"
�?
��>
=o��P�j
"
�?
�?
=o��P�j
"
�?
�?
=o��P�j
"
�?
$�?!
=sh�?@P�j
"
�?
�?
=o��P�j
"
�?
�?
=o��P�j
"
�?
�?
=o��P�j
"
�?
g,?
=o��P�j
"
�?
�?!
=sh�?@P�j
"
�?
�;?
=o��P�j
"
�?
�?
=o��P�j
"
�?
f��>
=o��P�j
"
�?
Jx?
=o��P�j
"
�?
�%
?!
=sh�?@P�j
"
�?
�P?
=o��P�j
"
��
C�"?
=o��P�j
"
��
��
=o��P�j
"
��
��
=o��P�j
"
��
��
=o��P�j
"
��
�Uz�
=o��P�j
"
��
��!
=sh�?@P�j
"
��
O4�
=o��P�j
"
�?
��
=o��P�j
"
�?
$�>
=o��P�j
"
�?
��L?
=o��P�j
"
�?
n��>
=o��P�j
"
�?
��+?
=o��P�j
"
�?
J�?!
=sh�?@P�j
"
�?
�?
=o��P�j
"
��
��Z?
=o��P�j
"
��
�8u�
=o��P�j
"
��
��g�
=o��P�j
"
��
��
=o��P�j
"
��
��
=o��P�j
"
��
��C�!
=sh�?@P�j
"
��
��
=o��P�j
"
��
��
=o��P�j
"
��
��
=o��P�j
"
��

280
demos/1DTestDiscrete.demo


*:1D@#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
��
�?#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
%
=sh�?@P�Zj
"
��
#
=o��P�Zj
"
�?
#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
��
�?#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
%
=sh�?@P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
%
=sh�?@P�Zj
"
��
#
=o��P�Zj
"
�?
#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
��
�?#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
#
=o��P�Zj
"
��
%
=sh�?@P�Zj
"
��
#
=o��P�Zj
"
�?
#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?%
=sh�?@P�Zj
"
�?
�?#
=o��P�Zj
"
�?
�?#
=o��P�Zj
"
�?

4
ml-agents/mlagents/trainers/tests/simple_test_envs.py


ActionType,
)
from mlagents_envs.rpc_utils import proto_from_batched_step_result_and_action
from mlagents_envs.agent_info_action_pair_pb2 import AgentInfoActionPairProto
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

59
ml-agents/mlagents/trainers/tests/test_simple_rl.py


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_record(use_discrete):
env = Record1DEnvironment([BRAIN_NAME], use_discrete=use_discrete, n_demos=30)
config = generate_config(PPO_CONFIG)
_check_environment_trains(env, config)
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(
vector_action_size=[1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)
action_type = "Discrete" if use_discrete else "Continuous"
demo_path = "demos/1DTest" + action_type + ".demo"
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = generate_config(PPO_CONFIG)

assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_record(use_discrete):
env = Record1DEnvironment([BRAIN_NAME], use_discrete=use_discrete, n_demos=100)
config = generate_config(PPO_CONFIG)
_check_environment_trains(env, config)
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(
vector_action_size=[1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)
action_type = "Discrete" if use_discrete else "Continuous"
demo_path = "demos/1DTest" + action_type + ".demo"
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
def test_gail(use_discrete, trainer_config):
env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
action_type = "Discrete" if use_discrete else "Continuous"
demo_path = "demos/1DTest" + action_type + ".demo"
override_vals = {
"behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 2000},
"reward_signals": {
"gail": {
"strength": 1.0,
"gamma": 0.99,
"encoding_size": 128,
"demo_path": demo_path,
}
},
}
config = generate_config(trainer_config, override_vals)
_check_environment_trains(env, config)
正在加载...
取消
保存