Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
/ml-agents/mlagents/trainers/tests/simple_test_envs.py

2 commits

Author         SHA1       Message                                    Commit date
Andrew Cohen   e547f26c   adjust step size                           4 years ago
Andrew Cohen   cd349985   add negative constant extrinsic to gail    4 years ago
2 files changed, with 28 insertions and 9 deletions:
  1. ml-agents/mlagents/trainers/tests/simple_test_envs.py (7 changes)
  2. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (30 changes)

ml-agents/mlagents/trainers/tests/simple_test_envs.py (7 changes)


         vec_obs_size=OBS_SIZE,
         var_len_obs_size=VAR_LEN_SIZE,
         action_sizes=(1, 0),
+        gail=False,
+        self.gail = gail
         self.num_visual = num_visual
         self.num_vector = num_vector
         self.num_var_len = num_var_len

         return action_mask

     def _compute_reward(self, name: str, done: bool) -> float:
-        if done:
+        if self.gail:
+            # Constant negative extrinsic reward per step to eliminate survivor bias in GAIL
+            reward = -0.2
+        elif done:
             reward = 0.0
             for _pos in self.positions[name]:
                 reward += (SUCCESS_REWARD * _pos * self.goal[name]) / len(
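For readability, here is a minimal sketch of how _compute_reward plausibly reads after this change; the len(...) denominator, the final else branch, and the TIME_PENALTY constant are assumptions filled in from context rather than lines shown in this diff:

    def _compute_reward(self, name: str, done: bool) -> float:
        if self.gail:
            # New in this merge request: a constant negative extrinsic reward every
            # step, so longer episodes accumulate a larger penalty and GAIL's
            # survivor bias is cancelled out.
            reward = -0.2
        elif done:
            # Pre-existing success reward, averaged over the recorded positions.
            reward = 0.0
            for _pos in self.positions[name]:
                reward += (SUCCESS_REWARD * _pos * self.goal[name]) / len(
                    self.positions[name]  # assumed denominator; elided in the diff
                )
        else:
            reward = -TIME_PENALTY  # assumed pre-existing per-step penalty
        return reward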

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (30 changes)


     SelfPlaySettings,
     BehavioralCloningSettings,
     GAILSettings,
+    RewardSignalSettings,
     RewardSignalType,
     EncoderType,
 )

@pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
env = SimpleEnvironment(
[BRAIN_NAME], action_sizes=action_sizes, step_size=0.3, gail=True
)
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
RewardSignalType.GAIL: GAILSettings(
strength=0.05, encoding_size=32, demo_path=demo_path
),
RewardSignalType.EXTRINSIC: RewardSignalSettings(),
}
config = attr.evolve(
trainer_config,

)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)
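Putting the pieces of this hunk together, the updated test_gail plausibly reads as follows; the reward_signals variable name, any decorators above the shown context, and the remaining attr.evolve keyword arguments are assumptions rather than lines in this diff:

    @pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
    def test_gail(simple_record, action_sizes, trainer_config):
        demo_path = simple_record(action_sizes)
        env = SimpleEnvironment(
            [BRAIN_NAME], action_sizes=action_sizes, step_size=0.3, gail=True
        )
        reward_signals = {  # variable name assumed; the dict opening is not shown
            RewardSignalType.GAIL: GAILSettings(
                strength=0.05, encoding_size=32, demo_path=demo_path
            ),
            RewardSignalType.EXTRINSIC: RewardSignalSettings(),
        }
        config = attr.evolve(
            trainer_config,
            reward_signals=reward_signals,  # other keyword arguments are elided in the diff
        )
        check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)

Evolving the base trainer_config with attr.evolve keeps the remaining PPO or SAC defaults intact; the change swaps the GAIL-only reward for a weak GAIL signal (strength 0.05) plus the new extrinsic signal, and lowers the success threshold accordingly.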
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])

num_vector=0,
action_sizes=action_sizes,
step_size=0.3,
gail=True,
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
RewardSignalType.GAIL: GAILSettings(
strength=0.05, encoding_size=32, demo_path=demo_path
),
RewardSignalType.EXTRINSIC: RewardSignalSettings(),
}
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3)
config = attr.evolve(

behavioral_cloning=bc_settings,
max_steps=1000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])

num_visual=1,
num_vector=0,
action_sizes=action_sizes,
step_size=0.2,
step_size=0.3,
gail=True,
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
RewardSignalType.GAIL: GAILSettings(
strength=0.05, encoding_size=32, demo_path=demo_path
),
RewardSignalType.EXTRINSIC: RewardSignalSettings(),
}
hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16

behavioral_cloning=bc_settings,
max_steps=500,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)
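All three tests lower success_threshold from 0.9 to -1.5. Assuming check_environment_trains compares the mean cumulative extrinsic reward against this threshold, and given that the gail=True environment now returns a constant -0.2 per step, the new threshold roughly requires the agent to finish within seven steps:

    # Rough arithmetic behind success_threshold=-1.5 (an inference, not stated in the diff)
    per_step = -0.2
    assert 7 * per_step > -1.5   # -1.4: an episode finished in 7 steps still passes
    assert 8 * per_step < -1.5   # -1.6: slower episodes fall below the threshold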