
add negative constant extrinsic to gail

/develop/gail-srl-hack
Andrew Cohen, 4 years ago
Current commit
cd349985
2 files changed, 27 insertions and 8 deletions
  1. ml-agents/mlagents/trainers/tests/simple_test_envs.py (7 changes)
  2. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (28 changes)

ml-agents/mlagents/trainers/tests/simple_test_envs.py (7 changes)


         vec_obs_size=OBS_SIZE,
         var_len_obs_size=VAR_LEN_SIZE,
         action_sizes=(1, 0),
+        gail=False,

+        self.gail = gail
         self.num_visual = num_visual
         self.num_vector = num_vector
         self.num_var_len = num_var_len

         return action_mask

     def _compute_reward(self, name: str, done: bool) -> float:
-        if done:
+        if self.gail:
+            # Subtract large positive constant to eliminate survivor bias in GAIL
+            reward = -0.2
+        elif done:
             reward = 0.0
             for _pos in self.positions[name]:
                 reward += (SUCCESS_REWARD * _pos * self.goal[name]) / len(
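
For context, a sketch of how the modified _compute_reward plausibly reads once this hunk is applied. The branches outside the hunk (the per-step time penalty and the return) are assumed from the unchanged parts of simple_test_envs.py and are not part of this commit:

    def _compute_reward(self, name: str, done: bool) -> float:
        if self.gail:
            # Constant negative extrinsic reward: every step costs 0.2, so an
            # agent trained against GAIL cannot profit from merely prolonging
            # the episode (no survivor bias).
            reward = -0.2
        elif done:
            reward = 0.0
            for _pos in self.positions[name]:
                reward += (SUCCESS_REWARD * _pos * self.goal[name]) / len(
                    self.positions[name]
                )
        else:
            # Assumed from the surrounding file: a small penalty while the
            # episode is still running.
            reward = -TIME_PENALTY
        return reward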

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (28 changes)


     SelfPlaySettings,
     BehavioralCloningSettings,
     GAILSettings,
+    RewardSignalSettings,
     RewardSignalType,
     EncoderType,
 )

@pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
env = SimpleEnvironment(
[BRAIN_NAME], action_sizes=action_sizes, step_size=0.2, gail=True
)
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
RewardSignalType.GAIL: GAILSettings(
strength=0.05, encoding_size=32, demo_path=demo_path
),
RewardSignalType.EXTRINSIC: RewardSignalSettings(),
}
config = attr.evolve(
trainer_config,

)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)
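
The keyword arguments hidden in the middle of the attr.evolve call are not shown in this diff. A minimal sketch of how the test body likely fits together after the change, assuming the dict in the hunk above is assigned to a reward_signals variable (the other evolve arguments are left out rather than guessed):

    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(
            strength=0.05, encoding_size=32, demo_path=demo_path
        ),
        RewardSignalType.EXTRINSIC: RewardSignalSettings(),
    }
    # Assumed wiring; the diff hides the remaining keyword arguments.
    config = attr.evolve(trainer_config, reward_signals=reward_signals)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)

With gail=True the environment now returns a constant -0.2 every step, so the episode return is -0.2 times the episode length; a success_threshold of -1.5 therefore requires reaching the goal in roughly seven steps, which is why the old 0.9 threshold no longer applies.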
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])

num_vector=0,
action_sizes=action_sizes,
step_size=0.3,
gail=True,
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
RewardSignalType.GAIL: GAILSettings(
strength=0.05, encoding_size=32, demo_path=demo_path
),
RewardSignalType.EXTRINSIC: RewardSignalSettings(),
}
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3)
config = attr.evolve(

behavioral_cloning=bc_settings,
max_steps=1000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)
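
The visual variant also wires behavioral cloning into the trainer through behavioral_cloning=bc_settings. Its construction is elided from the diff; presumably it is built from the same recorded demo, roughly as in this hypothetical sketch (the steps value is illustrative, not taken from the commit):

    # Hypothetical: the diff only references bc_settings, it does not show this line.
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)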
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])

num_vector=0,
action_sizes=action_sizes,
step_size=0.2,
gail=True,
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
RewardSignalType.GAIL: GAILSettings(
strength=0.05, encoding_size=32, demo_path=demo_path
),
RewardSignalType.EXTRINSIC: RewardSignalSettings(),
}
hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16

behavioral_cloning=bc_settings,
max_steps=500,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=-1.5)
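
Both visual tests tune hyperparameters with attr.evolve rather than mutating the shared PPO_TORCH_CONFIG / SAC_TORCH_CONFIG objects. A small self-contained illustration of that pattern, using a made-up attrs class in place of the real hyperparameter settings:

    import attr

    @attr.s(auto_attribs=True, frozen=True)
    class Hyperparameters:  # hypothetical stand-in for the real settings class
        learning_rate: float = 3e-4
        batch_size: int = 256

    base = Hyperparameters()
    tuned = attr.evolve(base, learning_rate=3e-4, batch_size=16)
    # evolve returns a modified copy; the shared base object is left untouched
    assert tuned.batch_size == 16 and base.batch_size == 256

Because evolve returns a new object, each parametrized test can derive its own config without affecting the other test cases.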