浏览代码

Curriculum: If no behavior specified, do magic (#4346)

* Make behavior in curriculum a required attribute

* Re-adding the test
/MLA-1734-demo-provider
GitHub 4 年前
当前提交
705a0e0e
共有 2 个文件被更改,包括 44 次插入20 次删除
  1. 2
      ml-agents/mlagents/trainers/settings.py
  2. 62
      ml-agents/mlagents/trainers/tests/test_env_param_manager.py

2
ml-agents/mlagents/trainers/settings.py


PROGRESS: str = "progress"
REWARD: str = "reward"
behavior: str
behavior: str = attr.ib(default="")
min_lesson_length: int = 0
signal_smoothing: bool = True
threshold: float = attr.ib(default=0.0)

62
ml-agents/mlagents/trainers/tests/test_env_param_manager.py


yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
)
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """

"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}
# Curriculum fixture whose completion_criteria intentionally has no `behavior`
# key. Nesting matters here: lessons are list items under `curriculum`, and
# each lesson owns its own `completion_criteria` and `value`.
test_curriculum_no_behavior_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        value: 2
"""
def test_curriculum_no_behavior():
    """Expect a TypeError for a curriculum config without a `behavior` key.

    Loads `test_curriculum_no_behavior_yaml`, builds `RunOptions` from it and
    hands the resulting environment parameters to
    `EnvironmentParameterManager`.
    """
    # Every step stays inside the context manager: the test passes as soon as
    # any of them raises TypeError, and the later steps depend on the earlier
    # ones having succeeded.
    with pytest.raises(TypeError):
        raw_config = yaml.safe_load(test_curriculum_no_behavior_yaml)
        options = RunOptions.from_dict(raw_config)
        EnvironmentParameterManager(options.environment_parameters, 1337, False)
正在加载...
取消
保存