Refactor of Curriculum and parameter sampling (#4160)
* Introduced the Constant Parameter Sampler, which will be useful later since samplers and floats can be used interchangeably
* Refactored settings.py to reflect the new format of the config.yaml
* First working version
* Added the unit tests
* Update to Upgrade for Updates
* Fixing the tests
* Upgraded the config files
* Fixes
* Additional error catching
* Addressing some comments
* Making the code nicer with cattr
* Added and registered an unstructure hook for ParameterRandomization
* Updating C# WallJump
* Adding comments
* Add test for settings export (#4164)
* Update ml-agents/mlagents/trainers/tests/test_settings.py
* Including environment parameters for the test for settings export
* First documentation up.../MLA-1734-demo-provider

Co-authored-by: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
GitHub committed 4 years ago
Current commit: 8eefdcd3
26 files changed, with 1190 insertions and 868 deletions
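The refactor merges parameter randomization and curriculum into a single environment_parameters section of the trainer config, where a bare float, a sampler, and a lesson-by-lesson curriculum are interchangeable values. A minimal sketch of the new format, assembled from the YAML fragments in the tests below; the parameter and behavior names here are illustrative, not taken from this commit:

environment_parameters:
  # A bare float is shorthand for a constant sampler.
  gravity: 9.8
  # A sampler spec draws a new value at each episode reset.
  scale:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.5
      max_value: 10
  # A curriculum is a list of lessons gated by completion criteria.
  wall_height:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: BigWallJump
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3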
Changed files (total lines changed per file):

    4  Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
    4  com.unity.ml-agents/CHANGELOG.md
    3  config/ppo/3DBall_randomize.yaml
   86  config/ppo/WallJump_curriculum.yaml
   16  docs/Migrating.md
  216  docs/Training-ML-Agents.md
    2  docs/Using-Docker.md
    2  ml-agents/mlagents/trainers/env_manager.py
   44  ml-agents/mlagents/trainers/learn.py
  246  ml-agents/mlagents/trainers/settings.py
    4  ml-agents/mlagents/trainers/subprocess_env_manager.py
  101  ml-agents/mlagents/trainers/tests/test_config_conversion.py
   61  ml-agents/mlagents/trainers/tests/test_learn.py
  135  ml-agents/mlagents/trainers/tests/test_settings.py
    9  ml-agents/mlagents/trainers/tests/test_simple_rl.py
    5  ml-agents/mlagents/trainers/tests/test_trainer_controller.py
    3  ml-agents/mlagents/trainers/tests/test_trainer_util.py
  104  ml-agents/mlagents/trainers/trainer_controller.py
   24  ml-agents/mlagents/trainers/trainer_util.py
  125  ml-agents/mlagents/trainers/upgrade_config.py
  156  ml-agents/mlagents/trainers/environment_parameter_manager.py
  256  ml-agents/mlagents/trainers/tests/test_env_param_manager.py
   77  ml-agents/mlagents/trainers/tests/test_curriculum.py
  136  ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
   91  ml-agents/mlagents/trainers/curriculum.py
  148  ml-agents/mlagents/trainers/meta_curriculum.py
ml-agents/mlagents/trainers/environment_parameter_manager.py (new in this commit):

from typing import Dict, List, Tuple, Optional
from mlagents.trainers.settings import (
    EnvironmentParameterSettings,
    ParameterRandomizationSettings,
)
from collections import defaultdict
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType

from mlagents_envs.logging_util import get_logger

logger = get_logger(__name__)


class EnvironmentParameterManager:
    def __init__(
        self,
        settings: Optional[Dict[str, EnvironmentParameterSettings]] = None,
        run_seed: int = -1,
        restore: bool = False,
    ):
        """
        EnvironmentParameterManager manages all the environment parameters of a training
        session. It determines when parameters should change and gives access to the
        current sampler of each parameter.
        :param settings: A dictionary from environment parameter to
        EnvironmentParameterSettings.
        :param run_seed: When the seed is not provided for an environment parameter,
        this seed will be used instead.
        :param restore: If true, the EnvironmentParameterManager will use the
        GlobalTrainingStatus to try and reload the lesson status of each environment
        parameter.
        """
        if settings is None:
            settings = {}
        self._dict_settings = settings
        for parameter_name in self._dict_settings.keys():
            initial_lesson = GlobalTrainingStatus.get_parameter_state(
                parameter_name, StatusType.LESSON_NUM
            )
            if initial_lesson is None or not restore:
                GlobalTrainingStatus.set_parameter_state(
                    parameter_name, StatusType.LESSON_NUM, 0
                )
        self._smoothed_values: Dict[str, float] = defaultdict(float)
        for key in self._dict_settings.keys():
            self._smoothed_values[key] = 0.0
        # Update the seeds of the samplers
        self._set_sampler_seeds(run_seed)

    def _set_sampler_seeds(self, seed):
        """
        Sets the seeds for the samplers (if no seed was already present). Each
        sampler without a seed of its own receives a distinct seed derived from
        the provided one by an increasing offset.
        """
        offset = 0
        for settings in self._dict_settings.values():
            for lesson in settings.curriculum:
                if lesson.value.seed == -1:
                    lesson.value.seed = seed + offset
                    offset += 1

    def get_minimum_reward_buffer_size(self, behavior_name: str) -> int:
        """
        Calculates the minimum size of the reward buffer a behavior must use. This
        method uses the 'min_lesson_length' of each lesson's completion criteria to
        determine this value.
        :param behavior_name: The name of the behavior the minimum reward buffer
        size corresponds to.
        """
        result = 1
        for settings in self._dict_settings.values():
            for lesson in settings.curriculum:
                if lesson.completion_criteria is not None:
                    if lesson.completion_criteria.behavior == behavior_name:
                        result = max(
                            result, lesson.completion_criteria.min_lesson_length
                        )
        return result

    def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]:
        """
        Creates a dictionary from environment parameter name to its corresponding
        ParameterRandomizationSettings. If a curriculum is used, the
        ParameterRandomizationSettings corresponds to the sampler of the current lesson.
        """
        samplers: Dict[str, ParameterRandomizationSettings] = {}
        for param_name, settings in self._dict_settings.items():
            lesson_num = GlobalTrainingStatus.get_parameter_state(
                param_name, StatusType.LESSON_NUM
            )
            lesson = settings.curriculum[lesson_num]
            samplers[param_name] = lesson.value
        return samplers

    def get_current_lesson_number(self) -> Dict[str, int]:
        """
        Creates a dictionary from environment parameter to the current lesson number.
        If not using curriculum, this number is always 0 for that environment parameter.
        """
        result: Dict[str, int] = {}
        for parameter_name in self._dict_settings.keys():
            result[parameter_name] = GlobalTrainingStatus.get_parameter_state(
                parameter_name, StatusType.LESSON_NUM
            )
        return result

    def update_lessons(
        self,
        trainer_steps: Dict[str, int],
        trainer_max_steps: Dict[str, int],
        trainer_reward_buffer: Dict[str, List[float]],
    ) -> Tuple[bool, bool]:
        """
        Given progress metrics, calculates if at least one environment parameter is
        in a new lesson and if at least one environment parameter requires the env
        to reset.
        :param trainer_steps: A dictionary from behavior_name to the number of training
        steps this behavior's trainer has performed.
        :param trainer_max_steps: A dictionary from behavior_name to the maximum number
        of training steps allowed for this behavior's trainer.
        :param trainer_reward_buffer: A dictionary from behavior_name to the list of
        the most recent episode returns for this behavior's trainer.
        :returns: A tuple of two booleans: (True if any lesson has changed, True if
        the environment needs to reset)
        """
        must_reset = False
        updated = False
        for param_name, settings in self._dict_settings.items():
            lesson_num = GlobalTrainingStatus.get_parameter_state(
                param_name, StatusType.LESSON_NUM
            )
            lesson = settings.curriculum[lesson_num]
            # Only consider incrementing when a next lesson actually exists.
            if (
                lesson.completion_criteria is not None
                and len(settings.curriculum) > lesson_num + 1
            ):
                behavior_to_consider = lesson.completion_criteria.behavior
                if behavior_to_consider in trainer_steps:
                    must_increment, new_smoothing = lesson.completion_criteria.need_increment(
                        float(trainer_steps[behavior_to_consider])
                        / float(trainer_max_steps[behavior_to_consider]),
                        trainer_reward_buffer[behavior_to_consider],
                        self._smoothed_values[param_name],
                    )
                    self._smoothed_values[param_name] = new_smoothing
                    if must_increment:
                        GlobalTrainingStatus.set_parameter_state(
                            param_name, StatusType.LESSON_NUM, lesson_num + 1
                        )
                        new_lesson_name = settings.curriculum[lesson_num + 1].name
                        logger.info(
                            f"Parameter '{param_name}' has changed. Now in lesson '{new_lesson_name}'"
                        )
                        updated = True
                        if lesson.completion_criteria.require_reset:
                            must_reset = True
        return updated, must_reset
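The seed bookkeeping in _set_sampler_seeds is worth spelling out: every sampler left at the default seed of -1 receives run_seed plus an increasing offset, walking parameters and lessons in declaration order. A standalone sketch of just that logic (a re-implementation for illustration, not an import from the module above):

# Illustration of the seed-offset scheme in _set_sampler_seeds.
# A seed of -1 stands for "not set by the user".
lesson_seeds = {
    "param_1": [-1, -1, -1],  # a three-lesson curriculum
    "param_2": [-1],          # a single sampler
    "param_3": [-1],          # a single constant
}
run_seed = 1337
offset = 0
for param, seeds in lesson_seeds.items():
    for i, seed in enumerate(seeds):
        if seed == -1:
            seeds[i] = run_seed + offset
            offset += 1
print(lesson_seeds)
# {'param_1': [1337, 1338, 1339], 'param_2': [1340], 'param_3': [1341]}
# This is why the tests below expect seed=1337 + 3 for param_2 and
# seed=1337 + 3 + 1 for param_3.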
ml-agents/mlagents/trainers/tests/test_env_param_manager.py (new in this commit):

import pytest
import yaml

from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.settings import (
    RunOptions,
    UniformSettings,
    GaussianSettings,
    ConstantSettings,
    CompletionCriteriaSettings,
)


test_sampler_config_yaml = """
environment_parameters:
  param_1:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.5
      max_value: 10
"""


def test_sampler_conversion():
    run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml))
    assert run_options.environment_parameters is not None
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 1
    assert lessons[0].completion_criteria is None
    assert isinstance(lessons[0].value, UniformSettings)
    assert lessons[0].value.min_value == 0.5
    assert lessons[0].value.max_value == 10


test_sampler_and_constant_config_yaml = """
environment_parameters:
  param_1:
    sampler_type: gaussian
    sampler_parameters:
      mean: 4
      st_dev: 5
  param_2: 20
"""


def test_sampler_and_constant_conversion():
    run_options = RunOptions.from_dict(
        yaml.safe_load(test_sampler_and_constant_config_yaml)
    )
    assert "param_1" in run_options.environment_parameters
    assert "param_2" in run_options.environment_parameters
    lessons_1 = run_options.environment_parameters["param_1"].curriculum
    lessons_2 = run_options.environment_parameters["param_2"].curriculum
    # gaussian
    assert isinstance(lessons_1[0].value, GaussianSettings)
    assert lessons_1[0].value.mean == 4
    assert lessons_1[0].value.st_dev == 5
    # constant
    assert isinstance(lessons_2[0].value, ConstantSettings)
    assert lessons_2[0].value.value == 20


test_curriculum_config_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 60
          min_lesson_length: 100
          require_reset: false
        value: 2
      - name: Lesson3
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3
"""


def test_curriculum_conversion():
    run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml))
    assert "param_1" in run_options.environment_parameters
    lessons = run_options.environment_parameters["param_1"].curriculum
    assert len(lessons) == 3
    # First lesson
    lesson = lessons[0]
    assert lesson.completion_criteria is not None
    assert (
        lesson.completion_criteria.measure
        == CompletionCriteriaSettings.MeasureType.REWARD
    )
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 30.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 1
    # Second lesson
    lesson = lessons[1]
    assert lesson.completion_criteria is not None
    assert (
        lesson.completion_criteria.measure
        == CompletionCriteriaSettings.MeasureType.REWARD
    )
    assert lesson.completion_criteria.behavior == "fake_behavior"
    assert lesson.completion_criteria.threshold == 60.0
    assert lesson.completion_criteria.min_lesson_length == 100
    assert not lesson.completion_criteria.require_reset
    assert isinstance(lesson.value, ConstantSettings)
    assert lesson.value.value == 2
    # Last lesson
    lesson = lessons[2]
    assert lesson.completion_criteria is None
    assert isinstance(lesson.value, UniformSettings)
    assert lesson.value.min_value == 1
    assert lesson.value.max_value == 3


test_bad_curriculum_no_completion_criteria_config_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        value: 2
      - name: Lesson3
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3
"""


def test_curriculum_raises_no_completion_criteria_conversion():
    with pytest.raises(TrainerConfigError):
        RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_no_completion_criteria_config_yaml)
        )


test_everything_config_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: fake_behavior
          threshold: 0.5
          min_lesson_length: 100
          require_reset: false
        value: 2
      - name: Lesson3
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3
  param_2:
    sampler_type: gaussian
    sampler_parameters:
      mean: 4
      st_dev: 5
  param_3: 20
"""


def test_create_manager():
    run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
    assert param_manager.get_current_lesson_number() == {
        "param_1": 0,
        "param_2": 0,
        "param_3": 0,
    }
    assert param_manager.get_current_samplers() == {
        "param_1": ConstantSettings(seed=1337, value=1),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
    # Not enough episodes completed (99 < min_lesson_length of 100)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 99},
    ) == (False, False)
    # Enough episodes, but reward below the threshold of 30
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1] * 101},
    ) == (False, False)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    param_manager_2 = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, restore=True
    )
    # Lesson numbers are persisted through GlobalTrainingStatus, so a restored
    # manager picks them up again
    assert param_manager_2.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    # No reset required (Lesson2's completion criteria have require_reset: false)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 700},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [0] * 101},
    ) == (True, False)
    assert param_manager.get_current_samplers() == {
        "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
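The tests above also show the intended call pattern for update_lessons inside a training loop. A sketch of glue code consuming the (updated, must_reset) pair; env, trainers and their attributes are hypothetical stand-ins, not ML-Agents APIs:

# Hypothetical driver code; `trainers` maps behavior names to trainer objects
# and `env` is an environment manager. Attribute names are illustrative.
updated, must_reset = param_manager.update_lessons(
    trainer_steps={name: t.step for name, t in trainers.items()},
    trainer_max_steps={name: t.max_steps for name, t in trainers.items()},
    trainer_reward_buffer={
        name: list(t.reward_buffer) for name, t in trainers.items()
    },
)
if updated:
    # At least one lesson changed: push the new samplers to the environment.
    env.set_env_parameters(param_manager.get_current_samplers())
if must_reset:
    # At least one passed completion criterion had require_reset: true.
    env.reset()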
ml-agents/mlagents/trainers/tests/test_curriculum.py (removed by this commit):

import pytest

from mlagents.trainers.exception import CurriculumConfigError
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.settings import CurriculumSettings


dummy_curriculum_config = CurriculumSettings(
    measure="reward",
    thresholds=[10, 20, 50],
    min_lesson_length=3,
    signal_smoothing=True,
    parameters={
        "param1": [0.7, 0.5, 0.3, 0.1],
        "param2": [100, 50, 20, 15],
        "param3": [0.2, 0.3, 0.7, 0.9],
    },
)

bad_curriculum_config = CurriculumSettings(
    measure="reward",
    thresholds=[10, 20, 50],
    min_lesson_length=3,
    signal_smoothing=False,
    parameters={
        "param1": [0.7, 0.5, 0.3, 0.1],
        "param2": [100, 50, 20],
        "param3": [0.2, 0.3, 0.7, 0.9],
    },
)


@pytest.fixture
def default_reset_parameters():
    return {"param1": 1, "param2": 1, "param3": 1}


def test_init_curriculum_happy_path():
    curriculum = Curriculum("TestBrain", dummy_curriculum_config)

    assert curriculum.brain_name == "TestBrain"
    assert curriculum.lesson_num == 0
    assert curriculum.measure == "reward"


def test_increment_lesson():
    curriculum = Curriculum("TestBrain", dummy_curriculum_config)
    assert curriculum.lesson_num == 0

    curriculum.lesson_num = 1
    assert curriculum.lesson_num == 1

    assert not curriculum.increment_lesson(10)
    assert curriculum.lesson_num == 1

    assert curriculum.increment_lesson(30)
    assert curriculum.lesson_num == 2

    assert not curriculum.increment_lesson(30)
    assert curriculum.lesson_num == 2

    assert curriculum.increment_lesson(10000)
    assert curriculum.lesson_num == 3


def test_get_parameters():
    curriculum = Curriculum("TestBrain", dummy_curriculum_config)
    assert curriculum.get_config() == {"param1": 0.7, "param2": 100, "param3": 0.2}

    curriculum.lesson_num = 2
    assert curriculum.get_config() == {"param1": 0.3, "param2": 20, "param3": 0.7}
    assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}


def test_load_bad_curriculum_file_raises_error():
    with pytest.raises(CurriculumConfigError):
        Curriculum("TestBrain", bad_curriculum_config)
ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (removed by this commit):

import pytest
from unittest.mock import patch, Mock, call
import yaml
import cattr

from mlagents.trainers.meta_curriculum import MetaCurriculum

from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.tests.test_simple_rl import (
    _check_environment_trains,
    BRAIN_NAME,
    PPO_CONFIG,
)
from mlagents.trainers.tests.test_curriculum import dummy_curriculum_config
from mlagents.trainers.settings import CurriculumSettings
from mlagents.trainers.training_status import StatusType


@pytest.fixture
def measure_vals():
    return {"Brain1": 0.2, "Brain2": 0.3}


@pytest.fixture
def reward_buff_sizes():
    return {"Brain1": 7, "Brain2": 8}


def test_convert_from_dict():
    config = yaml.safe_load(
        """
        measure: progress
        thresholds: [0.1, 0.3, 0.5]
        min_lesson_length: 100
        signal_smoothing: true
        parameters:
          param1: [0.0, 4.0, 6.0, 8.0]
        """
    )
    should_be_config = CurriculumSettings(
        thresholds=[0.1, 0.3, 0.5],
        min_lesson_length=100,
        signal_smoothing=True,
        measure=CurriculumSettings.MeasureType.PROGRESS,
        parameters={"param1": [0.0, 4.0, 6.0, 8.0]},
    )
    assert cattr.structure(config, CurriculumSettings) == should_be_config


def test_curriculum_config(param_name="test_param1", min_lesson_length=100):
    return CurriculumSettings(
        thresholds=[0.1, 0.3, 0.5],
        min_lesson_length=min_lesson_length,
        parameters={f"{param_name}": [0.0, 4.0, 6.0, 8.0]},
    )


test_meta_curriculum_config = {
    "Brain1": test_curriculum_config("test_param1"),
    "Brain2": test_curriculum_config("test_param2"),
}


def test_set_lesson_nums():
    meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
    meta_curriculum.lesson_nums = {"Brain1": 1, "Brain2": 3}

    assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 1
    assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 3


def test_increment_lessons(measure_vals):
    meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
    meta_curriculum.brains_to_curricula["Brain1"] = Mock()
    meta_curriculum.brains_to_curricula["Brain2"] = Mock()

    meta_curriculum.increment_lessons(measure_vals)

    meta_curriculum.brains_to_curricula["Brain1"].increment_lesson.assert_called_with(
        0.2
    )
    meta_curriculum.brains_to_curricula["Brain2"].increment_lesson.assert_called_with(
        0.3
    )


@patch("mlagents.trainers.curriculum.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
def test_increment_lessons_with_reward_buff_sizes(
    curriculum_a, curriculum_b, measure_vals, reward_buff_sizes
):
    curriculum_a.min_lesson_length = 5
    curriculum_b.min_lesson_length = 10
    meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
    meta_curriculum.brains_to_curricula["Brain1"] = curriculum_a
    meta_curriculum.brains_to_curricula["Brain2"] = curriculum_b

    meta_curriculum.increment_lessons(measure_vals, reward_buff_sizes=reward_buff_sizes)

    curriculum_a.increment_lesson.assert_called_with(0.2)
    curriculum_b.increment_lesson.assert_not_called()


@patch("mlagents.trainers.meta_curriculum.GlobalTrainingStatus")
def test_restore_curriculums(mock_trainingstatus):
    meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
    # Test restore to value
    mock_trainingstatus.get_parameter_state.return_value = 2
    meta_curriculum.try_restore_all_curriculum()
    mock_trainingstatus.get_parameter_state.assert_has_calls(
        [call("Brain1", StatusType.LESSON_NUM), call("Brain2", StatusType.LESSON_NUM)],
        any_order=True,
    )
    assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 2
    assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 2

    # Test restore to None
    mock_trainingstatus.get_parameter_state.return_value = None
    meta_curriculum.try_restore_all_curriculum()

    assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 0
    assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 0


def test_get_config():
    meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
    assert meta_curriculum.get_config() == {"test_param1": 0.0, "test_param2": 0.0}


@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"])
def test_simple_metacurriculum(curriculum_brain_name):
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=False)
    mc = MetaCurriculum({curriculum_brain_name: dummy_curriculum_config})
    _check_environment_trains(
        env, {BRAIN_NAME: PPO_CONFIG}, meta_curriculum=mc, success_threshold=None
    )
ml-agents/mlagents/trainers/curriculum.py (removed by this commit):

import math
from typing import Dict, Any, Optional

from mlagents.trainers.exception import CurriculumConfigError

from mlagents_envs.logging_util import get_logger
from mlagents.trainers.settings import CurriculumSettings

logger = get_logger(__name__)


class Curriculum:
    def __init__(self, brain_name: str, settings: CurriculumSettings):
        """
        Initializes a Curriculum object.
        :param brain_name: Name of the brain this Curriculum is associated with
        :param settings: CurriculumSettings with the fields needed to configure
        the Curriculum
        """
        self.max_lesson_num = 0
        self.measure = None
        self._lesson_num = 0
        self.brain_name = brain_name
        self.settings = settings

        self.smoothing_value = 0.0
        self.measure = self.settings.measure
        self.min_lesson_length = self.settings.min_lesson_length
        self.max_lesson_num = len(self.settings.thresholds)

        parameters = self.settings.parameters
        for key in parameters:
            if len(parameters[key]) != self.max_lesson_num + 1:
                raise CurriculumConfigError(
                    f"The parameter {key} in {brain_name}'s curriculum must have {self.max_lesson_num + 1} values "
                    f"but {len(parameters[key])} were found"
                )

    @property
    def lesson_num(self) -> int:
        return self._lesson_num

    @lesson_num.setter
    def lesson_num(self, lesson_num: int) -> None:
        self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))

    def increment_lesson(self, measure_val: float) -> bool:
        """
        Increments the lesson number depending on the progress given.
        :param measure_val: Measure of progress (either reward or percentage
        of steps completed).
        :return: Whether the lesson was incremented.
        """
        if not self.settings or not measure_val or math.isnan(measure_val):
            return False
        if self.settings.signal_smoothing:
            measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val
            self.smoothing_value = measure_val
        if self.lesson_num < self.max_lesson_num:
            if measure_val > self.settings.thresholds[self.lesson_num]:
                self.lesson_num += 1
                config = {}
                parameters = self.settings.parameters
                for key in parameters:
                    config[key] = parameters[key][self.lesson_num]
                logger.info(
                    "{0} lesson changed. Now in lesson {1}: {2}".format(
                        self.brain_name,
                        self.lesson_num,
                        ", ".join([str(x) + " -> " + str(config[x]) for x in config]),
                    )
                )
                return True
        return False

    def get_config(self, lesson: Optional[int] = None) -> Dict[str, Any]:
        """
        Returns reset parameters which correspond to the lesson.
        :param lesson: The lesson you want to get the config of. If None, the
        current lesson is returned.
        :return: The configuration of the reset parameters.
        """
        if not self.settings:
            return {}
        if lesson is None:
            lesson = self.lesson_num
        lesson = max(0, min(lesson, self.max_lesson_num))
        config = {}
        parameters = self.settings.parameters
        for key in parameters:
            config[key] = parameters[key][lesson]
        return config
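With signal_smoothing enabled, increment_lesson compares an exponentially smoothed measure (0.25 * previous smoothed value + 0.75 * new value) against the threshold, not the raw value. A worked trace of the smoothing alone, matching test_increment_lesson in test_curriculum.py above (lesson starts at 1, thresholds [10, 20, 50]):

# Worked example of the smoothing used by increment_lesson.
smoothing_value = 0.0
for raw in [10.0, 30.0, 30.0]:
    smoothing_value = smoothing_value * 0.25 + 0.75 * raw
    print(smoothing_value)
# 7.5      -> not above threshold 20, lesson stays at 1
# 24.375   -> above threshold 20, lesson increments to 2
# 28.59375 -> not above threshold 50, lesson stays at 2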
ml-agents/mlagents/trainers/meta_curriculum.py (removed by this commit):

"""Contains the MetaCurriculum class."""

from typing import Dict, Set
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.settings import CurriculumSettings
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType

from mlagents_envs.logging_util import get_logger

logger = get_logger(__name__)


class MetaCurriculum:
    """A MetaCurriculum holds curricula. Each curriculum is associated with a
    particular brain in the environment.
    """

    def __init__(self, curriculum_configs: Dict[str, CurriculumSettings]):
        """Initializes a MetaCurriculum object.

        :param curriculum_configs: Dictionary from brain_name to the
        CurriculumSettings for that brain.
        """
        self._brains_to_curricula: Dict[str, Curriculum] = {}
        used_reset_parameters: Set[str] = set()
        for brain_name, curriculum_settings in curriculum_configs.items():
            self._brains_to_curricula[brain_name] = Curriculum(
                brain_name, curriculum_settings
            )
            config_keys: Set[str] = set(
                self._brains_to_curricula[brain_name].get_config().keys()
            )

            # Check if any two curricula use the same reset params.
            if config_keys & used_reset_parameters:
                logger.warning(
                    "Two or more curricula will "
                    "attempt to change the same reset "
                    "parameter. The result will be "
                    "non-deterministic."
                )

            used_reset_parameters.update(config_keys)

    @property
    def brains_to_curricula(self):
        """A dict from brain_name to the brain's curriculum."""
        return self._brains_to_curricula

    @property
    def lesson_nums(self):
        """A dict from brain name to the brain's curriculum's lesson number."""
        lesson_nums = {}
        for brain_name, curriculum in self.brains_to_curricula.items():
            lesson_nums[brain_name] = curriculum.lesson_num

        return lesson_nums

    @lesson_nums.setter
    def lesson_nums(self, lesson_nums):
        for brain_name, lesson in lesson_nums.items():
            self.brains_to_curricula[brain_name].lesson_num = lesson

    def _lesson_ready_to_increment(
        self, brain_name: str, reward_buff_size: int
    ) -> bool:
        """Determines whether the curriculum of a specified brain is ready
        to attempt an increment.

        Args:
            brain_name (str): The name of the brain whose curriculum will be
                checked for readiness.
            reward_buff_size (int): The size of the reward buffer of the trainer
                that corresponds to the specified brain.

        Returns:
            Whether the curriculum of the specified brain should attempt to
            increment its lesson.
        """
        if brain_name not in self.brains_to_curricula:
            return False

        return reward_buff_size >= (
            self.brains_to_curricula[brain_name].min_lesson_length
        )

    def increment_lessons(self, measure_vals, reward_buff_sizes=None):
        """Attempts to increment all the lessons of all the curricula in this
        MetaCurriculum. Note that calling this method does not guarantee the
        lesson of a curriculum will increment. The lesson of a curriculum will
        only increment if the specified measure threshold defined in the
        curriculum has been reached and the minimum number of episodes in the
        lesson have been completed.

        Args:
            measure_vals (dict): A dict of brain name to measure value.
            reward_buff_sizes (dict): A dict of brain names to the size of their
                corresponding reward buffers.

        Returns:
            A dict from brain name to whether that brain's lesson number was
            incremented.
        """
        ret = {}
        if reward_buff_sizes:
            for brain_name, buff_size in reward_buff_sizes.items():
                if self._lesson_ready_to_increment(brain_name, buff_size):
                    measure_val = measure_vals[brain_name]
                    ret[brain_name] = self.brains_to_curricula[
                        brain_name
                    ].increment_lesson(measure_val)
        else:
            for brain_name, measure_val in measure_vals.items():
                ret[brain_name] = self.brains_to_curricula[brain_name].increment_lesson(
                    measure_val
                )
        return ret

    def try_restore_all_curriculum(self):
        """
        Tries to restore all the curricula to what is saved in training_status.json.
        """
        for brain_name, curriculum in self.brains_to_curricula.items():
            lesson_num = GlobalTrainingStatus.get_parameter_state(
                brain_name, StatusType.LESSON_NUM
            )
            if lesson_num is not None:
                logger.info(
                    f"Resuming curriculum for {brain_name} at lesson {lesson_num}."
                )
                curriculum.lesson_num = lesson_num
            else:
                curriculum.lesson_num = 0

    def get_config(self):
        """Get the combined configuration of all curricula in this
        MetaCurriculum.

        :return: A dict from parameter to value.
        """
        config = {}

        for _, curriculum in self.brains_to_curricula.items():
            curr_config = curriculum.get_config()
            config.update(curr_config)

        return config
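For contrast with the new EnvironmentParameterManager, a short sketch of driving this older API, mirroring the tests above; the brain name and parameter values here are illustrative:

# Illustrative use of the pre-refactor MetaCurriculum API.
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.settings import CurriculumSettings

settings = CurriculumSettings(
    measure="reward",
    thresholds=[10, 20, 50],
    min_lesson_length=3,
    parameters={"wall_height": [0.0, 2.0, 4.0, 6.0]},
)
mc = MetaCurriculum({"Brain1": settings})
# An increment is only attempted when the reward buffer holds at least
# min_lesson_length episodes.
print(mc.increment_lessons({"Brain1": 25.0}, reward_buff_sizes={"Brain1": 5}))
# {'Brain1': True}  (measure 25.0 clears the lesson-0 threshold of 10)
print(mc.get_config())
# {'wall_height': 2.0}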