Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

318 行
10 KiB

import pytest
import yaml
from mlagents.trainers.exception import TrainerConfigError, TrainerConfigWarning
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.settings import (
RunOptions,
UniformSettings,
GaussianSettings,
ConstantSettings,
CompletionCriteriaSettings,
)
test_sampler_config_yaml = """
environment_parameters:
param_1:
sampler_type: uniform
sampler_parameters:
min_value: 0.5
max_value: 10
"""
def test_sampler_conversion():
run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml))
assert run_options.environment_parameters is not None
assert "param_1" in run_options.environment_parameters
lessons = run_options.environment_parameters["param_1"].curriculum
assert len(lessons) == 1
assert lessons[0].completion_criteria is None
assert isinstance(lessons[0].value, UniformSettings)
assert lessons[0].value.min_value == 0.5
assert lessons[0].value.max_value == 10
test_sampler_and_constant_config_yaml = """
environment_parameters:
param_1:
sampler_type: gaussian
sampler_parameters:
mean: 4
st_dev: 5
param_2: 20
"""
def test_sampler_and_constant_conversion():
run_options = RunOptions.from_dict(
yaml.safe_load(test_sampler_and_constant_config_yaml)
)
assert "param_1" in run_options.environment_parameters
assert "param_2" in run_options.environment_parameters
lessons_1 = run_options.environment_parameters["param_1"].curriculum
lessons_2 = run_options.environment_parameters["param_2"].curriculum
# gaussian
assert isinstance(lessons_1[0].value, GaussianSettings)
assert lessons_1[0].value.mean == 4
assert lessons_1[0].value.st_dev == 5
# constant
assert isinstance(lessons_2[0].value, ConstantSettings)
assert lessons_2[0].value.value == 20
test_curriculum_config_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 60
min_lesson_length: 100
require_reset: false
value: 2
- name: Lesson3
value:
sampler_type: uniform
sampler_parameters:
min_value: 1
max_value: 3
"""
def test_curriculum_conversion():
run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml))
assert "param_1" in run_options.environment_parameters
lessons = run_options.environment_parameters["param_1"].curriculum
assert len(lessons) == 3
# First lesson
lesson = lessons[0]
assert lesson.completion_criteria is not None
assert (
lesson.completion_criteria.measure
== CompletionCriteriaSettings.MeasureType.REWARD
)
assert lesson.completion_criteria.behavior == "fake_behavior"
assert lesson.completion_criteria.threshold == 30.0
assert lesson.completion_criteria.min_lesson_length == 100
assert lesson.completion_criteria.require_reset
assert isinstance(lesson.value, ConstantSettings)
assert lesson.value.value == 1
# Second lesson
lesson = lessons[1]
assert lesson.completion_criteria is not None
assert (
lesson.completion_criteria.measure
== CompletionCriteriaSettings.MeasureType.REWARD
)
assert lesson.completion_criteria.behavior == "fake_behavior"
assert lesson.completion_criteria.threshold == 60.0
assert lesson.completion_criteria.min_lesson_length == 100
assert not lesson.completion_criteria.require_reset
assert isinstance(lesson.value, ConstantSettings)
assert lesson.value.value == 2
# Last lesson
lesson = lessons[2]
assert lesson.completion_criteria is None
assert isinstance(lesson.value, UniformSettings)
assert lesson.value.min_value == 1
assert lesson.value.max_value == 3
test_bad_curriculum_no_competion_criteria_config_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
value: 2
- name: Lesson3
value:
sampler_type: uniform
sampler_parameters:
min_value: 1
max_value: 3
"""
test_bad_curriculum_all_competion_criteria_config_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 30
min_lesson_length: 100
require_reset: true
value: 2
- name: Lesson3
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 30
min_lesson_length: 100
require_reset: true
value:
sampler_type: uniform
sampler_parameters:
min_value: 1
max_value: 3
"""
def test_curriculum_raises_no_completion_criteria_conversion():
with pytest.raises(TrainerConfigError):
RunOptions.from_dict(
yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml)
)
def test_curriculum_raises_all_completion_criteria_conversion():
with pytest.warns(TrainerConfigWarning):
run_options = RunOptions.from_dict(
yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
)
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
behavior: fake_behavior
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
completion_criteria:
measure: progress
behavior: fake_behavior
threshold: 0.5
min_lesson_length: 100
require_reset: false
value: 2
- name: Lesson3
value:
sampler_type: uniform
sampler_parameters:
min_value: 1
max_value: 3
param_2:
sampler_type: gaussian
sampler_parameters:
mean: 4
st_dev: 5
param_3: 20
"""
def test_create_manager():
run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
assert param_manager.get_current_lesson_number() == {
"param_1": 0,
"param_2": 0,
"param_3": 0,
}
assert param_manager.get_current_samplers() == {
"param_1": ConstantSettings(seed=1337, value=1),
"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}
# Not enough episodes completed
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 99},
) == (False, False)
# Not enough episodes reward
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1] * 101},
) == (False, False)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.get_current_lesson_number() == {
"param_1": 1,
"param_2": 0,
"param_3": 0,
}
param_manager_2 = EnvironmentParameterManager(
run_options.environment_parameters, 1337, restore=True
)
# The use of global status should make it so that the lesson numbers are maintained
assert param_manager_2.get_current_lesson_number() == {
"param_1": 1,
"param_2": 0,
"param_3": 0,
}
# No reset required
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 700},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [0] * 101},
) == (True, False)
assert param_manager.get_current_samplers() == {
"param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}