# Tests for the ML-Agents config-upgrade utility (mlagents.trainers.upgrade_config).
import yaml
|
|
import pytest
|
|
|
|
from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert
|
|
from mlagents.trainers.settings import (
|
|
TrainerType,
|
|
PPOSettings,
|
|
SACSettings,
|
|
RewardSignalType,
|
|
)
|
|
|
|
# Behavior name used by every fixture config below.
BRAIN_NAME = "testbehavior"

# Check one per category
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500

# Old-style (pre-release-1) PPO trainer config. The `default:` section holds
# the fallback values; the {BRAIN_NAME} section overrides a few of them so the
# tests can verify that per-behavior overrides survive the upgrade.
# NOTE: indentation inside these strings is significant — they are parsed as YAML.
PPO_CONFIG = f"""
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: ppo
    batch_size: {BATCH_SIZE}
    beta: 5.0e-3
    buffer_size: 64
    epsilon: 0.2
    hidden_units: {HIDDEN_UNITS}
    lambd: 0.95
    learning_rate: 5.0e-3
    max_steps: 2500
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    use_recurrent: false
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""

# Old-style SAC trainer config, same structure as PPO_CONFIG above.
SAC_CONFIG = f"""
default:
    trainer: sac
    batch_size: 128
    buffer_size: 50000
    buffer_init_steps: 0
    hidden_units: 128
    init_entcoef: 1.0
    learning_rate: 3.0e-4
    learning_rate_schedule: constant
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: sac
    batch_size: {BATCH_SIZE}
    buffer_size: 64
    buffer_init_steps: 100
    hidden_units: {HIDDEN_UNITS}
    init_entcoef: 0.01
    learning_rate: 3.0e-4
    max_steps: 1000
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 1
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    tau: 0.005
    use_recurrent: false
    curiosity_enc_size: 128
    demo_path: None
    vis_encode_type: simple
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""

# Old-style curriculum config: one entry per behavior, lesson thresholds plus
# per-lesson parameter values (len(values) == len(thresholds) + 1).
CURRICULUM = """
BigWallJump:
    measure: progress
    thresholds: [0.1, 0.3, 0.5]
    min_lesson_length: 200
    signal_smoothing: true
    parameters:
        big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
        big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
    measure: progress
    thresholds: [0.1, 0.3, 0.5]
    min_lesson_length: 100
    signal_smoothing: true
    parameters:
        small_wall_height: [1.5, 2.0, 2.5, 4.0]
"""

# Old-style parameter-randomization (sampler) config. Note the old format used
# hyphenated keys (sampler-type); the upgraded format uses underscores.
RANDOMIZATION = """
resampling-interval: 5000
mass:
    sampler-type: uniform
    min_value: 0.5
    max_value: 10
gravity:
    sampler-type: uniform
    min_value: 7
    max_value: 12
scale:
    sampler-type: uniform
    min_value: 0.75
    max_value: 3
"""
@pytest.mark.parametrize("use_recurrent", [True, False])
@pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC])
def test_convert_behaviors(trainer_type, use_recurrent):
    """Old-style PPO/SAC behavior configs convert to typed TrainerSettings.

    Parametrized over trainer type and recurrence so both hyperparameter
    classes and both network variants go through the converter.
    """
    if trainer_type == TrainerType.PPO:
        trainer_config = PPO_CONFIG
        trainer_settings_type = PPOSettings
    elif trainer_type == TrainerType.SAC:
        trainer_config = SAC_CONFIG
        trainer_settings_type = SACSettings

    # Fix: use safe_load instead of Loader-less yaml.load, which is deprecated
    # since PyYAML 5.1 and a TypeError in PyYAML 6. The config is plain data,
    # and this matches test_convert below.
    old_config = yaml.safe_load(trainer_config)
    old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent
    new_config = convert_behaviors(old_config)

    # Test that the new config can be converted to TrainerSettings w/o exceptions
    trainer_settings = new_config[BRAIN_NAME]

    # Test that the trainer_settings contains the settings for BRAIN_NAME and
    # the defaults where specified
    assert trainer_settings.trainer_type == trainer_type
    assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)
    assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE
    assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals
def test_convert():
    """End-to-end upgrade: behaviors + curriculum + sampler into one new config.

    Checks that the upgraded dict keeps the behavior, turns each curriculum
    parameter into a lesson list with completion criteria, and renames the
    sampler keys to the new underscore style.
    """
    behaviors = yaml.safe_load(PPO_CONFIG)
    curriculum_cfg = yaml.safe_load(CURRICULUM)
    sampler_cfg = yaml.safe_load(RANDOMIZATION)
    new_config = convert(behaviors, curriculum_cfg, sampler_cfg)

    assert BRAIN_NAME in new_config["behaviors"]
    env_params = new_config["environment_parameters"]
    assert "big_wall_min_height" in env_params

    # One lesson per parameter value (thresholds + 1 final lesson).
    lessons = env_params["big_wall_min_height"]["curriculum"]
    assert len(lessons) == 4
    expected_values = [0.0, 4.0, 6.0, 8.0]
    for idx, expected in enumerate(expected_values):
        assert lessons[idx][f"Lesson{idx}"]["value"] == expected

    # Each non-final lesson carries completion criteria copied from the
    # old curriculum block.
    for idx, expected_threshold in enumerate([0.1, 0.3, 0.5]):
        crit = lessons[idx][f"Lesson{idx}"]["completion_criteria"]
        assert crit["threshold"] == expected_threshold
        assert crit["behavior"] == "BigWallJump"
        assert crit["signal_smoothing"]
        assert crit["min_lesson_length"] == 200
        assert crit["measure"] == "progress"

    # Sampler entries become environment parameters with underscore keys.
    assert "gravity" in env_params
    gravity_param = env_params["gravity"]
    assert gravity_param["sampler_type"] == "uniform"
    assert gravity_param["sampler_parameters"]["min_value"] == 7
    assert gravity_param["sampler_parameters"]["max_value"] == 12
def test_remove_nones():
    """remove_nones drops None-valued entries, recursing into nested dicts."""
    nested_input = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
    expected = {"hello": {"hello2": 2}}
    assert remove_nones(nested_input) == expected