ml-agents/ml-agents/mlagents/trainers/tests/test_config_conversion.py


								import yaml

								import pytest


								from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert

								from mlagents.trainers.settings import (

								    TrainerType,

								    PPOSettings,

								    SACSettings,

								    RewardSignalType,

								)


								BRAIN_NAME = "testbehavior"


								# Check one per category

								BATCH_SIZE = 256

								HIDDEN_UNITS = 32

								SUMMARY_FREQ = 500


								PPO_CONFIG = f"""

								    default:

								        trainer: ppo

								        batch_size: 1024

								        beta: 5.0e-3

								        buffer_size: 10240

								        epsilon: 0.2

								        hidden_units: 128

								        lambd: 0.95

								        learning_rate: 3.0e-4

								        learning_rate_schedule: linear

								        max_steps: 5.0e5

								        memory_size: 256

								        normalize: false

								        num_epoch: 3

								        num_layers: 2

								        time_horizon: 64

								        sequence_length: 64

								        summary_freq: 10000

								        use_recurrent: false

								        vis_encode_type: simple

								        reward_signals:

								            extrinsic:

								                strength: 1.0

								                gamma: 0.99


								    {BRAIN_NAME}:

								        trainer: ppo

								        batch_size: {BATCH_SIZE}

								        beta: 5.0e-3

								        buffer_size: 64

								        epsilon: 0.2

								        hidden_units: {HIDDEN_UNITS}

								        lambd: 0.95

								        learning_rate: 5.0e-3

								        max_steps: 2500

								        memory_size: 256

								        normalize: false

								        num_epoch: 3

								        num_layers: 2

								        time_horizon: 64

								        sequence_length: 64

								        summary_freq: {SUMMARY_FREQ}

								        use_recurrent: false

								        reward_signals:

								            curiosity:

								                strength: 1.0

								                gamma: 0.99

								                encoding_size: 128

								    """


								SAC_CONFIG = f"""

								    default:

								        trainer: sac

								        batch_size: 128

								        buffer_size: 50000

								        buffer_init_steps: 0

								        hidden_units: 128

								        init_entcoef: 1.0

								        learning_rate: 3.0e-4

								        learning_rate_schedule: constant

								        max_steps: 5.0e5

								        memory_size: 256

								        normalize: false

								        num_update: 1

								        train_interval: 1

								        num_layers: 2

								        time_horizon: 64

								        sequence_length: 64

								        summary_freq: 10000

								        tau: 0.005

								        use_recurrent: false

								        vis_encode_type: simple

								        reward_signals:

								            extrinsic:

								                strength: 1.0

								                gamma: 0.99


								    {BRAIN_NAME}:

								        trainer: sac

								        batch_size: {BATCH_SIZE}

								        buffer_size: 64

								        buffer_init_steps: 100

								        hidden_units: {HIDDEN_UNITS}

								        init_entcoef: 0.01

								        learning_rate: 3.0e-4

								        max_steps: 1000

								        memory_size: 256

								        normalize: false

								        num_update: 1

								        train_interval: 1

								        num_layers: 1

								        time_horizon: 64

								        sequence_length: 64

								        summary_freq: {SUMMARY_FREQ}

								        tau: 0.005

								        use_recurrent: false

								        curiosity_enc_size: 128

								        demo_path: None

								        vis_encode_type: simple

								        reward_signals:

								            curiosity:

								                strength: 1.0

								                gamma: 0.99

								                encoding_size: 128

								    """


								CURRICULUM = """


								  BigWallJump:

								    measure: progress

								    thresholds: [0.1, 0.3, 0.5]

								    min_lesson_length: 200

								    signal_smoothing: true

								    parameters:

								      big_wall_min_height: [0.0, 4.0, 6.0, 8.0]

								      big_wall_max_height: [4.0, 7.0, 8.0, 8.0]

								  SmallWallJump:

								    measure: progress

								    thresholds: [0.1, 0.3, 0.5]

								    min_lesson_length: 100

								    signal_smoothing: true

								    parameters:

								      small_wall_height: [1.5, 2.0, 2.5, 4.0]

								      """


								RANDOMIZATION = """

								  resampling-interval: 5000

								  mass:

								    sampler-type: uniform

								    min_value: 0.5

								    max_value: 10

								  gravity:

								    sampler-type: uniform

								    min_value: 7

								    max_value: 12

								  scale:

								    sampler-type: uniform

								    min_value: 0.75

								    max_value: 3

								    """


								@pytest.mark.parametrize("use_recurrent", [True, False])

								@pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC])

								def test_convert_behaviors(trainer_type, use_recurrent):

								    if trainer_type == TrainerType.PPO:

								        trainer_config = PPO_CONFIG

								        trainer_settings_type = PPOSettings

								    else:

								        trainer_config = SAC_CONFIG

								        trainer_settings_type = SACSettings


								    old_config = yaml.safe_load(trainer_config)

								    old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent

								    new_config = convert_behaviors(old_config)


								    # Test that the new config can be converted to TrainerSettings w/o exceptions

								    trainer_settings = new_config[BRAIN_NAME]


								    # Test that the trainer_settings contains the settings for BRAIN_NAME and

								    # the defaults where specified

								    assert trainer_settings.trainer_type == trainer_type

								    assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)

								    assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE

								    assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS

								    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals


								def test_convert():

								    old_behaviors = yaml.safe_load(PPO_CONFIG)

								    old_curriculum = yaml.safe_load(CURRICULUM)

								    old_sampler = yaml.safe_load(RANDOMIZATION)

								    config = convert(old_behaviors, old_curriculum, old_sampler)

								    assert BRAIN_NAME in config["behaviors"]

								    assert "big_wall_min_height" in config["environment_parameters"]


								    curriculum = config["environment_parameters"]["big_wall_min_height"]["curriculum"]

								    assert len(curriculum) == 4

								    for i, expected_value in enumerate([0.0, 4.0, 6.0, 8.0]):

								        assert curriculum[i][f"Lesson{i}"]["value"] == expected_value

								    for i, threshold in enumerate([0.1, 0.3, 0.5]):

								        criteria = curriculum[i][f"Lesson{i}"]["completion_criteria"]

								        assert criteria["threshold"] == threshold

								        assert criteria["behavior"] == "BigWallJump"

								        assert criteria["signal_smoothing"]

								        assert criteria["min_lesson_length"] == 200

								        assert criteria["measure"] == "progress"


								    assert "gravity" in config["environment_parameters"]

								    gravity = config["environment_parameters"]["gravity"]

								    assert gravity["sampler_type"] == "uniform"

								    assert gravity["sampler_parameters"]["min_value"] == 7

								    assert gravity["sampler_parameters"]["max_value"] == 12


								def test_remove_nones():

								    dict_with_nones = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}

								    dict_without_nones = {"hello": {"hello2": 2}}

								    output = remove_nones(dict_with_nones)

								    assert output == dict_without_nones