浏览代码

handle mismatch between brain and metacurriculum (#3034)

* handle mismatch between brain and metacurriculum

* add unit tests

* use os.path.splitext in metacurriculum

* fix type
/develop
GitHub 5 年前
当前提交
2c3794a6
共有 6 个文件被更改,包括 87 次插入19 次删除
  1. 11
      ml-agents/mlagents/trainers/meta_curriculum.py
  2. 1
      ml-agents/mlagents/trainers/tests/test_curriculum.py
  3. 53
      ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
  4. 10
      ml-agents/mlagents/trainers/tests/test_simple_rl.py
  5. 8
      ml-agents/mlagents/trainers/trainer_controller.py
  6. 23
      ml-agents/mlagents/trainers/trainer_util.py

11
ml-agents/mlagents/trainers/meta_curriculum.py


try:
for curriculum_filename in os.listdir(curriculum_folder):
# This process requires JSON files
if not curriculum_filename.lower().endswith(".json"):
brain_name, extension = os.path.splitext(curriculum_filename)
if extension.lower() != ".json":
brain_name = curriculum_filename.split(".")[0]
curriculum_filepath = os.path.join(
curriculum_folder, curriculum_filename
)

for brain_name, lesson in lesson_nums.items():
self.brains_to_curriculums[brain_name].lesson_num = lesson
def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
def _lesson_ready_to_increment(
    self, brain_name: str, reward_buff_size: int
) -> bool:
    """Report whether a brain's curriculum may attempt a lesson increment.

    Args:
        brain_name: Name of the brain whose curriculum is checked.
        reward_buff_size: Value compared against the curriculum's
            ``min_lesson_length`` (presumably the size of the reward
            buffer of the trainer for that brain -- confirm with caller).

    Returns:
        ``False`` when ``brain_name`` has no entry in
        ``self.brains_to_curriculums``; otherwise whether
        ``reward_buff_size`` is at least that curriculum's
        ``min_lesson_length``.
    """
    if brain_name in self.brains_to_curriculums:
        required_length = self.brains_to_curriculums[brain_name].min_lesson_length
        return reward_buff_size >= required_length
    # A brain without a curriculum never triggers a lesson increment.
    return False

1
ml-agents/mlagents/trainers/tests/test_curriculum.py


from mlagents.trainers.exception import CurriculumConfigError, CurriculumLoadingError
from mlagents.trainers.curriculum import Curriculum
dummy_curriculum_json_str = """
{
"measure" : "reward",

53
ml-agents/mlagents/trainers/tests/test_meta_curriculum.py


import pytest
from unittest.mock import patch, call
from unittest.mock import patch, call, mock_open
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.tests.test_simple_rl import (
Simple1DEnvironment,
_check_environment_trains,
BRAIN_NAME,
)
from mlagents.trainers.tests.test_curriculum import dummy_curriculum_json_str
class MetaCurriculumTest(MetaCurriculum):

@patch("mlagents.trainers.curriculum.Curriculum.get_config", return_value={})
@patch("mlagents.trainers.curriculum.Curriculum.__init__", return_value=None)
@patch("os.listdir", return_value=["Brain1.json", "Brain2.json"])
@patch("os.listdir", return_value=["Brain1.json", "Brain2.test.json"])
def test_init_meta_curriculum_happy_path(
listdir, mock_curriculum_init, mock_curriculum_get_config, default_reset_parameters
):

assert "Brain1" in meta_curriculum.brains_to_curriculums
assert "Brain2" in meta_curriculum.brains_to_curriculums
assert "Brain2.test" in meta_curriculum.brains_to_curriculums
calls = [call("test/Brain1.json"), call("test/Brain2.json")]
calls = [call("test/Brain1.json"), call("test/Brain2.test.json")]
mock_curriculum_init.assert_has_calls(calls)

new_reset_parameters.update(more_reset_parameters)
assert meta_curriculum.get_config() == new_reset_parameters
# Trainer configuration (YAML) used by the metacurriculum training test.
# The per-brain settings must be nested under the `default` key, and the
# reward-signal parameters under `reward_signals.extrinsic`; with every line
# at the same indentation the document would parse as a flat mapping in which
# `default`, `reward_signals` and `extrinsic` are all empty.
META_CURRICULUM_CONFIG = """
    default:
        trainer: ppo
        batch_size: 16
        beta: 5.0e-3
        buffer_size: 64
        epsilon: 0.2
        hidden_units: 128
        lambd: 0.95
        learning_rate: 5.0e-3
        max_steps: 100
        memory_size: 256
        normalize: false
        num_epoch: 3
        num_layers: 2
        time_horizon: 64
        sequence_length: 64
        summary_freq: 50
        use_recurrent: false
        reward_signals:
            extrinsic:
                strength: 1.0
                gamma: 0.99
    """
@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"])
def test_simple_metacurriculum(curriculum_brain_name):
    """Train the simple 1D environment with a metacurriculum attached.

    Runs twice: once with the curriculum keyed by ``BRAIN_NAME`` and once
    keyed by ``"WrongBrainName"`` (presumably a name no brain in the
    environment uses, so the second run covers a metacurriculum whose brain
    name does not match the trainer's).
    """
    env = Simple1DEnvironment(use_discrete=False)
    # Mock open() so that reads made while constructing the Curriculum return
    # dummy_curriculum_json_str instead of touching the filesystem.
    with patch(
        "builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str
    ):
        curriculum = Curriculum("TestBrain.json")
    mc = MetaCurriculumTest({curriculum_brain_name: curriculum})
    # -100.0 is passed as success_threshold, replacing the default of 0.99.
    _check_environment_trains(env, META_CURRICULUM_CONFIG, mc, -100.0)

10
ml-agents/mlagents/trainers/tests/test_simple_rl.py


"""
def _check_environment_trains(env, config):
def _check_environment_trains(
env, config, meta_curriculum=None, success_threshold=0.99
):
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:
run_id = "id"

train_model=True,
load_model=False,
seed=seed,
meta_curriculum=None,
meta_curriculum=meta_curriculum,
multi_gpu=False,
)

model_path=dir,
run_id=run_id,
meta_curriculum=None,
meta_curriculum=meta_curriculum,
train=True,
training_seed=seed,
sampler_manager=SamplerManager(None),

print(tc._get_measure_vals())
for brain_name, mean_reward in tc._get_measure_vals().items():
assert not math.isnan(mean_reward)
assert mean_reward > 0.99
assert mean_reward > success_threshold
@pytest.mark.parametrize("use_discrete", [True, False])

8
ml-agents/mlagents/trainers/trainer_controller.py


brain_name,
curriculum,
) in self.meta_curriculum.brains_to_curriculums.items():
# Skip brains that are in the metacurriculum but have no trainer yet.
if brain_name not in self.trainers:
continue
if curriculum.measure == "progress":
measure_val = (
self.trainers[brain_name].get_step

for brain_name, trainer in self.trainers.items():
# Write training statistics to Tensorboard.
delta_train_start = time() - self.training_start_time
if self.meta_curriculum is not None:
if (
self.meta_curriculum
and brain_name in self.meta_curriculum.brains_to_curriculums
):
trainer.write_summary(
global_step,
delta_train_start,

23
ml-agents/mlagents/trainers/trainer_util.py


import yaml
from typing import Any, Dict, TextIO
import logging
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import TrainerConfigError

from mlagents.trainers.sac.trainer import SACTrainer
logger = logging.getLogger("mlagents.trainers")
class TrainerFactory:

_brain_key = trainer_config[_brain_key]
trainer_parameters.update(trainer_config[_brain_key])
min_lesson_length = 1
if meta_curriculum:
if brain_name in meta_curriculum.brains_to_curriculums:
min_lesson_length = meta_curriculum.brains_to_curriculums[
brain_name
].min_lesson_length
else:
logger.warning(
f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
f"Brains with curricula: {meta_curriculum.brains_to_curriculums.keys()}. "
)
trainer: Trainer = None # type: ignore # will be set to one of these, or raise
if "trainer" not in trainer_parameters:
raise TrainerConfigError(

elif trainer_type == "ppo":
trainer = PPOTrainer(
brain_parameters,
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
min_lesson_length,
trainer_parameters,
train_model,
load_model,

elif trainer_type == "sac":
trainer = SACTrainer(
brain_parameters,
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
min_lesson_length,
trainer_parameters,
train_model,
load_model,

正在加载...
取消
保存