浏览代码

Move trainer initialization into a utility function (#2412)

This change moves trainer initialization outside of TrainerController,
reducing some of the constructor arguments of TrainerController and
setting up the ability for trainers to be initialized in the case where
a TrainerController isn't needed.
/develop-generalizationTraining-TrainerController
GitHub 5 年前
当前提交
30930383
共有 7 个文件被更改,包括 501 次插入431 次删除
  1. 54
      ml-agents/mlagents/trainers/learn.py
  2. 9
      ml-agents/mlagents/trainers/tests/test_learn.py
  3. 41
      ml-agents/mlagents/trainers/tests/test_simple_rl.py
  4. 300
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  5. 116
      ml-agents/mlagents/trainers/trainer_controller.py
  6. 315
      ml-agents/mlagents/trainers/tests/test_trainer_util.py
  7. 97
      ml-agents/mlagents/trainers/trainer_util.py

54
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.exception import TrainerError
from mlagents.trainers import MetaCurriculumError, MetaCurriculum
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.envs import UnityEnvironment
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import UnityEnvironmentException, SamplerException

base_port + (sub_id * num_envs),
)
env = SubprocessEnvManager(env_factory, num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
trainers = initialize_trainers(
trainer_config,
env.external_brains,
summaries_dir,
run_id,
model_path,
keep_checkpoints,
train_model,
load_model,
run_seed,
maybe_meta_curriculum,
multi_gpu,
)
trainers,
load_model,
keep_checkpoints,
lesson,
multi_gpu,
sampler_manager,
resampling_interval,
)

# Begin training
tc.start_learning(env, trainer_config)
tc.start_learning(env)
def create_sampler_manager(sampler_file_path, env_reset_params):

sampler_config = load_config(sampler_file_path)
if ("resampling-interval") in sampler_config:
if "resampling-interval" in sampler_config:
# Filter arguments that do not exist in the environment
resample_interval = sampler_config.pop("resampling-interval")
if (resample_interval <= 0) or (not isinstance(resample_interval, int)):

def try_create_meta_curriculum(
curriculum_folder: Optional[str], env: SubprocessEnvManager
curriculum_folder: Optional[str], env: SubprocessEnvManager, lesson: int
if meta_curriculum:
for brain_name in meta_curriculum.brains_to_curriculums.keys():
if brain_name not in env.external_brains.keys():
raise MetaCurriculumError(
"One of the curricula "
"defined in " + curriculum_folder + " "
"does not have a corresponding "
"Brain. Check that the "
"curriculum file has the same "
"name as the Brain "
"whose curriculum it defines."
)
# TODO: Should be able to start learning at different lesson numbers
# for each curriculum.
meta_curriculum.set_all_curriculums_to_lesson_num(lesson)
for brain_name in meta_curriculum.brains_to_curriculums.keys():
if brain_name not in env.external_brains.keys():
raise MetaCurriculumError(
"One of the curricula "
"defined in " + curriculum_folder + " "
"does not have a corresponding "
"Brain. Check that the "
"curriculum file has the same "
"name as the Brain "
"whose curriculum it defines."
)
return meta_curriculum

9
ml-agents/mlagents/trainers/tests/test_learn.py


with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, basic_options(), MagicMock())
mock_init.assert_called_once_with(
{},
"./models/ppo-0",
"./summaries",
"ppo-0",

False,
5,
0,
False,
sampler_manager_mock.return_value,
None,
)

with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, options_with_docker_target, MagicMock())
mock_init.assert_called_once()
assert mock_init.call_args[0][0] == "/dockertarget/models/ppo-0"
assert mock_init.call_args[0][1] == "/dockertarget/summaries"
assert mock_init.call_args[0][1] == "/dockertarget/models/ppo-0"
assert mock_init.call_args[0][2] == "/dockertarget/summaries"

41
ml-agents/mlagents/trainers/tests/test_simple_rl.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs import BrainInfo, AllBrainInfo, BrainParameters
from mlagents.envs.communicator_objects import AgentInfoProto

with tempfile.TemporaryDirectory() as dir:
run_id = "id"
save_freq = 99999
seed = 1337
trainer_config = yaml.safe_load(config)
env_manager = SimpleEnvManager(env)
trainers = initialize_trainers(
trainer_config=trainer_config,
external_brains=env_manager.external_brains,
summaries_dir=dir,
run_id=run_id,
model_path=dir,
keep_checkpoints=1,
train_model=True,
load_model=False,
seed=seed,
meta_curriculum=None,
multi_gpu=False,
)
print(trainers)
dir,
dir,
run_id,
save_freq,
trainers=trainers,
summaries_dir=dir,
model_path=dir,
run_id=run_id,
load=False,
keep_checkpoints=1,
lesson=None,
training_seed=1337,
training_seed=seed,
multi_gpu=False,
save_freq=save_freq,
env_manager = SimpleEnvManager(env)
trainer_config = yaml.safe_load(config)
tc.start_learning(env_manager, trainer_config)
tc.start_learning(env_manager)
print(tc._get_measure_vals())
for brain_name, mean_reward in tc._get_measure_vals().items():
assert not math.isnan(mean_reward)
assert mean_reward > 0.99

300
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


import os
from mlagents.trainers import ActionInfo
from mlagents.trainers import TrainerMetrics
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.envs.exception import UnityEnvironmentException
from mlagents.envs.sampler_class import SamplerManager

@pytest.fixture
def dummy_online_bc_config():
    """Trainer config whose ``default`` section selects the online_bc trainer."""
    config_text = """
        default:
            trainer: online_bc
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    return yaml.safe_load(config_text)
@pytest.fixture
def dummy_offline_bc_config():
    """Trainer config for the offline_bc trainer, pointing demo_path at the
    test.demo file that lives next to this test module."""
    demo_dir = os.path.dirname(os.path.abspath(__file__))
    config_text = (
        """
        default:
            trainer: offline_bc
            demo_path: """
        + demo_dir
        + """/test.demo
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    )
    return yaml.safe_load(config_text)
@pytest.fixture
def dummy_offline_bc_config_with_override():
    """Offline BC config plus a brain-specific section that overrides ``normalize``."""
    config = dummy_offline_bc_config()
    config["testbrain"] = {"normalize": False}
    return config
@pytest.fixture
def dummy_bad_config():
    """Trainer config with an unrecognized ``trainer:`` value, used to exercise
    the error path in trainer initialization."""
    config_text = """
        default:
            trainer: incorrect_trainer
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
        """
    return yaml.safe_load(config_text)
@pytest.fixture
def basic_trainer_controller():
return TrainerController(
model_path="test_model_path",

meta_curriculum=None,
load=True,
keep_checkpoints=False,
lesson=None,
multi_gpu=False,
sampler_manager=SamplerManager(None),
sampler_manager=SamplerManager({}),
trainers={},
)

seed = 27
TrainerController(
"",
"",
"1",
1,
None,
True,
False,
False,
None,
seed,
True,
False,
SamplerManager(None),
None,
model_path="",
summaries_dir="",
run_id="1",
save_freq=1,
meta_curriculum=None,
train=True,
training_seed=seed,
fast_simulation=True,
sampler_manager=SamplerManager({}),
resampling_interval=None,
trainers={},
def assert_bc_trainer_constructed(
    trainer_cls, input_config, tc, expected_brain_params, expected_config
):
    """Patch ``trainer_cls.__init__`` and verify that
    ``TrainerController.initialize_trainers`` constructs the trainer for brain
    "testbrain" with the expected arguments.

    :param trainer_cls: BC trainer class to patch (Offline/OnlineBCTrainer).
    :param input_config: Raw trainer config dict fed to initialize_trainers.
    :param tc: TrainerController under test; its attributes supply the
        expected training/load/seed/run_id values.
    :param expected_brain_params: BrainParameters expected to reach the trainer.
    :param expected_config: Fully-resolved trainer parameters expected.
    """
    external_brains = {"testbrain": expected_brain_params}

    # NOTE: the parameter names below must match the keyword arguments
    # initialize_trainers uses when it calls the trainer constructor.
    def mock_constructor(self, brain, trainer_parameters, training, load, seed, run_id):
        assert brain == expected_brain_params
        assert trainer_parameters == expected_config
        assert training == tc.train_model
        assert load == tc.load_model
        assert seed == tc.seed
        assert run_id == tc.run_id

    with patch.object(trainer_cls, "__init__", mock_constructor):
        tc.initialize_trainers(input_config, external_brains)
        assert "testbrain" in tc.trainers
        assert isinstance(tc.trainers["testbrain"], trainer_cls)
def assert_ppo_trainer_constructed(
    input_config, tc, expected_brain_params, expected_config, expected_reward_buff_cap=1
):
    """Patch ``PPOTrainer.__init__`` and verify that
    ``TrainerController.initialize_trainers`` constructs it for brain
    "testbrain" with the expected arguments.

    :param input_config: Raw trainer config dict fed to initialize_trainers.
    :param tc: TrainerController under test; supplies expected attribute values.
    :param expected_brain_params: BrainParameters expected to reach the trainer.
    :param expected_config: Fully-resolved trainer parameters expected.
    :param expected_reward_buff_cap: Expected reward buffer capacity
        (1 unless a meta-curriculum dictates otherwise).
    """
    external_brains = {"testbrain": expected_brain_params}

    def mock_constructor(
        self,
        brain,
        reward_buff_cap,
        trainer_parameters,
        training,
        load,
        seed,
        run_id,
        multi_gpu,
    ):
        # initialize_trainers reads trainer_metrics off the constructed
        # PPOTrainer, so the stub must provide one.
        self.trainer_metrics = TrainerMetrics("", "")
        assert brain == expected_brain_params
        assert trainer_parameters == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == tc.train_model
        assert load == tc.load_model
        assert seed == tc.seed
        assert run_id == tc.run_id
        assert multi_gpu == tc.multi_gpu

    with patch.object(PPOTrainer, "__init__", mock_constructor):
        tc.initialize_trainers(input_config, external_brains)
        assert "testbrain" in tc.trainers
        assert isinstance(tc.trainers["testbrain"], PPOTrainer)
@patch("mlagents.envs.BrainParameters")
def test_initialize_trainer_parameters_uses_defaults(BrainParametersMock):
    """The ``default`` section alone should fully parameterize the trainer."""
    brain_params = BrainParametersMock()
    tc = basic_trainer_controller()
    full_config = dummy_offline_bc_config()
    expected_config = full_config["default"]
    expected_config.update(
        summary_path=tc.summaries_dir + "/test_run_id_testbrain",
        model_path=tc.model_path + "/testbrain",
        keep_checkpoints=tc.keep_checkpoints,
    )
    assert_bc_trainer_constructed(
        OfflineBCTrainer, full_config, tc, brain_params, expected_config
    )
@patch("mlagents.envs.BrainParameters")
def test_initialize_trainer_parameters_override_defaults(BrainParametersMock):
    """A brain-specific config section must override the ``default`` section."""
    brain_params = BrainParametersMock()
    tc = basic_trainer_controller()
    full_config = dummy_offline_bc_config_with_override()
    expected_config = full_config["default"]
    expected_config.update(
        summary_path=tc.summaries_dir + "/test_run_id_testbrain",
        model_path=tc.model_path + "/testbrain",
        keep_checkpoints=tc.keep_checkpoints,
        # Overridden by the brain-specific section of the config.
        normalize=False,
    )
    assert_bc_trainer_constructed(
        OfflineBCTrainer, full_config, tc, brain_params, expected_config
    )
@patch("mlagents.envs.BrainParameters")
def test_initialize_online_bc_trainer(BrainParametersMock):
    """An ``online_bc`` config should construct an OnlineBCTrainer."""
    brain_params = BrainParametersMock()
    tc = basic_trainer_controller()
    full_config = dummy_online_bc_config()
    expected_config = full_config["default"]
    expected_config.update(
        summary_path=tc.summaries_dir + "/test_run_id_testbrain",
        model_path=tc.model_path + "/testbrain",
        keep_checkpoints=tc.keep_checkpoints,
    )
    assert_bc_trainer_constructed(
        OnlineBCTrainer, full_config, tc, brain_params, expected_config
    )
@patch("mlagents.envs.BrainParameters")
def test_initialize_ppo_trainer(BrainParametersMock):
    """A ``ppo`` config should construct a PPOTrainer."""
    brain_params = BrainParametersMock()
    tc = basic_trainer_controller()
    full_config = dummy_config()
    expected_config = full_config["default"]
    expected_config.update(
        summary_path=tc.summaries_dir + "/test_run_id_testbrain",
        model_path=tc.model_path + "/testbrain",
        keep_checkpoints=tc.keep_checkpoints,
    )
    assert_ppo_trainer_constructed(full_config, tc, brain_params, expected_config)
@patch("mlagents.envs.BrainParameters")
def test_initialize_invalid_trainer_raises_exception(BrainParametersMock):
    """An unknown ``trainer:`` value must raise UnityEnvironmentException."""
    tc = basic_trainer_controller()
    external_brains = {"testbrain": BrainParametersMock()}
    with pytest.raises(UnityEnvironmentException):
        tc.initialize_trainers(dummy_bad_config(), external_brains)
def trainer_controller_with_start_learning_mocks():
trainer_mock = MagicMock()
trainer_mock.get_step = 0

tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tc.train_model = False
trainer_config = dummy_config()
tf_reset_graph.return_value = None
env_mock = MagicMock()

tc.start_learning(env_mock, trainer_config)
tc.start_learning(env_mock)
tc.initialize_trainers.assert_called_once_with(
trainer_config, env_mock.external_brains
)
env_mock.reset.assert_called_once()
assert tc.advance.call_count == 11
tc._export_graph.assert_not_called()

@patch("tensorflow.reset_default_graph")
def test_start_learning_trains_until_max_steps_then_saves(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
trainer_config = dummy_config()
tf_reset_graph.return_value = None
brain_info_mock = MagicMock()

env_mock.external_brains = MagicMock()
tc.start_learning(env_mock, trainer_config)
tc.start_learning(env_mock)
tc.initialize_trainers.assert_called_once_with(
trainer_config, env_mock.external_brains
)
def test_start_learning_updates_meta_curriculum_lesson_number():
    """start_learning must push the configured lesson onto the meta-curriculum."""
    tc, trainer_mock = trainer_controller_with_start_learning_mocks()
    trainer_config = dummy_config()

    env_mock = MagicMock()
    env_mock.close = MagicMock()
    env_mock.reset = MagicMock(return_value=MagicMock())

    tc.meta_curriculum = MagicMock()
    tc.lesson = 5
    tc.start_learning(env_mock, trainer_config)
    tc.meta_curriculum.set_all_curriculums_to_lesson_num.assert_called_once_with(
        tc.lesson
    )
def trainer_controller_with_take_step_mocks():

116
ml-agents/mlagents/trainers/trainer_controller.py


import tensorflow as tf
from time import time
from mlagents.envs import BrainParameters
from mlagents.envs.env_manager import StepInfo
from mlagents.envs.env_manager import EnvManager
from mlagents.envs.exception import (

from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
from mlagents.trainers import Trainer, TrainerMetrics
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.subprocess_env_manager import SubprocessEnvManager
trainers: Dict[str, Trainer],
load: bool,
keep_checkpoints: int,
lesson: Optional[int],
multi_gpu: bool,
:param trainers: Trainers for each brain to train.
:param load: Whether to load the model or randomly initialize.
:param keep_checkpoints: How many model checkpoints to keep.
:param lesson: Start learning from this lesson.
self.trainers = trainers
self.lesson = lesson
self.load_model = load
self.keep_checkpoints = keep_checkpoints
self.trainers: Dict[str, Trainer] = {}
self.seed = training_seed
self.multi_gpu = multi_gpu
np.random.seed(self.seed)
tf.set_random_seed(self.seed)
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
def _get_measure_vals(self):
brain_names_to_measure_vals = {}

for brain_name in self.trainers.keys():
self.trainers[brain_name].export_model()
def initialize_trainers(
self,
trainer_config: Dict[str, Any],
external_brains: Dict[str, BrainParameters],
) -> None:
"""
Initialization of the trainers
:param trainer_config: The configurations of the trainers
"""
trainer_parameters_dict = {}
for brain_name in external_brains:
trainer_parameters = trainer_config["default"].copy()
trainer_parameters["summary_path"] = "{basedir}/{name}".format(
basedir=self.summaries_dir, name=str(self.run_id) + "_" + brain_name
)
trainer_parameters["model_path"] = "{basedir}/{name}".format(
basedir=self.model_path, name=brain_name
)
trainer_parameters["keep_checkpoints"] = self.keep_checkpoints
if brain_name in trainer_config:
_brain_key: Any = brain_name
while not isinstance(trainer_config[_brain_key], dict):
_brain_key = trainer_config[_brain_key]
trainer_parameters.update(trainer_config[_brain_key])
trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in external_brains:
if trainer_parameters_dict[brain_name]["trainer"] == "offline_bc":
self.trainers[brain_name] = OfflineBCTrainer(
brain=external_brains[brain_name],
trainer_parameters=trainer_parameters_dict[brain_name],
training=self.train_model,
load=self.load_model,
seed=self.seed,
run_id=self.run_id,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "online_bc":
self.trainers[brain_name] = OnlineBCTrainer(
brain=external_brains[brain_name],
trainer_parameters=trainer_parameters_dict[brain_name],
training=self.train_model,
load=self.load_model,
seed=self.seed,
run_id=self.run_id,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "ppo":
# Find lesson length based on the form of learning
if self.meta_curriculum:
lesson_length = self.meta_curriculum.brains_to_curriculums[
brain_name
].min_lesson_length
else:
lesson_length = 1
self.trainers[brain_name] = PPOTrainer(
brain=external_brains[brain_name],
reward_buff_cap=lesson_length,
trainer_parameters=trainer_parameters_dict[brain_name],
training=self.train_model,
load=self.load_model,
seed=self.seed,
run_id=self.run_id,
multi_gpu=self.multi_gpu,
)
self.trainer_metrics[brain_name] = self.trainers[
brain_name
].trainer_metrics
else:
raise UnityEnvironmentException(
"The trainer config contains "
"an unknown trainer type for "
"brain {}".format(brain_name)
)
@staticmethod
def _create_model_path(model_path):
try:

else:
trainer.write_summary(global_step, delta_train_start)
def start_learning(
self, env_manager: EnvManager, trainer_config: Dict[str, Any]
) -> None:
# TODO: Should be able to start learning at different lesson numbers
# for each curriculum.
if self.meta_curriculum is not None:
self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
def start_learning(self, env_manager: EnvManager) -> None:
# Prevent a single session from taking all GPU memory.
self.initialize_trainers(trainer_config, env_manager.external_brains)
for _, t in self.trainers.items():
self.logger.info(t)

env_manager.close()
def end_trainer_episodes(
self, env: BaseUnityEnvironment, lessons_incremented: Dict[str, bool]
self, env: EnvManager, lessons_incremented: Dict[str, bool]
) -> None:
self._reset_env(env)
# Reward buffers reset takes place only for curriculum learning

if changed:
self.trainers[brain_name].reward_buffer.clear()
def reset_env_if_ready(self, env: BaseUnityEnvironment, steps: int) -> None:
def reset_env_if_ready(self, env: EnvManager, steps: int) -> None:
if self.meta_curriculum:
# Get the sizes of the reward buffers.
reward_buff_sizes = {

self.end_trainer_episodes(env, lessons_incremented)
@timed
def advance(self, env: SubprocessEnvManager) -> int:
def advance(self, env: EnvManager) -> int:
with hierarchical_timer("env_step"):
time_start_step = time()
new_step_infos = env.step()

315
ml-agents/mlagents/trainers/tests/test_trainer_util.py


import pytest
import yaml
import os
from unittest.mock import patch
import mlagents.trainers.trainer_util as trainer_util
from mlagents.trainers.trainer_metrics import TrainerMetrics
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.envs.exception import UnityEnvironmentException
@pytest.fixture
def dummy_config():
    """Trainer config whose ``default`` section selects the ppo trainer."""
    config_text = """
        default:
            trainer: ppo
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    return yaml.safe_load(config_text)
@pytest.fixture
def dummy_online_bc_config():
    """Trainer config whose ``default`` section selects the online_bc trainer."""
    config_text = """
        default:
            trainer: online_bc
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    return yaml.safe_load(config_text)
@pytest.fixture
def dummy_offline_bc_config():
    """Trainer config for the offline_bc trainer, pointing demo_path at the
    test.demo file that lives next to this test module."""
    demo_dir = os.path.dirname(os.path.abspath(__file__))
    config_text = (
        """
        default:
            trainer: offline_bc
            demo_path: """
        + demo_dir
        + """/test.demo
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    )
    return yaml.safe_load(config_text)
@pytest.fixture
def dummy_offline_bc_config_with_override():
    """Offline BC config plus a brain-specific section that overrides ``normalize``."""
    config = dummy_offline_bc_config()
    config["testbrain"] = {"normalize": False}
    return config
@pytest.fixture
def dummy_bad_config():
    """Trainer config with an unrecognized ``trainer:`` value, used to exercise
    the error path in trainer initialization."""
    config_text = """
        default:
            trainer: incorrect_trainer
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
        """
    return yaml.safe_load(config_text)
@patch("mlagents.envs.BrainParameters")
def test_initialize_trainer_parameters_override_defaults(BrainParametersMock):
    """Brain-specific config sections must override the ``default`` section,
    and initialize_trainers must forward session options to the trainer."""
    summaries_dir = "test_dir"
    run_id = "testrun"
    model_path = "model_dir"
    keep_checkpoints = 1
    train_model = True
    load_model = False
    seed = 11

    base_config = dummy_offline_bc_config_with_override()
    expected_config = base_config["default"]
    expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
    expected_config["model_path"] = model_path + "/testbrain"
    expected_config["keep_checkpoints"] = keep_checkpoints
    # Override value from specific brain config
    expected_config["normalize"] = False

    brain_params_mock = BrainParametersMock()
    external_brains = {"testbrain": brain_params_mock}

    # The last two parameters are deliberately NOT named ``seed``/``run_id``:
    # the previous version shadowed the outer locals, which made the
    # corresponding assertions compare each value against itself and pass
    # unconditionally. OfflineBCTrainer is constructed positionally, so the
    # parameter names here are free to differ.
    def mock_constructor(
        self, brain, trainer_parameters, training, load, trainer_seed, trainer_run_id
    ):
        assert brain == brain_params_mock
        assert trainer_parameters == expected_config
        assert training == train_model
        assert load == load_model
        assert trainer_seed == seed
        assert trainer_run_id == run_id

    with patch.object(OfflineBCTrainer, "__init__", mock_constructor):
        trainers = trainer_util.initialize_trainers(
            trainer_config=base_config,
            external_brains=external_brains,
            summaries_dir=summaries_dir,
            run_id=run_id,
            model_path=model_path,
            keep_checkpoints=keep_checkpoints,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
        )
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], OfflineBCTrainer)
@patch("mlagents.envs.BrainParameters")
def test_initialize_online_bc_trainer(BrainParametersMock):
    """An ``online_bc`` config should construct an OnlineBCTrainer with the
    resolved parameters and session options."""
    summaries_dir = "test_dir"
    run_id = "testrun"
    model_path = "model_dir"
    keep_checkpoints = 1
    train_model = True
    load_model = False
    seed = 11

    base_config = dummy_online_bc_config()
    expected_config = base_config["default"]
    expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
    expected_config["model_path"] = model_path + "/testbrain"
    expected_config["keep_checkpoints"] = keep_checkpoints

    brain_params_mock = BrainParametersMock()
    external_brains = {"testbrain": brain_params_mock}

    # The last two parameters are deliberately NOT named ``seed``/``run_id``:
    # the previous version shadowed the outer locals, which made the
    # corresponding assertions compare each value against itself and pass
    # unconditionally. OnlineBCTrainer is constructed positionally, so the
    # parameter names here are free to differ.
    def mock_constructor(
        self, brain, trainer_parameters, training, load, trainer_seed, trainer_run_id
    ):
        assert brain == brain_params_mock
        assert trainer_parameters == expected_config
        assert training == train_model
        assert load == load_model
        assert trainer_seed == seed
        assert trainer_run_id == run_id

    with patch.object(OnlineBCTrainer, "__init__", mock_constructor):
        trainers = trainer_util.initialize_trainers(
            trainer_config=base_config,
            external_brains=external_brains,
            summaries_dir=summaries_dir,
            run_id=run_id,
            model_path=model_path,
            keep_checkpoints=keep_checkpoints,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
        )
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], OnlineBCTrainer)
@patch("mlagents.envs.BrainParameters")
def test_initialize_ppo_trainer(BrainParametersMock):
    """A ``ppo`` config should construct a PPOTrainer with the resolved
    parameters, a reward buffer cap of 1 (no meta-curriculum), and the
    default multi_gpu setting."""
    brain_params_mock = BrainParametersMock()
    external_brains = {"testbrain": brain_params_mock}
    summaries_dir = "test_dir"
    run_id = "testrun"
    model_path = "model_dir"
    keep_checkpoints = 1
    train_model = True
    load_model = False
    seed = 11
    expected_reward_buff_cap = 1

    base_config = dummy_config()
    expected_config = base_config["default"]
    expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
    expected_config["model_path"] = model_path + "/testbrain"
    expected_config["keep_checkpoints"] = keep_checkpoints

    # The ``trainer_seed``/``trainer_run_id``/``is_multi_gpu`` parameters are
    # deliberately NOT named after outer locals: the previous version shadowed
    # ``seed``, ``run_id`` and ``multi_gpu``, making those assertions compare
    # each value against itself and pass unconditionally. PPOTrainer is
    # constructed positionally, so the parameter names here are free to differ.
    def mock_constructor(
        self,
        brain,
        reward_buff_cap,
        trainer_parameters,
        training,
        load,
        trainer_seed,
        trainer_run_id,
        is_multi_gpu,
    ):
        # initialize_trainers-era callers read trainer_metrics off the
        # constructed trainer, so the stub must provide one.
        self.trainer_metrics = TrainerMetrics("", "")
        assert brain == brain_params_mock
        assert trainer_parameters == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == train_model
        assert load == load_model
        assert trainer_seed == seed
        assert trainer_run_id == run_id
        # multi_gpu is not passed to initialize_trainers below, so the
        # default (False) must be forwarded.
        assert is_multi_gpu is False

    with patch.object(PPOTrainer, "__init__", mock_constructor):
        trainers = trainer_util.initialize_trainers(
            trainer_config=base_config,
            external_brains=external_brains,
            summaries_dir=summaries_dir,
            run_id=run_id,
            model_path=model_path,
            keep_checkpoints=keep_checkpoints,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
        )
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], PPOTrainer)
@patch("mlagents.envs.BrainParameters")
def test_initialize_invalid_trainer_raises_exception(BrainParametersMock):
    """An unknown ``trainer:`` value must raise UnityEnvironmentException."""
    external_brains = {"testbrain": BrainParametersMock()}
    with pytest.raises(UnityEnvironmentException):
        trainer_util.initialize_trainers(
            trainer_config=dummy_bad_config(),
            external_brains=external_brains,
            summaries_dir="test_dir",
            run_id="testrun",
            model_path="model_dir",
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=11,
        )

97
ml-agents/mlagents/trainers/trainer_util.py


from typing import Any, Dict, Optional

from mlagents.envs.brain import BrainParameters
from mlagents.envs.exception import UnityEnvironmentException
from mlagents.trainers import MetaCurriculum, Trainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: Optional[MetaCurriculum] = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Initializes trainers given a provided trainer configuration and set of brains from the environment, as well as
    some general training session options.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: Mapping of brain name to its initialized Trainer
    :raises UnityEnvironmentException: if a brain's config names an unknown trainer type
    """
    trainers: Dict[str, Trainer] = {}
    trainer_parameters_dict: Dict[str, Dict[str, Any]] = {}
    # First pass: resolve the effective hyperparameters for every brain by
    # layering brain-specific overrides on top of the "default" section.
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=summaries_dir, name=str(run_id) + "_" + brain_name
        )
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=model_path, name=brain_name
        )
        trainer_parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            # A brain entry may be a string alias pointing at another config
            # section; follow the chain until a dict of overrides is found.
            _brain_key: Any = brain_name
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()
    # Second pass: construct the trainer selected by each brain's "trainer" key.
    for brain_name in external_brains:
        brain_parameters = trainer_parameters_dict[brain_name]
        trainer_type = brain_parameters["trainer"]
        if trainer_type == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                external_brains[brain_name],
                brain_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_type == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                external_brains[brain_name],
                brain_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_type == "ppo":
            # With a meta-curriculum, the reward buffer must span at least the
            # curriculum's minimum lesson length; otherwise a cap of 1 suffices.
            trainers[brain_name] = PPOTrainer(
                external_brains[brain_name],
                meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
                if meta_curriculum
                else 1,
                brain_parameters,
                train_model,
                load_model,
                seed,
                run_id,
                multi_gpu,
            )
        else:
            raise UnityEnvironmentException(
                "The trainer config contains "
                "an unknown trainer type for "
                "brain {}".format(brain_name)
            )
    return trainers
正在加载...
取消
保存