浏览代码
Move trainer initialization into a utility function (#2412)
Move trainer initialization into a utility function (#2412)
This change moves trainer initialization outside of TrainerController, reducing some of the constructor arguments of TrainerController and setting up the ability for trainers to be initialized in the case where a TrainerController isn't needed./develop-generalizationTraining-TrainerController
GitHub
6 年前
当前提交
30930383
共有 7 个文件被更改,包括 501 次插入 和 431 次删除
-
54ml-agents/mlagents/trainers/learn.py
-
9ml-agents/mlagents/trainers/tests/test_learn.py
-
41ml-agents/mlagents/trainers/tests/test_simple_rl.py
-
300ml-agents/mlagents/trainers/tests/test_trainer_controller.py
-
116ml-agents/mlagents/trainers/trainer_controller.py
-
315ml-agents/mlagents/trainers/tests/test_trainer_util.py
-
97ml-agents/mlagents/trainers/trainer_util.py
|
|||
import pytest |
|||
import yaml |
|||
import os |
|||
from unittest.mock import patch |
|||
|
|||
import mlagents.trainers.trainer_util as trainer_util |
|||
from mlagents.trainers.trainer_metrics import TrainerMetrics |
|||
from mlagents.trainers.ppo.trainer import PPOTrainer |
|||
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer |
|||
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer |
|||
from mlagents.envs.exception import UnityEnvironmentException |
|||
|
|||
|
|||
@pytest.fixture
def dummy_config():
    """Return a parsed trainer configuration whose default section selects the PPO trainer."""
    # All hyperparameters live under the "default" key, which is the section
    # the tests in this module read back as a mapping.
    config_yaml = """
        default:
            trainer: ppo
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    return yaml.safe_load(config_yaml)
|||
|
|||
|
|||
@pytest.fixture
def dummy_online_bc_config():
    """Return a parsed trainer configuration whose default section selects the online BC trainer."""
    # Same hyperparameters as the PPO config, plus the behavioral-cloning keys
    # "brain_to_imitate" and "batches_per_epoch".
    config_yaml = """
        default:
            trainer: online_bc
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    return yaml.safe_load(config_yaml)
|||
|
|||
|
|||
@pytest.fixture
def dummy_offline_bc_config():
    """Return a parsed trainer configuration whose default section selects the offline BC trainer.

    The "demo_path" entry points at "test.demo" inside the directory that
    contains this test module.
    """
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    # The YAML text is split around the demo path so the directory of this
    # file can be spliced in between the two halves.
    yaml_head = """
        default:
            trainer: offline_bc
            demo_path: """
    yaml_tail = """/test.demo
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        """
    return yaml.safe_load(yaml_head + tests_dir + yaml_tail)
|||
|
|||
|
|||
@pytest.fixture
def dummy_offline_bc_config_with_override():
    """Return the offline BC configuration plus a "testbrain" section that sets normalize to False."""
    # NOTE(review): this invokes another fixture function directly; newer
    # pytest releases may reject direct fixture calls - confirm against the
    # pytest version this project pins.
    config = dummy_offline_bc_config()
    config["testbrain"] = {"normalize": False}
    return config
|||
|
|||
|
|||
@pytest.fixture
def dummy_bad_config():
    """Return a parsed trainer configuration whose default trainer type is "incorrect_trainer"."""
    # "incorrect_trainer" is not one of the trainer types handled by
    # trainer_util.initialize_trainers, so this config is used to provoke
    # its error path.
    config_yaml = """
        default:
            trainer: incorrect_trainer
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
        """
    return yaml.safe_load(config_yaml)
|||
|
|||
|
|||
@patch("mlagents.envs.BrainParameters")
def test_initialize_trainer_parameters_override_defaults(BrainParametersMock):
    """Brain-specific config entries must override the defaults handed to the trainer.

    The config contains a "testbrain" section that sets normalize to False
    while the default section sets it to True; the trainer constructor must
    receive the overridden value.
    """
    summaries_dir = "test_dir"
    run_id = "testrun"
    model_path = "model_dir"
    keep_checkpoints = 1
    train_model = True
    load_model = False
    seed = 11

    base_config = dummy_offline_bc_config_with_override()
    # Work on a copy: mutating base_config["default"] in place would write the
    # expected values (including normalize=False) into the very config passed
    # to initialize_trainers, making the override check pass vacuously.
    expected_config = dict(base_config["default"])
    expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
    expected_config["model_path"] = model_path + "/testbrain"
    expected_config["keep_checkpoints"] = keep_checkpoints

    # Override value from specific brain config
    expected_config["normalize"] = False

    brain_params_mock = BrainParametersMock()
    external_brains = {"testbrain": brain_params_mock}

    # The mock constructor's parameters shadow `seed` and `run_id`, so the
    # expected values are bound to distinct names for the comparisons below.
    expected_seed = seed
    expected_run_id = run_id

    def mock_constructor(self, brain, trainer_parameters, training, load, seed, run_id):
        assert brain == brain_params_mock
        assert trainer_parameters == expected_config
        assert training == train_model
        assert load == load_model
        assert seed == expected_seed
        assert run_id == expected_run_id

    with patch.object(OfflineBCTrainer, "__init__", mock_constructor):
        trainers = trainer_util.initialize_trainers(
            trainer_config=base_config,
            external_brains=external_brains,
            summaries_dir=summaries_dir,
            run_id=run_id,
            model_path=model_path,
            keep_checkpoints=keep_checkpoints,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
        )
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], OfflineBCTrainer)
|||
|
|||
|
|||
@patch("mlagents.envs.BrainParameters")
def test_initialize_online_bc_trainer(BrainParametersMock):
    """An "online_bc" trainer type must yield an OnlineBCTrainer built with the expected arguments."""
    summaries_dir = "test_dir"
    run_id = "testrun"
    model_path = "model_dir"
    keep_checkpoints = 1
    train_model = True
    load_model = False
    seed = 11

    base_config = dummy_online_bc_config()
    # Work on a copy so the derived keys (summary_path, model_path,
    # keep_checkpoints) are not pre-populated in the config under test.
    expected_config = dict(base_config["default"])
    expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
    expected_config["model_path"] = model_path + "/testbrain"
    expected_config["keep_checkpoints"] = keep_checkpoints

    brain_params_mock = BrainParametersMock()
    external_brains = {"testbrain": brain_params_mock}

    # The mock constructor's parameters shadow `seed` and `run_id`, so the
    # expected values are bound to distinct names for the comparisons below.
    expected_seed = seed
    expected_run_id = run_id

    def mock_constructor(self, brain, trainer_parameters, training, load, seed, run_id):
        assert brain == brain_params_mock
        assert trainer_parameters == expected_config
        assert training == train_model
        assert load == load_model
        assert seed == expected_seed
        assert run_id == expected_run_id

    with patch.object(OnlineBCTrainer, "__init__", mock_constructor):
        trainers = trainer_util.initialize_trainers(
            trainer_config=base_config,
            external_brains=external_brains,
            summaries_dir=summaries_dir,
            run_id=run_id,
            model_path=model_path,
            keep_checkpoints=keep_checkpoints,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
        )
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], OnlineBCTrainer)
|||
|
|||
|
|||
@patch("mlagents.envs.BrainParameters")
def test_initialize_ppo_trainer(BrainParametersMock):
    """A "ppo" trainer type must yield a PPOTrainer built with the expected arguments.

    No meta curriculum or multi_gpu flag is passed, so the constructor is
    expected to receive a reward buffer capacity of 1 and multi_gpu False.
    """
    brain_params_mock = BrainParametersMock()
    # Hand over the same object the mock constructor compares against.
    external_brains = {"testbrain": brain_params_mock}
    summaries_dir = "test_dir"
    run_id = "testrun"
    model_path = "model_dir"
    keep_checkpoints = 1
    train_model = True
    load_model = False
    seed = 11
    expected_reward_buff_cap = 1

    base_config = dummy_config()
    # Work on a copy so the derived keys (summary_path, model_path,
    # keep_checkpoints) are not pre-populated in the config under test.
    expected_config = dict(base_config["default"])
    expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
    expected_config["model_path"] = model_path + "/testbrain"
    expected_config["keep_checkpoints"] = keep_checkpoints

    # The mock constructor's parameters shadow `seed`, `run_id` and
    # `multi_gpu`, so the expected values are bound to distinct names.
    expected_seed = seed
    expected_run_id = run_id
    # initialize_trainers is called without multi_gpu, whose default is False.
    expected_multi_gpu = False

    def mock_constructor(
        self,
        brain,
        reward_buff_cap,
        trainer_parameters,
        training,
        load,
        seed,
        run_id,
        multi_gpu,
    ):
        self.trainer_metrics = TrainerMetrics("", "")
        assert brain == brain_params_mock
        assert trainer_parameters == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == train_model
        assert load == load_model
        assert seed == expected_seed
        assert run_id == expected_run_id
        assert multi_gpu == expected_multi_gpu

    with patch.object(PPOTrainer, "__init__", mock_constructor):
        trainers = trainer_util.initialize_trainers(
            trainer_config=base_config,
            external_brains=external_brains,
            summaries_dir=summaries_dir,
            run_id=run_id,
            model_path=model_path,
            keep_checkpoints=keep_checkpoints,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
        )
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], PPOTrainer)
|||
|
|||
|
|||
@patch("mlagents.envs.BrainParameters")
def test_initialize_invalid_trainer_raises_exception(BrainParametersMock):
    """A config naming an unknown trainer type must raise UnityEnvironmentException."""
    # Collect every argument up front so the body of the `raises` block is
    # only the call expected to fail.
    init_kwargs = dict(
        trainer_config=dummy_bad_config(),
        external_brains={"testbrain": BrainParametersMock()},
        summaries_dir="test_dir",
        run_id="testrun",
        model_path="model_dir",
        keep_checkpoints=1,
        train_model=True,
        load_model=False,
        seed=11,
    )

    with pytest.raises(UnityEnvironmentException):
        trainer_util.initialize_trainers(**init_kwargs)
|
|||
from typing import Any, Dict |
|||
|
|||
from mlagents.trainers import MetaCurriculum |
|||
from mlagents.envs.exception import UnityEnvironmentException |
|||
from mlagents.trainers import Trainer |
|||
from mlagents.envs.brain import BrainParameters |
|||
from mlagents.trainers.ppo.trainer import PPOTrainer |
|||
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer |
|||
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer |
|||
|
|||
|
|||
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Initializes trainers given a provided trainer configuration and set of brains from the environment, as well as
    some general training session options.

    For every brain, the parameters start as a copy of the "default" section, get summary_path, model_path and
    keep_checkpoints filled in, and are then updated with the brain's own section if the config has one. A brain
    entry whose value is not a dict is treated as an alias naming another entry and is followed until a dict is found.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: Dictionary mapping each brain name to the trainer constructed for it
    :raises UnityEnvironmentException: If a brain's trainer type is not "offline_bc", "online_bc" or "ppo", or if
        the aliases for a brain in trainer_config refer back to each other in a cycle
    """
    trainers: Dict[str, Trainer] = {}
    trainer_parameters_dict: Dict[str, Dict[str, Any]] = {}

    # First pass: resolve the parameters of every brain before any trainer is constructed.
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = f"{summaries_dir}/{run_id}_{brain_name}"
        trainer_parameters["model_path"] = f"{model_path}/{brain_name}"
        trainer_parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            _brain_key: Any = brain_name
            # Remember visited aliases so that entries pointing at each other raise instead of looping forever.
            visited_keys = set()
            while not isinstance(trainer_config[_brain_key], dict):
                if _brain_key in visited_keys:
                    raise UnityEnvironmentException(
                        "The trainer config contains a circular alias for "
                        "brain {}".format(brain_name)
                    )
                visited_keys.add(_brain_key)
                _brain_key = trainer_config[_brain_key]
            # Brain-specific values take precedence over the defaults set above.
            trainer_parameters.update(trainer_config[_brain_key])
        # trainer_parameters is already a per-brain copy, so it can be stored directly.
        trainer_parameters_dict[brain_name] = trainer_parameters

    # Second pass: construct the trainer matching each brain's configured type.
    for brain_name in external_brains:
        brain_parameters = external_brains[brain_name]
        trainer_parameters = trainer_parameters_dict[brain_name]
        trainer_type = trainer_parameters["trainer"]
        if trainer_type == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                brain_parameters,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_type == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                brain_parameters,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_type == "ppo":
            # Without a meta curriculum the reward buffer length falls back to 1.
            if meta_curriculum:
                reward_buff_cap = meta_curriculum.brains_to_curriculums[
                    brain_name
                ].min_lesson_length
            else:
                reward_buff_cap = 1
            trainers[brain_name] = PPOTrainer(
                brain_parameters,
                reward_buff_cap,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
                multi_gpu,
            )
        else:
            raise UnityEnvironmentException(
                "The trainer config contains "
                "an unknown trainer type for "
                "brain {}".format(brain_name)
            )
    return trainers
撰写
预览
正在加载...
取消
保存
Reference in new issue