[life improvement] Moving Python files around (#4531)
* Moved components to the tf folder and moved the TrainerFactory to the `trainer` folder
* Addressing comments
* Editing the migrating doc
* fixing test/MLA-1734-demo-provider
GitHub · 4 years ago
Current commit: c188781b

31 files changed, with 303 insertions and 225 deletions
- docs/Migrating.md (16 lines changed)
- ml-agents/mlagents/trainers/learn.py (5 lines changed)
- ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4 lines changed)
- ml-agents/mlagents/trainers/ppo/trainer.py (2 lines changed)
- ml-agents/mlagents/trainers/sac/trainer.py (2 lines changed)
- ml-agents/mlagents/trainers/tests/check_env_trains.py (2 lines changed)
- ml-agents/mlagents/trainers/tests/tensorflow/test_bcmodule.py (2 lines changed)
- ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (2 lines changed)
- ml-agents/mlagents/trainers/tests/test_learn.py (2 lines changed)
- ml-agents/mlagents/trainers/tests/test_trainer_controller.py (1 line changed)
- ml-agents/mlagents/trainers/tests/test_trainer_util.py (21 lines changed)
- ml-agents/mlagents/trainers/trainer/__init__.py (1 line changed)
- ml-agents/mlagents/trainers/trainer/rl_trainer.py (5 lines changed)
- ml-agents/mlagents/trainers/trainer_controller.py (2 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py (9 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py (7 lines changed)
- ml-agents/mlagents/trainers/directory_utils.py (42 lines changed)
- ml-agents/mlagents/trainers/trainer/trainer_factory.py (156 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/signal.py (13 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/reward_signal_factory.py (37 lines changed)
- ml-agents/mlagents/trainers/trainer_util.py (197 lines changed)
- ml-agents/mlagents/trainers/tf/components/__init__.py (0 lines changed)
- ml-agents/mlagents/trainers/tf/components/bc (0 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/__init__.py (0 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/__init__.py (0 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity (0 lines changed)
- ml-agents/mlagents/trainers/tf/components/reward_signals/gail (0 lines changed)
ml-agents/mlagents/trainers/trainer/__init__.py:

from mlagents.trainers.trainer.trainer import Trainer  # noqa
from mlagents.trainers.trainer.trainer_factory import TrainerFactory  # noqa
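
Taken together with the removal of trainer_util.py at the end of this diff, the practical effect on import paths looks roughly like the sketch below. This is not part of the diff itself; exact call sites in downstream code will vary.

# After this PR, TrainerFactory is re-exported from the trainer package and the
# run-directory checks live in their own module.
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.directory_utils import validate_existing_directories

# Before this PR, both came from the (now removed) trainer_util module:
# from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories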

ml-agents/mlagents/trainers/directory_utils.py (new file):

import os
from mlagents.trainers.exception import UnityTrainerException


def validate_existing_directories(
    output_path: str, resume: bool, force: bool, init_path: str = None
) -> None:
    """
    Validates that if the run_id model exists, we do not overwrite it unless --force is specified.
    Throws an exception if resume isn't specified and run_id exists. Throws an exception
    if --resume is specified and run-id was not found.
    :param model_path: The model path specified.
    :param summary_path: The summary path to be used.
    :param resume: Whether or not the --resume flag was passed.
    :param force: Whether or not the --force flag was passed.
    """

    output_path_exists = os.path.isdir(output_path)

    if output_path_exists:
        if not resume and not force:
            raise UnityTrainerException(
                "Previous data from this run ID was found. "
                "Either specify a new run ID, use --resume to resume this run, "
                "or use the --force parameter to overwrite existing data."
            )
    else:
        if resume:
            raise UnityTrainerException(
                "Previous data from this run ID was not found. "
                "Train a new run by removing the --resume flag."
            )

    # Verify init path if specified.
    if init_path is not None:
        if not os.path.isdir(init_path):
            raise UnityTrainerException(
                "Could not initialize from {}. "
                "Make sure models have already been saved with that run ID.".format(
                    init_path
                )
            )
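
A minimal usage sketch for the helper above; the "results/my_run" path is a placeholder, and the behaviour shown is exactly the checks in the function, nothing more:

from mlagents.trainers.directory_utils import validate_existing_directories
from mlagents.trainers.exception import UnityTrainerException

try:
    # Raises if "results/my_run" already exists and neither resume nor force is set,
    # or if resume is set but the directory does not exist.
    validate_existing_directories("results/my_run", resume=False, force=False)
except UnityTrainerException as ex:
    print(f"Cannot start run: {ex}")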

ml-agents/mlagents/trainers/trainer/trainer_factory.py (new file):

import os
from typing import Dict

from mlagents_envs.logging_util import get_logger
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.ghost.trainer import GhostTrainer
from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.settings import TrainerSettings, TrainerType, FrameworkType


logger = get_logger(__name__)


class TrainerFactory:
    def __init__(
        self,
        trainer_config: Dict[str, TrainerSettings],
        output_path: str,
        train_model: bool,
        load_model: bool,
        seed: int,
        param_manager: EnvironmentParameterManager,
        init_path: str = None,
        multi_gpu: bool = False,
        force_torch: bool = False,
    ):
        """
        The TrainerFactory generates the Trainers based on the configuration passed as
        input.
        :param trainer_config: A dictionary from behavior name to TrainerSettings
        :param output_path: The path to the directory where the artifacts generated by
        the trainer will be saved.
        :param train_model: If True, the Trainers will train the model and if False,
        only perform inference.
        :param load_model: If True, the Trainer will load neural networks weights from
        the previous run.
        :param seed: The seed of the Trainers. Dictates how the neural networks will be
        initialized.
        :param param_manager: The EnvironmentParameterManager that will dictate when/if
        the EnvironmentParameters must change.
        :param init_path: Path from which to load model.
        :param multi_gpu: If True, multi-gpu will be used. (currently not available)
        :param force_torch: If True, the Trainers will all use the PyTorch framework
        instead of the TensorFlow framework.
        """
        self.trainer_config = trainer_config
        self.output_path = output_path
        self.init_path = init_path
        self.train_model = train_model
        self.load_model = load_model
        self.seed = seed
        self.param_manager = param_manager
        self.multi_gpu = multi_gpu
        self.ghost_controller = GhostController()
        self._force_torch = force_torch

    def generate(self, behavior_name: str) -> Trainer:
        if behavior_name not in self.trainer_config.keys():
            logger.warning(
                f"Behavior name {behavior_name} does not match any behaviors specified"
                f"in the trainer configuration file: {sorted(self.trainer_config.keys())}"
            )
        trainer_settings = self.trainer_config[behavior_name]
        if self._force_torch:
            trainer_settings.framework = FrameworkType.PYTORCH
        return TrainerFactory._initialize_trainer(
            trainer_settings,
            behavior_name,
            self.output_path,
            self.train_model,
            self.load_model,
            self.ghost_controller,
            self.seed,
            self.param_manager,
            self.init_path,
            self.multi_gpu,
        )

    @staticmethod
    def _initialize_trainer(
        trainer_settings: TrainerSettings,
        brain_name: str,
        output_path: str,
        train_model: bool,
        load_model: bool,
        ghost_controller: GhostController,
        seed: int,
        param_manager: EnvironmentParameterManager,
        init_path: str = None,
        multi_gpu: bool = False,
    ) -> Trainer:
        """
        Initializes a trainer given a provided trainer configuration and brain parameters, as well as
        some general training session options.

        :param trainer_settings: Original trainer configuration loaded from YAML
        :param brain_name: Name of the brain to be associated with trainer
        :param output_path: Path to save the model and summary statistics
        :param keep_checkpoints: How many model checkpoints to keep
        :param train_model: Whether to train the model (vs. run inference)
        :param load_model: Whether to load the model or randomly initialize
        :param ghost_controller: The object that coordinates ghost trainers
        :param seed: The random seed to use
        :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
        :param init_path: Path from which to load model, if different from model_path.
        :return:
        """
        trainer_artifact_path = os.path.join(output_path, brain_name)
        if init_path is not None:
            trainer_settings.init_path = os.path.join(init_path, brain_name)

        min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)

        trainer: Trainer = None  # type: ignore # will be set to one of these, or raise
        trainer_type = trainer_settings.trainer_type

        if trainer_type == TrainerType.PPO:
            trainer = PPOTrainer(
                brain_name,
                min_lesson_length,
                trainer_settings,
                train_model,
                load_model,
                seed,
                trainer_artifact_path,
            )
        elif trainer_type == TrainerType.SAC:
            trainer = SACTrainer(
                brain_name,
                min_lesson_length,
                trainer_settings,
                train_model,
                load_model,
                seed,
                trainer_artifact_path,
            )
        else:
            raise TrainerConfigError(
                f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
            )

        if trainer_settings.self_play is not None:
            trainer = GhostTrainer(
                trainer,
                brain_name,
                ghost_controller,
                min_lesson_length,
                trainer_settings,
                train_model,
                trainer_artifact_path,
            )
        return trainer
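
For orientation, a hedged sketch of how the factory above is driven (in ML-Agents this is normally done from learn.py, which is also touched by this PR). The output path, seed, and behavior name here are placeholders; the trainer config dict and parameter manager are assumed to already exist:

from typing import Dict

from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.trainer import Trainer, TrainerFactory


def build_trainer(
    trainer_config: Dict[str, TrainerSettings],
    param_manager: EnvironmentParameterManager,
    behavior_name: str,
) -> Trainer:
    # Mirrors the constructor arguments shown above; output_path is a placeholder run directory.
    factory = TrainerFactory(
        trainer_config=trainer_config,
        output_path="results/my_run",
        train_model=True,
        load_model=False,
        seed=0,
        param_manager=param_manager,
    )
    # Returns a PPOTrainer, a SACTrainer, or a GhostTrainer wrapping one of them.
    return factory.generate(behavior_name)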

ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/signal.py (moved into the tf package):

import numpy as np

from mlagents.trainers.tf.components.reward_signals import (
    RewardSignal,
    RewardSignalResult,
)
from mlagents.trainers.buffer import AgentBuffer


class ExtrinsicRewardSignal(RewardSignal):
    def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
        env_rews = np.array(mini_batch["environment_rewards"], dtype=np.float32)
        return RewardSignalResult(self.strength * env_rews, env_rews)
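
To make evaluate_batch above concrete: the scaled reward it reports is just the signal strength multiplied by the raw environment rewards. A tiny standalone sketch with made-up numbers, stripped of the class and buffer plumbing:

import numpy as np

strength = 2.0  # hypothetical reward-signal strength from the trainer config
env_rews = np.array([0.0, 0.5, 1.0], dtype=np.float32)
scaled = strength * env_rews  # array([0.0, 1.0, 2.0], dtype=float32)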

ml-agents/mlagents/trainers/tf/components/reward_signals/reward_signal_factory.py (moved into the tf package):

from typing import Dict, Type
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.tf.components.reward_signals import RewardSignal
from mlagents.trainers.tf.components.reward_signals.extrinsic.signal import (
    ExtrinsicRewardSignal,
)
from mlagents.trainers.tf.components.reward_signals.gail.signal import GAILRewardSignal
from mlagents.trainers.tf.components.reward_signals.curiosity.signal import (
    CuriosityRewardSignal,
)
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType


NAME_TO_CLASS: Dict[RewardSignalType, Type[RewardSignal]] = {
    RewardSignalType.EXTRINSIC: ExtrinsicRewardSignal,
    RewardSignalType.CURIOSITY: CuriosityRewardSignal,
    RewardSignalType.GAIL: GAILRewardSignal,
}


def create_reward_signal(
    policy: TFPolicy, name: RewardSignalType, settings: RewardSignalSettings
) -> RewardSignal:
    """
    Creates a reward signal class based on the name and config entry provided as a dict.
    :param policy: The policy class which the reward will be applied to.
    :param name: The name of the reward signal
    :param config_entry: The config entries for that reward signal
    :return: The reward signal class instantiated
    """
    rcls = NAME_TO_CLASS.get(name)
    if not rcls:
        raise UnityTrainerException(f"Unknown reward signal type {name}")

    class_inst = rcls(policy, settings)
    return class_inst
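
A short sketch of driving the factory above. It assumes an already-built TFPolicy instance and that RewardSignalSettings can be default-constructed with its usual strength/gamma defaults; both are assumptions, not shown in this diff:

from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tf.components.reward_signals import RewardSignal
from mlagents.trainers.tf.components.reward_signals.reward_signal_factory import (
    create_reward_signal,
)


def make_extrinsic_signal(policy: TFPolicy) -> RewardSignal:
    # Unknown signal types raise UnityTrainerException inside the factory.
    settings = RewardSignalSettings()  # assumed default-constructible
    return create_reward_signal(policy, RewardSignalType.EXTRINSIC, settings)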

ml-agents/mlagents/trainers/trainer_util.py (removed; its contents now live in trainer/trainer_factory.py and directory_utils.py above):

import os
from typing import Dict

from mlagents_envs.logging_util import get_logger
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.ghost.trainer import GhostTrainer
from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.settings import TrainerSettings, TrainerType, FrameworkType


logger = get_logger(__name__)


class TrainerFactory:
    def __init__(
        self,
        trainer_config: Dict[str, TrainerSettings],
        output_path: str,
        train_model: bool,
        load_model: bool,
        seed: int,
        param_manager: EnvironmentParameterManager,
        init_path: str = None,
        multi_gpu: bool = False,
        force_torch: bool = False,
    ):
        """
        The TrainerFactory generates the Trainers based on the configuration passed as
        input.
        :param trainer_config: A dictionary from behavior name to TrainerSettings
        :param output_path: The path to the directory where the artifacts generated by
        the trainer will be saved.
        :param train_model: If True, the Trainers will train the model and if False,
        only perform inference.
        :param load_model: If True, the Trainer will load neural networks weights from
        the previous run.
        :param seed: The seed of the Trainers. Dictates how the neural networks will be
        initialized.
        :param param_manager: The EnvironmentParameterManager that will dictate when/if
        the EnvironmentParameters must change.
        :param init_path: Path from which to load model.
        :param multi_gpu: If True, multi-gpu will be used. (currently not available)
        :param force_torch: If True, the Trainers will all use the PyTorch framework
        instead of the TensorFlow framework.
        """
        self.trainer_config = trainer_config
        self.output_path = output_path
        self.init_path = init_path
        self.train_model = train_model
        self.load_model = load_model
        self.seed = seed
        self.param_manager = param_manager
        self.multi_gpu = multi_gpu
        self.ghost_controller = GhostController()
        self._force_torch = force_torch

    def generate(self, behavior_name: str) -> Trainer:
        if behavior_name not in self.trainer_config.keys():
            logger.warning(
                f"Behavior name {behavior_name} does not match any behaviors specified"
                f"in the trainer configuration file: {sorted(self.trainer_config.keys())}"
            )
        trainer_settings = self.trainer_config[behavior_name]
        if self._force_torch:
            trainer_settings.framework = FrameworkType.PYTORCH
        return initialize_trainer(
            trainer_settings,
            behavior_name,
            self.output_path,
            self.train_model,
            self.load_model,
            self.ghost_controller,
            self.seed,
            self.param_manager,
            self.init_path,
            self.multi_gpu,
        )


def initialize_trainer(
    trainer_settings: TrainerSettings,
    brain_name: str,
    output_path: str,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    param_manager: EnvironmentParameterManager,
    init_path: str = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters, as well as
    some general training session options.

    :param trainer_settings: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param output_path: Path to save the model and summary statistics
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
    :param init_path: Path from which to load model, if different from model_path.
    :return:
    """
    trainer_artifact_path = os.path.join(output_path, brain_name)
    if init_path is not None:
        trainer_settings.init_path = os.path.join(init_path, brain_name)

    min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)

    trainer: Trainer = None  # type: ignore # will be set to one of these, or raise
    trainer_type = trainer_settings.trainer_type

    if trainer_type == TrainerType.PPO:
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    elif trainer_type == TrainerType.SAC:
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )

    if trainer_settings.self_play is not None:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_settings,
            train_model,
            trainer_artifact_path,
        )
    return trainer


def handle_existing_directories(
    output_path: str, resume: bool, force: bool, init_path: str = None
) -> None:
    """
    Validates that if the run_id model exists, we do not overwrite it unless --force is specified.
    Throws an exception if resume isn't specified and run_id exists. Throws an exception
    if --resume is specified and run-id was not found.
    :param model_path: The model path specified.
    :param summary_path: The summary path to be used.
    :param resume: Whether or not the --resume flag was passed.
    :param force: Whether or not the --force flag was passed.
    """

    output_path_exists = os.path.isdir(output_path)

    if output_path_exists:
        if not resume and not force:
            raise UnityTrainerException(
                "Previous data from this run ID was found. "
                "Either specify a new run ID, use --resume to resume this run, "
                "or use the --force parameter to overwrite existing data."
            )
    else:
        if resume:
            raise UnityTrainerException(
                "Previous data from this run ID was not found. "
                "Train a new run by removing the --resume flag."
            )

    # Verify init path if specified.
    if init_path is not None:
        if not os.path.isdir(init_path):
            raise UnityTrainerException(
                "Could not initialize from {}. "
                "Make sure models have already been saved with that run ID.".format(
                    init_path
                )
            )