Compare commits

...
This merge request has changes that conflict with the target branch:
/ml-agents/mlagents/trainers/cli_utils.py
/ml-agents/mlagents/trainers/settings.py
/ml-agents/mlagents/trainers/learn.py
/ml-agents/mlagents/trainers/trainer_controller.py
/ml-agents/mlagents/trainers/stats.py
/ml-agents/mlagents/tf_utils/global_values.py
/ml-agents/mlagents/trainers/policy/tf_policy.py
/ml-agents/mlagents/trainers/saver/tf_saver.py
/ml-agents/mlagents/trainers/trainer_util.py

3 commits

Author            SHA1      Message                                       Commit date
Anupam Bhatnagar  3d7956e9  [skip ci] fix key name                        4 years ago
Anupam Bhatnagar  d7f0d457  [skip ci] removing package import statements  4 years ago
Anupam Bhatnagar  f4f1a8d9  merge master into trainer-plugin branch       4 years ago
10 files changed, with 136 insertions and 19 deletions
  1. ml-agents/mlagents/trainers/trainer_controller.py (7 lines changed)
  2. ml-agents/mlagents/trainers/stats.py (7 lines changed)
  3. ml-agents/mlagents/trainers/policy/tf_policy.py (2 lines changed)
  4. ml-agents/mlagents/trainers/cli_utils.py (9 lines changed)
  5. ml-agents/mlagents/trainers/saver/tf_saver.py (9 lines changed)
  6. ml-agents/mlagents/trainers/learn.py (76 lines changed)
  7. ml-agents/mlagents/trainers/settings.py (11 lines changed)
  8. ml-agents/mlagents/trainers/trainer_util.py (28 lines changed)
  9. ml-agents/mlagents/trainers/initializer.py (6 lines changed)
  10. ml-agents/mlagents/tf_utils/global_values.py (0 lines changed, renamed)

ml-agents/mlagents/trainers/trainer_controller.py (7 lines changed)


 from mlagents.trainers.trainer_util import TrainerFactory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManager
-from mlagents.tf_utils.globals import get_rank
+from mlagents.tf_utils import global_values

 try:
     import torch

         tf.set_random_seed(training_seed)
         if torch is not None:
             torch.manual_seed(training_seed)
-        self.rank = get_rank()

     @timed
     def _save_models(self):

-        if self.rank is not None and self.rank != 0:
+        if global_values.get_rank() is not None and global_values.get_rank() != 0:
             return
         for brain_name in self.trainers.keys():

         """
         Saves models for all trainers.
         """
-        if self.rank is not None and self.rank != 0:
+        if global_values.get_rank() is not None and global_values.get_rank() != 0:
             return
         for brain_name in self.trainers.keys():
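
The body of the renamed mlagents/tf_utils/global_values.py module is not shown in this diff (it is listed with 0 line changes). Judging from how get_rank() is used above, a minimal sketch of such a rank accessor might look like the following; the _rank variable and the set_rank() helper are assumptions for illustration, not code from this branch:

# Hypothetical sketch of mlagents/tf_utils/global_values.py
from typing import Optional

# Rank of this worker in a distributed run; None means plain single-process training.
_rank: Optional[int] = None


def set_rank(rank: int) -> None:
    # Assumed setter, called once at startup (e.g. by a plugin's Initializer.load()).
    global _rank
    _rank = rank


def get_rank() -> Optional[int]:
    # Used by trainer_controller, stats and tf_saver to skip work on non-zero ranks.
    return _rank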

ml-agents/mlagents/trainers/stats.py (7 lines changed)


 from mlagents_envs.logging_util import get_logger
 from mlagents_envs.timers import set_gauge
 from mlagents.tf_utils import tf, generate_session_config
-from mlagents.tf_utils.globals import get_rank
+from mlagents.tf_utils import global_values

 logger = get_logger(__name__)

         # If self-play, we want to print ELO as well as reward
         self.self_play = False
         self.self_play_team = -1
-        self.rank = get_rank()

     def write_stats(
         self, category: str, values: Dict[str, StatsSummary], step: int

         log_info.append(f"Time Elapsed: {elapsed_time:0.3f} s")
         if "Environment/Cumulative Reward" in values:
             stats_summary = values["Environment/Cumulative Reward"]
-            if self.rank is not None:
-                log_info.append(f"Rank: {self.rank}")
+            if global_values.get_rank() is not None:
+                log_info.append(f"Rank: {global_values.get_rank()}")
             log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}")
             log_info.append(f"Std of Reward: {stats_summary.std:0.3f}")

ml-agents/mlagents/trainers/policy/tf_policy.py (2 lines changed)


     GaussianDistribution,
     MultiCategoricalDistribution,
 )
-from mlagents.tf_utils.globals import get_rank

 logger = get_logger(__name__)

         self.grads = None
         self.update_batch: Optional[tf.Operation] = None
         self.trainable_variables: List[tf.Variable] = []
-        self.rank = get_rank()
         if create_tf_graph:
             self.create_tf_graph()

ml-agents/mlagents/trainers/cli_utils.py (9 lines changed)


help="Whether to enable debug-level logging for some parts of the code",
)
argparser.add_argument(
"--plugins",
default="",
type=str,
nargs="*",
help="Absolute paths of plugins to be loaded",
required=False,
action=DetectDefault,
)
argparser.add_argument(
"--env-args",
default=None,
nargs=argparse.REMAINDER,

ml-agents/mlagents/trainers/saver/tf_saver.py (9 lines changed)


 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
 from mlagents.trainers import __version__
+from mlagents.tf_utils import global_values

 logger = get_logger(__name__)

     def export(self, output_filepath: str, brain_name: str) -> None:
         # save model if there is only one worker or
         # only on worker-0 if there are multiple workers
-        if self.policy and self.policy.rank is not None and self.policy.rank != 0:
+        if (
+            self.policy
+            and global_values.get_rank() is not None
+            and global_values.get_rank() != 0
+        ):
             return
         export_policy_model(
             self.model_path, output_filepath, brain_name, self.graph, self.sess

             self._load_graph(policy, self.model_path, reset_global_steps=reset_steps)
         else:
             policy.initialize()
-            TFPolicy.broadcast_global_variables(0)
+            TFPolicy.broadcast_global_variables

     def _load_graph(
         self, policy: TFPolicy, model_path: str, reset_global_steps: bool = False

ml-agents/mlagents/trainers/learn.py (76 lines changed)


 # # Unity ML-Agents Toolkit
 import yaml
+import inspect

 import mlagents.trainers
 import mlagents_envs
 from mlagents import tf_utils

     GaugeWriter,
     ConsoleWriter,
 )
+from mlagents.trainers.sac.trainer import SACTrainer
+from mlagents.trainers.ppo.trainer import PPOTrainer
+from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.training_status import GlobalTrainingStatus
 from mlagents_envs.base_env import BaseEnv
 from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager

     add_metadata as add_timer_metadata,
 )
 from mlagents_envs import logging_util
+import sys
+import importlib
+import pkgutil
+from mlagents.trainers.initializer import Initializer

 logger = logging_util.get_logger(__name__)

+def get_all_subclasses(cls):
+    all_subclasses = []
+    for subclass in cls.__subclasses__():
+        all_subclasses.append(subclass)
+        all_subclasses.extend(get_all_subclasses(subclass))
+    return all_subclasses
+
+
+def get_initializer_and_trainer(paths: List[str]) -> Optional[dict]:
+    original_initializers = set(Initializer.__subclasses__())
+    original_trainers = set(RLTrainer.__subclasses__())
+    logger.info(
+        f"Found {len(original_initializers)} initializers and {len(original_trainers)} "
+        f"trainers."
+    )
+    # add all plugin paths to system path
+    for p in paths:
+        sys.path.append(p)
+    discovered_plugins = {
+        name: importlib.import_module(name)
+        for finder, name, ispkg in pkgutil.iter_modules(paths)
+    }
+    if discovered_plugins:
+        logger.info(f"The following plugins are available {discovered_plugins}")
+    new_initializers = set(get_all_subclasses(Initializer))
+    if len(new_initializers) == 0:
+        return None
+    elif len(new_initializers) == 1:
+        # load the initializer
+        logger.info("Registering new initializer")
+        distributed_init = list(new_initializers)[0]()
+        distributed_init.load()
+        # construct a list of new trainers
+        all_trainers = set(get_all_subclasses(RLTrainer))
+        new_trainers = list(all_trainers - original_trainers)
+        logger.info(f"Found {len(new_trainers)} new trainers")
+        new_trainer_map = dict()
+        for key, value in discovered_plugins.items():
+            trainer_name = importlib.import_module(key)
+            for name, obj in inspect.getmembers(trainer_name):
+                if inspect.isclass(obj) and issubclass(obj, PPOTrainer) and obj != PPOTrainer:
+                    print(f"Found a sub trainer of PPO Trainer: {obj}")
+                    new_trainer_map[key] = obj
+                if inspect.isclass(obj) and issubclass(obj, SACTrainer) and obj != SACTrainer:
+                    print(f"Found a sub trainer of SAC Trainer: {obj}")
+                    new_trainer_map[key] = obj
+        return new_trainer_map
+    else:
+        raise ValueError(
+            "There should be exactly one initializer passed through plugins option."
+        )
+
+
 def get_version_string() -> str:
     # pylint: disable=no-member
     return f""" Version information:

         options.environment_parameters, run_seed, restore=checkpoint_settings.resume
     )
+    new_trainer_map = get_initializer_and_trainer(options.plugins)
     trainer_factory = TrainerFactory(
         options.behaviors,
         write_path,

         env_parameter_manager,
+        new_trainer_map,
         maybe_init_path,
         False,
     )
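
To make the discovery flow above concrete, here is a hypothetical sketch of a plugin module that get_initializer_and_trainer() could pick up from a --plugins directory. The module name, class names, and method bodies are illustrative assumptions; only the requirements follow from the code above: the module must be importable from a --plugins path, exactly one Initializer subclass is instantiated and load()ed before trainers are built, and any PPOTrainer/SACTrainer subclass is stored in new_trainer_map under the plugin module's name. Since trainer_util.py (below) looks trainers up under the fixed keys 'distributed_ppo' and 'distributed_sac', the plugin modules would have to carry exactly those names.

# Hypothetical plugin file: <plugin_dir>/distributed_ppo.py
from mlagents.trainers.initializer import Initializer
from mlagents.trainers.ppo.trainer import PPOTrainer


class DistributedInitializer(Initializer):
    # Found via get_all_subclasses(Initializer); load() runs once before trainers are built.
    def load(self):
        pass  # assumed: set up the distributed backend and record the worker rank


class DistributedPPOTrainer(PPOTrainer):
    # Found via inspect.getmembers(); registered as new_trainer_map['distributed_ppo'].
    # Assumed: distributed behaviour (gradient averaging, parameter broadcast, ...)
    # would be added by overriding PPOTrainer methods; none of that is shown in this diff.
    pass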

ml-agents/mlagents/trainers/settings.py (11 lines changed)


 class TrainerType(Enum):
     PPO: str = "ppo"
     SAC: str = "sac"
+    DistributedPPO: str = "distributed_ppo"
+    DistributedSAC: str = "distributed_sac"

-        _mapping = {TrainerType.PPO: PPOSettings, TrainerType.SAC: SACSettings}
+        _mapping = {
+            TrainerType.PPO: PPOSettings,
+            TrainerType.SAC: SACSettings,
+            TrainerType.DistributedPPO: PPOSettings,
+            TrainerType.DistributedSAC: SACSettings,
+        }
         return _mapping[self]

     # These are options that are relevant to the run itself, and not the engine or environment.
     # They will be left here.
     debug: bool = parser.get_default("debug")
+    plugins: List[str] = parser.get_default("plugins")

     # Strict conversion
     cattr.register_structure_hook(EnvironmentSettings, strict_to_cls)
     cattr.register_structure_hook(EngineSettings, strict_to_cls)

         from file paths, and converts to a RunOptions instance.
         :param args: collection of command-line parameters passed to mlagents-learn
         :return: RunOptions representing the passed in arguments, with trainer config, curriculum and sampler
         configs loaded from files.
         """
         argparse_args = vars(args)
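
With the two enum members added, a behavior in the trainer configuration YAML could presumably select them through the usual trainer_type field, reusing the PPO/SAC hyperparameter settings per the _mapping above. A minimal, hypothetical snippet (behavior name and values are placeholders):

behaviors:
  MyBehavior:
    trainer_type: distributed_ppo   # or distributed_sac
    hyperparameters:
      learning_rate: 3.0e-4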

ml-agents/mlagents/trainers/trainer_util.py (28 lines changed)


 from mlagents.trainers.ghost.controller import GhostController
 from mlagents.trainers.settings import TrainerSettings, TrainerType

 logger = get_logger(__name__)

         load_model: bool,
         seed: int,
         param_manager: EnvironmentParameterManager,
+        new_trainer_map: dict,

-        self.init_path = init_path
+        self.new_trainer_map = new_trainer_map
+        self.init_path = init_path
         self.multi_gpu = multi_gpu
         self.ghost_controller = GhostController()

             self.ghost_controller,
             self.seed,
             self.param_manager,
+            self.new_trainer_map,
             self.init_path,
             self.multi_gpu,
         )

     ghost_controller: GhostController,
     seed: int,
     param_manager: EnvironmentParameterManager,
+    new_trainer_map: dict,
     init_path: str = None,
     multi_gpu: bool = False,
 ) -> Trainer:

     :param ghost_controller: The object that coordinates ghost trainers
     :param seed: The random seed to use
     :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
+    :param new_trainer_map: a mapping from trainer name to trainer class; to be used with the plugin
     :param init_path: Path from which to load model, if different from model_path.
     :return:
     """

         )
     elif trainer_type == TrainerType.SAC:
         trainer = SACTrainer(
             brain_name,
             min_lesson_length,
             trainer_settings,
             train_model,
             load_model,
             seed,
             trainer_artifact_path,
         )
+    elif trainer_type == TrainerType.DistributedPPO:
+        trainer = new_trainer_map['distributed_ppo'](
+            brain_name,
+            min_lesson_length,
+            trainer_settings,
+            train_model,
+            load_model,
+            seed,
+            trainer_artifact_path,
+        )
+    elif trainer_type == TrainerType.DistributedSAC:
+        trainer = new_trainer_map['distributed_sac'](
+            brain_name,
+            min_lesson_length,
+            trainer_settings,

ml-agents/mlagents/trainers/initializer.py (6 lines changed)


+class Initializer:
+    def __init__(self):
+        pass
+
+    def load(self):
+        pass
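
The base class above is deliberately empty: load() is the single hook that get_initializer_and_trainer() in learn.py calls before any trainers are built. A hypothetical plugin-side subclass, assuming a Horovod-style backend and the set_rank() helper sketched earlier (neither is confirmed by this diff), might look like:

# Hypothetical plugin-side initializer; the backend and set_rank() helper are assumptions.
from mlagents.trainers.initializer import Initializer
from mlagents.tf_utils import global_values


class HorovodInitializer(Initializer):
    def load(self):
        import horovod.tensorflow as hvd  # assumed distribution backend

        hvd.init()
        # Record this worker's rank so that saving, exporting and console logging
        # (see the rank checks in the diffs above) happen only on rank 0.
        global_values.set_rank(hvd.rank())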

ml-agents/mlagents/tf_utils/globals.py → ml-agents/mlagents/tf_utils/global_values.py (renamed, 0 lines changed)
