浏览代码

hard reset when team changes

/develop/dockerfile
Andrew Cohen 5 年前
当前提交
ddb6787c
共有 4 个文件被更改,包括 25 次插入2 次删除
  1. 14
      ml-agents/mlagents/trainers/ghost/controller.py
  2. 5
      ml-agents/mlagents/trainers/learn.py
  3. 5
      ml-agents/mlagents/trainers/trainer_controller.py
  4. 3
      ml-agents/mlagents/trainers/trainer_util.py

14
ml-agents/mlagents/trainers/ghost/controller.py


self._learning_team: int = -1
# Dict from team id to GhostTrainer for ELO calculation
self._ghost_trainers: Dict[int, GhostTrainer] = {}
# Signals to the trainer controller to perform a hard reset
self._reset = False
@property
def get_learning_team(self) -> int:

"""
return self._learning_team
@property
def reset(self) -> bool:
    """
    Report whether the learning team changed since this property was last read.

    Reading the property consumes the signal: the stored flag is cleared, so
    only the first read after a team change returns True. Causes full reset
    in trainer_controller.
    :return: True if a team change was flagged since the previous read.
    """
    team_changed = self._reset
    # Clear on read so a single team change triggers at most one reset.
    self._reset = False
    return team_changed
def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
"""

logger.debug(
"Learning team {} swapped on step {}".format(self._learning_team, step)
)
self._reset = True
# Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
# https://metinmediamath.wordpress.com/2013/11/27/how-to-calculate-the-elo-rating-including-example/

5
ml-agents/mlagents/trainers/learn.py


from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.trainer_util import (
load_config,
TrainerFactory,

sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_config, run_seed
)
ghost_controller = GhostController()
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,

not options.inference,
options.resume,
run_seed,
ghost_controller,
maybe_init_path,
maybe_meta_curriculum,
options.multi_gpu,

run_seed,
sampler_manager,
resampling_interval,
ghost_controller,
)
# Begin training

5
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
from mlagents.trainers.ghost.controller import GhostController
class TrainerController(object):

training_seed: int,
sampler_manager: SamplerManager,
resampling_interval: Optional[int],
ghost_controller: GhostController,
):
"""
:param model_path: Path to save the model.

self.meta_curriculum = meta_curriculum
self.sampler_manager = sampler_manager
self.resampling_interval = resampling_interval
self.ghost_controller = ghost_controller
self.trainer_threads: List[threading.Thread] = []
self.kill_trainers = False

and (self.resampling_interval)
and (steps % self.resampling_interval == 0)
)
if meta_curriculum_reset or generalization_reset:
if meta_curriculum_reset or generalization_reset or self.ghost_controller.reset:
self.end_trainer_episodes(env, lessons_incremented)
@timed

3
ml-agents/mlagents/trainers/trainer_util.py


train_model: bool,
load_model: bool,
seed: int,
ghost_controller: GhostController,
init_path: str = None,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,

self.seed = seed
self.meta_curriculum = meta_curriculum
self.multi_gpu = multi_gpu
self.ghost_controller = GhostController()
self.ghost_controller = ghost_controller
def generate(self, brain_name: str) -> Trainer:
return initialize_trainer(

正在加载...
取消
保存