hard reset when team changes

5 年前 · ddb6787c
--- a/ml-agents/mlagents/trainers/ghost/controller.py
+++ b/ml-agents/mlagents/trainers/ghost/controller.py
        self._learning_team: int = -1
        # Dict from team id to GhostTrainer for ELO calculation
        self._ghost_trainers: Dict[int, GhostTrainer] = {}
+        # Signals to the trainer controller to perform a hard reset
+        self._reset = False

    @property
    def get_learning_team(self) -> int:
        """
        return self._learning_team
+
+    @property
+    def reset(self) -> bool:
+        """
+        Whether or not team change occurred. Causes full reset in trainer_controller
+        Reading this property is destructive: it returns the pending flag and then
+        clears it, so an immediately repeated read returns False until the flag is
+        set again. Read it exactly once per check; if a short-circuiting boolean
+        expression skips the read, the flag stays pending for the next read.
+        :return: The truth value of the team changing
+        """
+        # Snapshot the flag first so the caller still sees the pending change.
+        change_team = self._reset
+        # Clear the flag so one team change is reported only once.
+        if self._reset:
+            self._reset = False
+        return change_team

    def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
        """
        logger.debug(
            "Learning team {} swapped on step {}".format(self._learning_team, step)
        )
+        self._reset = True

    # Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
    # https://metinmediamath.wordpress.com/2013/11/27/how-to-calculate-the-elo-rating-including-example/
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 from mlagents import tf_utils
 from mlagents.trainers.trainer_controller import TrainerController
 from mlagents.trainers.meta_curriculum import MetaCurriculum
+from mlagents.trainers.ghost.controller import GhostController
 from mlagents.trainers.trainer_util import (
    load_config,
    TrainerFactory,
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
+        ghost_controller = GhostController()
+
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            not options.inference,
            options.resume,
            run_seed,
+            ghost_controller,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
            run_seed,
            sampler_manager,
            resampling_interval,
+            ghost_controller,
        )

    # Begin training
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
 from mlagents.trainers.trainer_util import TrainerFactory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManager
+from mlagents.trainers.ghost.controller import GhostController


 class TrainerController(object):
        training_seed: int,
        sampler_manager: SamplerManager,
        resampling_interval: Optional[int],
+        ghost_controller: GhostController,
    ):
        """
        :param model_path: Path to save the model.
        self.meta_curriculum = meta_curriculum
        self.sampler_manager = sampler_manager
        self.resampling_interval = resampling_interval
+        self.ghost_controller = ghost_controller

        self.trainer_threads: List[threading.Thread] = []
        self.kill_trainers = False
            and (self.resampling_interval)
            and (steps % self.resampling_interval == 0)
        )
-        if meta_curriculum_reset or generalization_reset:
+        if meta_curriculum_reset or generalization_reset or self.ghost_controller.reset:
            self.end_trainer_episodes(env, lessons_incremented)

    @timed
--- a/ml-agents/mlagents/trainers/trainer_util.py
+++ b/ml-agents/mlagents/trainers/trainer_util.py
        train_model: bool,
        load_model: bool,
        seed: int,
+        ghost_controller: GhostController,
        init_path: str = None,
        meta_curriculum: MetaCurriculum = None,
        multi_gpu: bool = False,
        self.seed = seed
        self.meta_curriculum = meta_curriculum
        self.multi_gpu = multi_gpu
-        self.ghost_controller = GhostController()
+        self.ghost_controller = ghost_controller

    def generate(self, brain_name: str) -> Trainer:
        return initialize_trainer(