浏览代码

docstrings/ghost_swap -> team_change

/develop/cubewars
Andrew Cohen 5 年前
当前提交
1269b555
共有 4 个文件被更改,包括 54 次插入12 次删除
  1. 13
      ml-agents/mlagents/trainers/behavior_id_utils.py
  2. 40
      ml-agents/mlagents/trainers/ghost/controller.py
  3. 8
      ml-agents/mlagents/trainers/learn.py
  4. 5
      ml-agents/mlagents/trainers/trainer_util.py

13
ml-agents/mlagents/trainers/behavior_id_utils.py


class BehaviorIdentifiers(NamedTuple):
    """
    BehaviorIdentifiers is a named tuple of the identifiers that uniquely distinguish
    an agent encountered in the trainer_controller. The named tuple consists of the
    fully qualified behavior name, the brain name (which corresponds to a trainer
    in the trainer controller) and the team id. In the future, this can be extended
    to support further identifiers.
    """

    # Fully qualified behavior name, e.g. "name?team=0" — see the companion
    # parser docstring in this module.
    behavior_id: str
    # Brain name; corresponds to a trainer in the trainer controller.
    brain_name: str
    # Team id of the agent (parsed from the "team=" query parameter).
    team_id: int

"""
Parses a name_behavior_id of the form name?team=0&param1=i&...
Parses a name_behavior_id of the form name?team=0
This allows you to access the brain name and distinguishing identifiers
without parsing more than once.
This allows you to access the brain name and team id of an agent
:param name_behavior_id: String of behavior params in HTTP format.
:returns: A BehaviorIdentifiers object.
"""

40
ml-agents/mlagents/trainers/ghost/controller.py


class GhostController(object):
"""
GhostController contains a queue of team ids. GhostTrainers subscribe to the GhostController and query
it to get the current learning team. The GhostController cycles through team ids every 'swap_interval'
which corresponds to the number of trainer steps between changing learning teams.
"""
"""
Create a GhostController.
:param swap_interval: Number of trainer steps between changing learning teams.
:param maxlen: Maximum number of GhostTrainers allowed in this GhostController
"""
# Dict from team id to GhostTrainer
"""
Given a team_id and trainer, add to queue and trainers if not already.
The GhostTrainer is used later by the controller to get ELO ratings of agents.
:param team_id: The team_id of an agent managed by this GhostTrainer
:param trainer: A GhostTrainer that manages this team_id.
"""
self._queue.append(team_id)
else:
self._queue.append(team_id)
"""
Returns the current learning team. If 'swap_interval' steps have elapsed, the current
learning team is added to the end of the queue and then updated with the next in line.
:param step: Current step of the trainer.
:return: The learning team id
"""
self._learning_team = self._queue.popleft()
self._learning_team = self._queue.popleft()
# TODO : Generalize this to more than two teams
"""
Calculates the change in ELO given the rating of the learning team and the result. The GhostController
queries the other GhostTrainers for the ELO of their agent that is currently being deployed.
Note, this could be the current agent or a past snapshot.
:param rating: Rating of the learning team.
:param result: Win, loss, or draw from the perspective of the learning team.
:return: The change in ELO.
"""
opponent_rating: float = 0.0
for team_id, trainer in self._ghost_trainers.items():
if team_id != self._learning_team:

8
ml-agents/mlagents/trainers/learn.py


)
argparser.add_argument(
"--ghost-swap",
"--team-change",
help="Number of trainer steps between swapping behavior id being ghosted",
help="Number of trainer steps between changing the team_id that is learning",
)
argparser.add_argument(

keep_checkpoints: int = parser.get_default("keep_checkpoints")
base_port: int = parser.get_default("base_port")
num_envs: int = parser.get_default("num_envs")
ghost_swap: int = parser.get_default("ghost_swap")
team_change: int = parser.get_default("team_change")
curriculum_config: Optional[Dict] = None
lesson: int = parser.get_default("lesson")
no_graphics: bool = parser.get_default("no_graphics")

options.keep_checkpoints,
options.train_model,
options.load_model,
options.ghost_swap,
options.team_change,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,

5
ml-agents/mlagents/trainers/trainer_util.py


keep_checkpoints: int,
train_model: bool,
load_model: bool,
ghost_swap: int,
team_change: int,
seed: int,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,

self.seed = seed
self.meta_curriculum = meta_curriculum
self.multi_gpu = multi_gpu
self.ghost_controller = GhostController(ghost_swap)
self.ghost_controller = GhostController(team_change)
def generate(self, brain_name: str) -> Trainer:
return initialize_trainer(

:param keep_checkpoints: How many model checkpoints to keep
:param train_model: Whether to train the model (vs. run inference)
:param load_model: Whether to load the model or randomly initialize
:param ghost_controller: The object that coordinates ghost trainers
:param seed: The random seed to use
:param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
:return:

正在加载...
取消
保存