浏览代码

docstrings/ghost_swap -> team_change

/develop/cubewars
Andrew Cohen 5 年前
当前提交
1269b555
共有 4 个文件被更改,包括 54 次插入12 次删除
  1. 13
      ml-agents/mlagents/trainers/behavior_id_utils.py
  2. 40
      ml-agents/mlagents/trainers/ghost/controller.py
  3. 8
      ml-agents/mlagents/trainers/learn.py
  4. 5
      ml-agents/mlagents/trainers/trainer_util.py

13
ml-agents/mlagents/trainers/behavior_id_utils.py


class BehaviorIdentifiers(NamedTuple):
    """
    BehaviorIdentifiers is a named tuple of the identifiers that uniquely distinguish
    an agent encountered in the trainer_controller. The named tuple consists of the
    fully qualified behavior name, the brain name (which corresponds to a trainer
    in the trainer controller) and the team id. In the future, this can be extended
    to support further identifiers.
    """

    # Fully qualified behavior name, e.g. "name?team=0" — see the companion
    # parser docstring in this module.
    behavior_id: str
    # Brain name; corresponds to a trainer in the trainer controller.
    brain_name: str
    # Team id of the agent (parsed from the "team=" query parameter).
    team_id: int

"""
Parses a name_behavior_id of the form name?team=0&param1=i&...
Parses a name_behavior_id of the form name?team=0
This allows you to access the brain name and distinguishing identifiers
without parsing more than once.
This allows you to access the brain name and team id of an agent
:param name_behavior_id: String of behavior params in HTTP format.
:returns: A BehaviorIdentifiers object.
"""

40
ml-agents/mlagents/trainers/ghost/controller.py


class GhostController(object):
"""
GhostController contains a queue of team ids. GhostTrainers subscribe to the GhostController and query
it to get the current learning team. The GhostController cycles through team ids every 'swap_interval'
which corresponds to the number of trainer steps between changing learning teams.
"""
"""
Create a GhostController.
:param swap_interval: Number of trainer steps between changing learning teams.
:param maxlen: Maximum number of GhostTrainers allowed in this GhostController
"""
# Dict from team id to GhostTrainer
"""
Given a team_id and trainer, add to queue and trainers if not already.
The GhostTrainer is used later by the controller to get ELO ratings of agents.
:param team_id: The team_id of an agent managed by this GhostTrainer
:param trainer: A GhostTrainer that manages this team_id.
"""
self._queue.append(team_id)
else:
self._queue.append(team_id)
"""
Returns the current learning team. If 'swap_interval' steps have elapsed, the current
learning team is added to the end of the queue and then updated with the next in line.
:param step: Current step of the trainer.
:return: The learning team id
"""
self._learning_team = self._queue.popleft()
self._learning_team = self._queue.popleft()
# TODO : Generalize this to more than two teams
"""
Calculates the change in ELO given the rating of the learning team and the result. The GhostController
queries the other GhostTrainers for the ELO of their agent that is currently being deployed.
Note, this could be the current agent or a past snapshot.
:param rating: Rating of the learning team.
:param result: Win, loss, or draw from the perspective of the learning team.
:return: The change in ELO.
"""
opponent_rating: float = 0.0
for team_id, trainer in self._ghost_trainers.items():
if team_id != self._learning_team:

8
ml-agents/mlagents/trainers/learn.py


)
argparser.add_argument(
"--ghost-swap",
"--team-change",
help="Number of trainer steps between swapping behavior id being ghosted",
help="Number of trainer steps between changing the team_id that is learning",
)
argparser.add_argument(

keep_checkpoints: int = parser.get_default("keep_checkpoints")
base_port: int = parser.get_default("base_port")
num_envs: int = parser.get_default("num_envs")
ghost_swap: int = parser.get_default("ghost_swap")
team_change: int = parser.get_default("team_change")
curriculum_config: Optional[Dict] = None
lesson: int = parser.get_default("lesson")
no_graphics: bool = parser.get_default("no_graphics")

options.keep_checkpoints,
options.train_model,
options.load_model,
options.ghost_swap,
options.team_change,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,

5
ml-agents/mlagents/trainers/trainer_util.py


keep_checkpoints: int,
train_model: bool,
load_model: bool,
ghost_swap: int,
team_change: int,
seed: int,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,

self.seed = seed
self.meta_curriculum = meta_curriculum
self.multi_gpu = multi_gpu
self.ghost_controller = GhostController(ghost_swap)
self.ghost_controller = GhostController(team_change)
def generate(self, brain_name: str) -> Trainer:
return initialize_trainer(

:param keep_checkpoints: How many model checkpoints to keep
:param train_model: Whether to train the model (vs. run inference)
:param load_model: Whether to load the model or randomly initialize
:param ghost_controller: The object that coordinates ghost trainers
:param seed: The random seed to use
:param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
:return:

正在加载...
取消
保存