|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GhostController(object): |
|
|
|
""" |
|
|
|
GhostController contains a queue of team ids. GhostTrainers subscribe to the GhostController and query |
|
|
|
it to get the current learning team. The GhostController cycles through team ids every 'swap_interval' |
|
|
|
which corresponds to the number of trainer steps between changing learning teams. |
|
|
|
""" |
|
|
|
|
|
|
|
""" |
|
|
|
Create a GhostController. |
|
|
|
:param swap_interval: Number of trainer steps between changing learning teams. |
|
|
|
:param maxlen: Maximum number of GhostTrainers allowed in this GhostController |
|
|
|
""" |
|
|
|
|
|
|
|
# Dict from team id to GhostTrainer |
|
|
|
""" |
|
|
|
Given a team_id and trainer, add to queue and trainers if not already present. |
|
|
|
The GhostTrainer is used later by the controller to get ELO ratings of agents. |
|
|
|
:param team_id: The team_id of an agent managed by this GhostTrainer |
|
|
|
:param trainer: A GhostTrainer that manages this team_id. |
|
|
|
""" |
|
|
|
self._queue.append(team_id) |
|
|
|
else: |
|
|
|
self._queue.append(team_id) |
|
|
|
""" |
|
|
|
Returns the current learning team. If 'swap_interval' steps have elapsed, the current |
|
|
|
learning team is added to the end of the queue and then updated with the next in line. |
|
|
|
:param step: Current step of the trainer. |
|
|
|
:return: The learning team id |
|
|
|
""" |
|
|
|
self._learning_team = self._queue.popleft() |
|
|
|
self._learning_team = self._queue.popleft() |
|
|
|
|
|
|
|
# TODO : Generalize this to more than two teams |
|
|
|
""" |
|
|
|
Calculates the change in ELO given the rating of the learning team and the result. The GhostController |
|
|
|
queries the other GhostTrainers for the ELO of their agent that is currently being deployed. |
|
|
|
Note, this could be the current agent or a past snapshot. |
|
|
|
:param rating: Rating of the learning team. |
|
|
|
:param result: Win, loss, or draw from the perspective of the learning team. |
|
|
|
:return: The change in ELO. |
|
|
|
""" |
|
|
|
opponent_rating: float = 0.0 |
|
|
|
for team_id, trainer in self._ghost_trainers.items(): |
|
|
|
if team_id != self._learning_team: |
|
|
|