        self.change_current_elo(change)
        self._stats_reporter.add_stat("Self-play/ELO", self.current_elo)
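        # For reference, a standard Elo update has the form sketched below
        # (`opponent_elo`, `k_factor`, and `result` are illustrative names,
        # not attributes of this class):
        #
        #   expected = 1.0 / (1.0 + 10 ** ((opponent_elo - self.current_elo) / 400.0))
        #   change = k_factor * (result - expected)  # result: 1 win, 0.5 draw, 0 loss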

    def advance(self) -> None:
        """
        Steps the trainer, passing trajectories to the wrapped trainer and
        calling trainer advance.
        """
        for trajectory_queue in self.trajectory_queues:
            try:
                # We grab at most the current length of the queue.
                # This ensures that even if the queue is being filled faster than it is
                # being emptied, the trajectories in the queue are on-policy.
                for _ in range(trajectory_queue.qsize()):
                    t = trajectory_queue.get(block=False)
                    # Process the trajectory (ELO bookkeeping, as above) and
                    # pass it on to the wrapped trainer.
                    self._process_trajectory(t)
            except AgentManagerQueue.Empty:
                pass
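        # Because get(block=False) raises AgentManagerQueue.Empty rather than
        # waiting, advance() never stalls on a drained queue; trajectories that
        # arrive mid-drain are simply picked up on the next call.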
        # Step the wrapped trainer on the trajectories queued above.
        self.trainer.advance()
        # Request a learning-team swap once the current team has trained long enough.
        if self.get_step - self.last_team_change > self.steps_to_train_team:
            self.controller.change_training_team(self.get_step)
            self.last_team_change = self.get_step
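        # steps_to_train_team is the per-team training budget between swaps
        # (likely sourced from the self-play `team_change` setting; the exact
        # config key is an assumption).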
|
|
|