) |
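        # Self-play hyperparameters: "save_steps" counts wrapped-trainer steps
        # between policy snapshots; "swap_steps" counts ghost steps between
        # opponent snapshot swaps (see ghost_step below).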
        self.steps_between_save = self_play_parameters.get("save_steps", 20000)
        self.steps_between_swap = self_play_parameters.get("swap_steps", 20000)

        # Counts the number of steps of the ghost policies. Snapshot swapping
        # depends on this counter, whereas snapshot saving and team switching
        # depend on the wrapped trainer's step count. This ensures that all
        # teams train for the same number of trainer steps.
        self.ghost_step: int = 0

        # A list of dicts from brain name to a single snapshot for this trainer's policies
        self.policy_snapshots: List[Dict[str, List[float]]] = []
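        # These snapshots are pushed out later as the fixed opponent policies
        # that swap_snapshot swaps in every steps_between_swap ghost steps.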

        # Case 2: The current learning team is managed by a different GhostTrainer.
        # If the learning team changes to a team managed by this GhostTrainer, this loop
        # will push the current_snapshot into the correct queue. Otherwise,
        # it will continue skipping and swap_snapshot will continue to handle
        # pushing fixed snapshots.
        # Case 3: No team change. The if statement just continues to push the policy
        # into the correct queue (or not if not learning team).
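        # Ask the controller which team should be learning at the wrapped
        # trainer's current step count.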
        next_learning_team = self.controller.get_learning_team(self.get_step)

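        # Each internal policy queue delivers updated policies for one brain
        # from the wrapped trainer.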
        for brain_name in self._internal_policy_queues:
            internal_policy_queue = self._internal_policy_queues[brain_name]

    def export_model(self, name_behavior_id: str) -> None:
        """
        Forwarding call to the wrapped trainer's export_model.
        First loads the current snapshot.
        """
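        # Restore the current learning snapshot into the wrapped trainer's
        # policy so the exported model is the learning policy, not a fixed
        # opponent snapshot.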
        parsed_behavior_id = self._name_to_parsed_behavior_id[name_behavior_id]
        brain_name = parsed_behavior_id.brain_name
        policy = self.trainer.get_policy(brain_name)
        policy.load_weights(self.current_policy_snapshot[brain_name])
        self.trainer.export_model(brain_name)

    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
        """