
Merge branch 'self-play-mutex' into soccer-2v1

/asymm-envs
Andrew Cohen, 5 years ago
Commit c05d6c49
Showing 1 changed file with 13 insertions and 2 deletions
ml-agents/mlagents/trainers/ghost/trainer.py (15 changed lines)

        )
        self.steps_between_save = self_play_parameters.get("save_steps", 20000)
        self.steps_between_swap = self_play_parameters.get("swap_steps", 20000)
        # Counts the number of steps taken by the ghost policies. Snapshot swapping
        # depends on this counter, whereas snapshot saving and team switching depend
        # on the wrapped trainer's step count. This ensures that all teams train for
        # the same number of trainer steps.
        self.ghost_step: int = 0
        # A list of dicts from brain name to a single snapshot for this trainer's policies
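The two cadences read from different counters, which is easy to miss in the diff. Below is a minimal standalone sketch, not the ML-Agents implementation (all names other than save_steps/swap_steps are hypothetical), of snapshot saving keyed to the wrapped trainer's step count while snapshot swapping is keyed to the ghost step count:

class SnapshotCadence:
    # Hypothetical illustration of the two counters described in the comment above.
    def __init__(self, save_steps: int = 20000, swap_steps: int = 20000) -> None:
        self.steps_between_save = save_steps
        self.steps_between_swap = swap_steps
        self.ghost_step = 0    # steps taken by the frozen (ghost) policies
        self.trainer_step = 0  # steps taken by the wrapped, learning trainer

    def on_ghost_step(self) -> bool:
        # Advance the ghost counter; a swap of the fixed opponent snapshot
        # is due every swap_steps ghost steps.
        self.ghost_step += 1
        return self.ghost_step % self.steps_between_swap == 0

    def on_trainer_step(self) -> bool:
        # Advance the wrapped trainer's counter; a snapshot save is due
        # every save_steps trainer steps.
        self.trainer_step += 1
        return self.trainer_step % self.steps_between_save == 0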

        # will push the current_snapshot into the correct queue. Otherwise,
        # it will continue skipping, and swap_snapshot will continue to handle
        # pushing fixed snapshots.
        # Case 3: No team change. The if statement just continues to push the policy
        # into the correct queue (or not, if it is not the learning team).
        next_learning_team = self.controller.get_learning_team(self.get_step)
        for brain_name in self._internal_policy_queues:
            internal_policy_queue = self._internal_policy_queues[brain_name]
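For readers without the full file: the loop above drains one policy queue per brain without blocking. A self-contained sketch of that pattern, assuming a plain queue.Queue in place of ML-Agents' AgentManagerQueue (the function and variable names here are hypothetical):

import queue
from typing import Any, Dict

def poll_internal_policy_queues(
    internal_policy_queues: Dict[str, "queue.Queue[Any]"],
    current_policy_snapshot: Dict[str, Any],
) -> None:
    # For each brain, take the newest policy the wrapped trainer produced,
    # if any, and record it as that brain's current snapshot.
    for brain_name, policy_queue in internal_policy_queues.items():
        try:
            # Non-blocking: the wrapped trainer may not have emitted a new
            # policy this step; the existing snapshot is then left unchanged.
            policy = policy_queue.get_nowait()
        except queue.Empty:
            continue
        current_policy_snapshot[brain_name] = policy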

    def export_model(self, name_behavior_id: str) -> None:
        """
        Forwarding call to wrapped trainer's export_model.
        First loads the current snapshot.
        """
        parsed_behavior_id = self._name_to_parsed_behavior_id[name_behavior_id]
        brain_name = parsed_behavior_id.brain_name
        policy = self.trainer.get_policy(brain_name)
        policy.load_weights(self.current_policy_snapshot[brain_name])
        self.trainer.export_model(brain_name)
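The load-before-export order appears to be the point of this change: the ghost trainer routinely loads frozen past snapshots into the wrapped trainer's policies for self-play, so at export time the in-memory weights may belong to an old opponent rather than the latest learned policy. Restoring current_policy_snapshot first guards against exporting a stale ghost. A hypothetical call site (the behavior id is illustrative; ML-Agents behavior ids take the form BrainName?team=N):

# ghost_trainer is assumed to be a GhostTrainer wrapping an inner trainer.
ghost_trainer.export_model("Soccer?team=0")
# Internally: resolve the brain name, overwrite any ghost weights with the
# latest learned snapshot, then forward the export to the wrapped trainer.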
    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
        """
