
fixed controller behavior when first team discovered isn't 0

/asymm-envs
Andrew Cohen, 5 years ago
Commit 66b505c3
2 files changed, 10 insertions(+), 8 deletions(-)
  1. ml-agents/mlagents/trainers/ghost/controller.py (4 changed lines)
  2. ml-agents/mlagents/trainers/ghost/trainer.py (14 changed lines)

ml-agents/mlagents/trainers/ghost/controller.py (+3 −1)

         self._swap_interval = swap_interval
         self._last_swap: int = 0
         self._queue: Deque[int] = deque(maxlen=maxlen)
-        self._learning_team: int = 0
+        self._learning_team: int = -1
         self._ghost_trainers: Dict[int, GhostTrainer] = {}

     def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
+        if self._learning_team < 0:
+            self._learning_team = team_id

     def get_learning_team(self, step: int) -> int:
         if step >= self._swap_interval + self._last_swap:
ml-agents/mlagents/trainers/ghost/trainer.py (+7 −7)

         # in the situation where new agents are created/destroyed
         # after learning team switches. These agents need to be added
         # to trainers properly.
-        self.learning_team: int = None
+        self._learning_team: int = None
         self.wrapped_trainer_team: int = None
         self.current_policy_snapshot = None
         self.last_save = 0

         parsed_behavior_id = self._name_to_parsed_behavior_id[
             trajectory_queue.behavior_id
         ]
-        if parsed_behavior_id.team_id == self.learning_team:
+        if parsed_behavior_id.team_id == self._learning_team:
             # With a future multiagent trainer, this will be indexed by 'role'
             internal_trajectory_queue = self._internal_trajectory_queues[
                 parsed_behavior_id.brain_name

         parsed_behavior_id = self._name_to_parsed_behavior_id[
             policy_queue.behavior_id
         ]
-        if parsed_behavior_id.team_id == self.learning_team:
+        if parsed_behavior_id.team_id == self._learning_team:
             # With a future multiagent trainer, this will be indexed by 'role'
             internal_policy_queue = self._internal_policy_queues[
                 parsed_behavior_id.brain_name
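Both routing paths above key on the `team_id` parsed out of the queue's behavior ID. A small hedged sketch of that parsing, assuming ML-Agents' `Name?team=N` naming convention for multi-team behaviors; `ParsedBehaviorId` and `parse_behavior_id` are hypothetical stand-ins for the real `BehaviorIdentifiers` helper:

from typing import NamedTuple


class ParsedBehaviorId(NamedTuple):
    behavior_id: str
    brain_name: str
    team_id: int


def parse_behavior_id(name_behavior_id: str) -> ParsedBehaviorId:
    # "Striker?team=1" -> brain_name "Striker", team_id 1; behaviors
    # without a team suffix default to team 0.
    brain_name, _, suffix = name_behavior_id.partition("?team=")
    team_id = int(suffix) if suffix else 0
    return ParsedBehaviorId(name_behavior_id, brain_name, team_id)


assert parse_behavior_id("Striker?team=1").team_id == 1
assert parse_behavior_id("Goalie").team_id == 0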

         except AgentManagerQueue.Empty:
             pass
-        self.learning_team = self.controller.get_learning_team(self.ghost_step)
+        self._learning_team = self.controller.get_learning_team(self.ghost_step)
         if self.ghost_step - self.last_save > self.steps_between_save:
             self._save_snapshot(self.trainer.policy)
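Two things happen on each pass here: the renamed `_learning_team` is re-read from the controller, and a snapshot of the learning policy is saved on a fixed cadence. A hypothetical helper isolating that cadence check; the `last_save` update is an assumption, since the diff does not show where it is recorded:

def _maybe_save_snapshot(self) -> None:
    # Snapshot the wrapped trainer's current policy every
    # `steps_between_save` ghost steps.
    if self.ghost_step - self.last_save > self.steps_between_save:
        self._save_snapshot(self.trainer.policy)
        self.last_save = self.ghost_step  # assumed bookkeeping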

             self.current_policy_snapshot = weights
             self.trainer.add_policy(parsed_behavior_id, policy)
             self._save_snapshot(policy)  # Need to save after trainer initializes policy
-            self.learning_team = self.controller.get_learning_team(self.ghost_step)
+            self._learning_team = self.controller.get_learning_team(self.ghost_step)
             self.wrapped_trainer_team = team_id
         else:
             # for saving/swapping snapshots
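The ordering here carries the reasoning in the inline comment: the snapshot copies weights out of the wrapped trainer's policy, so it can only be taken after `trainer.add_policy` has initialized that policy. A hedged reconstruction of the surrounding method from the diff context alone; `get_weights` and the else-branch registry are assumptions:

def add_policy(self, parsed_behavior_id, policy) -> None:
    # Reconstruction from diff context, not the verbatim method.
    team_id = parsed_behavior_id.team_id
    if self.wrapped_trainer_team is None or team_id == self.wrapped_trainer_team:
        weights = policy.get_weights()  # assumed accessor
        self.current_policy_snapshot = weights
        # Initialize through the wrapped trainer first...
        self.trainer.add_policy(parsed_behavior_id, policy)
        # ...then snapshot, so the snapshot captures initialized weights.
        self._save_snapshot(policy)
        self._learning_team = self.controller.get_learning_team(self.ghost_step)
        self.wrapped_trainer_team = team_id
    else:
        # Non-learning behaviors are only registered for saving/swapping
        # snapshots (assumed registry).
        self.policies[parsed_behavior_id.behavior_id] = policy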

                 policy_queue.behavior_id
             ]
             # here is the place for a sampling protocol
-            if parsed_behavior_id.team_id == self.learning_team:
+            if parsed_behavior_id.team_id == self._learning_team:
                 continue
             elif np.random.uniform() < (1 - self.play_against_current_self_ratio):
                 x = np.random.randint(len(self.policy_snapshots))

                     self.ghost_step,
                     x,
                     parsed_behavior_id.behavior_id,
-                    self.learning_team,
+                    self._learning_team,
                 )
             )
             policy = self.get_policy(parsed_behavior_id.behavior_id)
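This is the sampling protocol the comment points at: when pushing a policy to a non-learning team's queue, with probability `1 - play_against_current_self_ratio` the opponent gets a uniformly sampled past snapshot, otherwise it keeps playing against the current self. A standalone hedged sketch, assuming `policy_snapshots` is a list of saved weight dicts and `current_policy_snapshot` the latest weights:

import numpy as np


def sample_opponent_snapshot(
    policy_snapshots, current_policy_snapshot, play_against_current_self_ratio
):
    # With probability (1 - ratio), play a uniformly sampled past snapshot.
    if np.random.uniform() < (1 - play_against_current_self_ratio):
        x = np.random.randint(len(policy_snapshots))
        return x, policy_snapshots[x]
    # Otherwise keep playing against the current policy ("current self").
    return "current", current_policy_snapshot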
