浏览代码

trainer_controller expects name_behavior_ids

/develop/tanhsquash
Andrew Cohen 5 年前
当前提交
2b192bc3
共有 1 个文件被更改,包括 36 次插入51 次删除
  1. 87
      ml-agents/mlagents/trainers/trainer_controller.py

87
ml-agents/mlagents/trainers/trainer_controller.py


# # Unity ML-Agents Toolkit
# ## ML-Agenunique_t Learning
# ## ML-Agent Learning
from typing import Tuple, Dict, List, Optional, Set
from typing import Dict, List, Optional, Set
from collections import defaultdict
import numpy as np

self._create_model_path(self.model_path)
tf.reset_default_graph()
global_step = 0
last_brain_names: Set[str] = set()
last_behavior_identifiers: Set[Tuple[str, str]] = set()
last_brain_behavior_ids: Set[str] = set()
brain_names, behavior_identifiers = zip(
*map(lambda x: [x.split("?")[0], x], external_brain_behavior_ids)
)
new_behavior_ids = external_brain_behavior_ids - last_brain_behavior_ids
for name_behavior_id in new_behavior_ids:
brain_name, _ = name_behavior_id.split("?")
for brain_name, behavior_identifier in zip(
brain_names, behavior_identifiers
):
self.brain_name_to_identifier[brain_name].add(behavior_identifier)
external_brains = set(brain_names)
external_identifiers = set(zip(brain_names, behavior_identifiers))
# This could be done with a try/except which may improve performance?
if brain_name in self.trainers:
trainer = self.trainers[brain_name]
else:
trainer = self.trainer_factory.generate(brain_name)
self.trainers[brain_name] = trainer
self.logger.info(trainer)
if self.train_model:
trainer.write_tensorboard_text(
"Hyperparameters", trainer.parameters
)
new_brains = external_brains - last_brain_names
new_identifiers = external_identifiers - last_behavior_identifiers
for name in new_brains:
trainer = self.trainer_factory.generate(name)
self.trainers[name] = trainer
for behavior_identifier in self.brain_name_to_identifier[name]:
self.multi_trainers[behavior_identifier] = trainer
trainer.add_policy(
env_manager.external_brains[behavior_identifier]
)
env_manager.set_policy(
behavior_identifier, trainer.get_policy(behavior_identifier)
)
self.logger.info(trainer)
if self.train_model:
trainer.write_tensorboard_text(
"Hyperparameters", trainer.parameters
)
for name, new_identifier in new_identifiers:
trainer = self.trainers[name]
trainer[name].add_policy(
env_manager.external_brains[new_identifier]
)
trainer.add_policy(env_manager.external_brains[name_behavior_id])
new_identifier, trainer.get_policy(new_identifier)
name_behavior_id, trainer.get_policy(name_behavior_id)
last_brain_names = external_brains
last_behavior_identifiers = external_identifiers
self.brain_name_to_identifier[brain_name].add(name_behavior_id)
last_brain_behavior_ids = external_brain_behavior_ids
n_steps = self.advance(env_manager)
for i in range(n_steps):

for brain_name, trainer in self.trainers.items():
if brain_name in self.trainer_metrics:
self.trainer_metrics[brain_name].add_delta_step(delta_time_step)
if step_info.has_actions_for_brain(brain_name):
trainer.add_experiences(
step_info.previous_all_brain_info[brain_name],
step_info.current_all_brain_info[brain_name],
step_info.brain_name_to_action_info[brain_name].outputs,
)
trainer.process_experiences(
step_info.previous_all_brain_info[brain_name],
step_info.current_all_brain_info[brain_name],
)
for behavior_identifier in self.brain_name_to_identifier[brain_name]:
if step_info.has_actions_for_brain(behavior_identifier):
trainer.add_experiences(
step_info.previous_all_brain_info[behavior_identifier],
step_info.current_all_brain_info[behavior_identifier],
step_info.brain_name_to_action_info[
behavior_identifier
].outputs,
)
trainer.process_experiences(
step_info.previous_all_brain_info[behavior_identifier],
step_info.current_all_brain_info[behavior_identifier],
)
for brain_name, trainer in self.trainers.items():
if brain_name in self.trainer_metrics:
self.trainer_metrics[brain_name].add_delta_step(delta_time_step)

正在加载...
取消
保存