浏览代码

Fix issue with different decision intervals for different brains (#3181)

* Move action check into agent_processor

* Better loop for iterating over step_info

* Add warning when the agent manager for a behavior id is not found
/release-0.13.0
GitHub 5 年前
当前提交
bed7debf
共有 3 个文件被更改,包括 21 次插入和 18 次删除
  1. 7
      ml-agents/mlagents/trainers/agent_processor.py
  2. 9
      ml-agents/mlagents/trainers/env_manager.py
  3. 23
      ml-agents/mlagents/trainers/trainer_controller.py

7
ml-agents/mlagents/trainers/agent_processor.py


del self.episode_rewards[agent_id]
elif not next_info.local_done[next_idx]:
self.episode_steps[agent_id] += 1
self.policy.save_previous_action(
curr_info.agents, take_action_outputs["action"]
)
if "action" in take_action_outputs:
self.policy.save_previous_action(
curr_info.agents, take_action_outputs["action"]
)

9
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple
from typing import List, Dict, NamedTuple, Iterable
from mlagents.trainers.brain import AllBrainInfo, BrainParameters
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo

current_all_brain_info: AllBrainInfo
brain_name_to_action_info: Dict[str, ActionInfo]
def has_actions_for_brain(self, brain_name: str) -> bool:
return brain_name in self.brain_name_to_action_info and bool(
self.brain_name_to_action_info[brain_name].outputs
)
@property
def name_behavior_ids(self) -> Iterable[str]:
return self.brain_name_to_action_info.keys()
class EnvManager(ABC):

23
ml-agents/mlagents/trainers/trainer_controller.py


with hierarchical_timer("env_step"):
new_step_infos = env.step()
for step_info in new_step_infos:
for brain_name in self.trainers.keys():
for name_behavior_id in self.brain_name_to_identifier[brain_name]:
if step_info.has_actions_for_brain(name_behavior_id):
_processor = self.managers[name_behavior_id].processor
_processor.add_experiences(
step_info.previous_all_brain_info[name_behavior_id],
step_info.current_all_brain_info[name_behavior_id],
step_info.brain_name_to_action_info[
name_behavior_id
].outputs,
for name_behavior_id in step_info.name_behavior_ids:
if name_behavior_id not in self.managers:
self.logger.warning(
"Agent manager was not created for behavior id {}.".format(
name_behavior_id
)
continue
_processor = self.managers[name_behavior_id].processor
_processor.add_experiences(
step_info.previous_all_brain_info[name_behavior_id],
step_info.current_all_brain_info[name_behavior_id],
step_info.brain_name_to_action_info[name_behavior_id].outputs,
)
for brain_name, trainer in self.trainers.items():
if self.train_model and trainer.get_step <= trainer.get_max_steps:

正在加载...
取消
保存