
Address AgentProcessor comments

Branch: /develop-newnormalization
Ervin Teng, 5 years ago
Commit e2b2f4be
1 file changed, 4 insertions and 14 deletions
ml-agents/mlagents/trainers/agent_processor.py


         :param policy: Policy instance associated with this AgentProcessor.
         :param max_trajectory_length: Maximum length of a trajectory before it is added to the trainer.
         """
-        self.experience_buffers: Dict[str, List] = defaultdict(list)
+        self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
-        self.stats: Dict[str, List] = defaultdict(list)
+        self.stats: Dict[str, List[float]] = defaultdict(list)
         # Note: this is needed until we switch to AgentExperiences as the data input type.
         # We still need some info from the policy (memories, previous actions)
         # that really should be gathered by the env-manager.
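The two changed lines above only tighten the type annotations on the defaultdict-backed buffers; runtime behavior is unchanged, and the annotation mainly helps mypy and readers. As a minimal sketch (not the ML-Agents implementation; the AgentExperience below is a simplified stand-in for the real NamedTuple), this shows how such typed per-agent buffers accumulate data without explicit key-existence checks:

# Minimal sketch only: simplified stand-in types, not the ML-Agents code.
from collections import defaultdict
from typing import Dict, List, NamedTuple

import numpy as np


class AgentExperience(NamedTuple):  # simplified stand-in for the real NamedTuple
    obs: List[np.ndarray]
    reward: float
    done: bool


experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
stats: Dict[str, List[float]] = defaultdict(list)

# Appending under a missing key transparently creates the per-agent list,
# so no "if agent_id not in ..." checks are needed when new agents appear.
experience_buffers["agent-0"].append(
    AgentExperience(obs=[np.zeros(3, dtype=np.float32)], reward=1.0, done=False)
)
stats["cumulative_reward"].append(1.0)

print(len(experience_buffers["agent-0"]))  # 1
print(experience_buffers["never-seen"])    # [] (created on first access)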

         self.trainer = trainer
-    def __str__(self):
-        return "local_buffers :\n{0}".format(
-            "\n".join(
-                [
-                    "\tagent {0} :{1}".format(k, str(self.experience_buffers[k]))
-                    for k in self.experience_buffers.keys()
-                ]
-            )
-        )
     def add_experiences(
         self,
         curr_info: BrainInfo,

             self.last_take_action_outputs[agent_id] = take_action_outputs
         # Store the environment reward
-        tmp_environment = np.array(next_info.rewards, dtype=np.float32)
+        tmp_environment_reward = np.array(next_info.rewards, dtype=np.float32)
         for agent_id in next_info.agents:
             stored_info = self.last_brain_info.get(agent_id, None)

                 values = stored_take_action_outputs["value_heads"]
                 experience = AgentExperience(
                     obs=obs,
-                    reward=tmp_environment[next_idx],
+                    reward=tmp_environment_reward[next_idx],
                     done=done,
                     action=action,
                     action_probs=action_probs,
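The remaining two hunks rename tmp_environment to the more descriptive tmp_environment_reward and update its single use site. A rough, self-contained sketch follows (hypothetical agent IDs and rewards, not the real add_experiences loop) of how that per-step float32 reward array is indexed by each agent's position in next_info.agents:

# Rough sketch with hypothetical data; illustrates the indexing pattern only.
import numpy as np

agents = ["agent-0", "agent-1"]   # stand-in for next_info.agents
rewards = [0.5, -1.0]             # stand-in for next_info.rewards

# Store the environment reward as one float32 entry per agent for this step.
tmp_environment_reward = np.array(rewards, dtype=np.float32)

for next_idx, agent_id in enumerate(agents):
    # Each agent reads its reward by its position in the agents list.
    reward = tmp_environment_reward[next_idx]
    print(agent_id, float(reward))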
