
Merge pull request #937 from Unity-Technologies/release-v0.4-fix-curiosity-odd

Hotfix - Curiosity & ODD
GitHub · 6 years ago
Current commit: 3b5af6b2
2 files changed, 57 insertions(+), 18 deletions(-)
  1. python/unityagents/brain.py (18 changes)
  2. python/unitytrainers/ppo/trainer.py (57 changes)

python/unityagents/brain.py (18 changes)


class BrainInfo:
    def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
                 reward=None, agents=None, local_done=None,
                 vector_action=None, text_action=None, max_reached=None):
        """
        Describes experience at current step of all agents linked to a brain.
        """

        Vector Action space type: {5}
        Vector Action space size (per agent): {6}
        Vector Action descriptions: {7}'''.format(self.brain_name,
                                                  str(self.number_visual_observations),
                                                  self.vector_observation_space_type,
                                                  str(self.vector_observation_space_size),
                                                  str(self.num_stacked_vector_observations),
                                                  self.vector_action_space_type,
                                                  str(self.vector_action_space_size),
                                                  ', '.join(self.vector_action_descriptions))
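For reference, here is a minimal sketch of how a BrainInfo could be instantiated with the constructor shown above. It is not part of this diff; the observation shapes, agent id, and field values are illustrative assumptions only.

# Hypothetical example (not from this PR): a BrainInfo for one agent with a
# single 8-dimensional vector observation and no visual observations.
import numpy as np
from unityagents import BrainInfo

info = BrainInfo(
    visual_observation=[],                 # no camera observations
    vector_observation=np.zeros((1, 8)),   # 1 agent x 8-dim observation (assumed shape)
    text_observations=[""],
    memory=np.zeros((1, 0)),
    reward=[0.0],
    agents=[0],                            # placeholder agent id
    local_done=[False],
    vector_action=np.zeros((1, 2)),        # assumed 2-dim continuous action
    text_action=[""],
    max_reached=[False],
)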

python/unitytrainers/ppo/trainer.py (57 changes)


import numpy as np
import tensorflow as tf
-from unityagents import AllBrainInfo
+from unityagents import AllBrainInfo, BrainInfo
from unitytrainers.buffer import Buffer
from unitytrainers.ppo.models import PPOModel
from unitytrainers.trainer import UnityTrainerException, Trainer

        else:
            return run_out[self.model.output], None, None, run_out

+    def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
+        """
+        Constructs a BrainInfo which contains the most recent previous experiences for all agents info
+        which correspond to the agents in a provided next_info.
+        :BrainInfo next_info: A t+1 BrainInfo.
+        :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
+        """
+        visual_observations = [[]]
+        vector_observations = []
+        text_observations = []
+        memories = []
+        rewards = []
+        local_dones = []
+        max_reacheds = []
+        agents = []
+        prev_vector_actions = []
+        prev_text_actions = []
+        for agent_id in next_info.agents:
+            agent_brain_info = self.training_buffer[agent_id].last_brain_info
+            if agent_brain_info is None:
+                agent_brain_info = next_info
+            agent_index = agent_brain_info.agents.index(agent_id)
+            for i in range(len(next_info.visual_observations)):
+                visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
+            vector_observations.append(agent_brain_info.vector_observations[agent_index])
+            text_observations.append(agent_brain_info.text_observations[agent_index])
+            if self.use_recurrent:
+                memories.append(agent_brain_info.memories[agent_index])
+            rewards.append(agent_brain_info.rewards[agent_index])
+            local_dones.append(agent_brain_info.local_done[agent_index])
+            max_reacheds.append(agent_brain_info.max_reached[agent_index])
+            agents.append(agent_brain_info.agents[agent_index])
+            prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
+            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
+        curr_info = BrainInfo(visual_observations, vector_observations, text_observations, memories, rewards,
+                              agents, local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
+        return curr_info
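To illustrate why this reconstruction is needed, the following is a small self-contained sketch (not part of the PR) of the same realignment idea using plain lists and hypothetical agent ids and observations: each agent in next_info is matched to its most recently stored row by agent id, falling back to the t+1 data when no history exists.

# Standalone sketch of the realignment performed by construct_curr_info.
# All names and data below are hypothetical, for illustration only.
def realign_by_agent_id(last_rows, next_agents, fallback):
    """Return one 'previous step' observation per agent in next_agents.

    last_rows: dict mapping agent id -> (agent_ids, observations) captured the
               last time that agent requested a decision.
    fallback:  (agent_ids, observations) for step t+1, used when an agent has
               no stored history (mirrors the `agent_brain_info is None` branch).
    """
    realigned = []
    for agent_id in next_agents:
        agent_ids, observations = last_rows.get(agent_id) or fallback
        realigned.append(observations[agent_ids.index(agent_id)])
    return realigned

# With On-Demand Decisions, agents [0, 2] acted at step t but [1, 2] request a
# decision at t+1, so pairing rows by position would mix up different agents.
last_rows = {0: ([0, 2], ["obs_a0_t", "obs_a2_t"]),
             2: ([0, 2], ["obs_a0_t", "obs_a2_t"])}
next_step = ([1, 2], ["obs_a1_t1", "obs_a2_t1"])
print(realign_by_agent_id(last_rows, [1, 2], next_step))
# ['obs_a1_t1', 'obs_a2_t']  (agent 1 falls back to t+1; agent 2 keeps its step-t row)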
-        :param curr_info: Current BrainInfo.
-        :param next_info: Next BrainInfo.
+        :BrainInfo curr_info: Current BrainInfo.
+        :BrainInfo next_info: Next BrainInfo.

-        if curr_info.agents != next_info.agents:
-            raise UnityTrainerException("Training with Curiosity-driven exploration"
-                                        " and On-Demand Decision making is currently not supported.")
-        feed_dict = {self.model.batch_size: len(curr_info.vector_observations), self.model.sequence_length: 1}
+        feed_dict = {self.model.batch_size: len(next_info.vector_observations), self.model.sequence_length: 1}
+        if curr_info.agents != next_info.agents:
+            curr_info = self.construct_curr_info(next_info)
        if self.use_visual_obs:
            for i in range(len(curr_info.visual_observations)):
                feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i]

        curr_info = curr_all_info[self.brain_name]
        next_info = next_all_info[self.brain_name]
        intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info)
        for agent_id in next_info.agents:
            stored_info = self.training_buffer[agent_id].last_brain_info
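For context, the intrinsic rewards computed above are typically added to the environment rewards when experiences are stored. The snippet below is a hedged, self-contained sketch of that combination with assumed values; it is not the actual bookkeeping from this trainer.

# Hedged sketch (assumed names and values, not from this diff): once
# generate_intrinsic_rewards has produced one bonus per agent in next_info,
# each agent's stored reward is the environment reward plus that bonus.
env_rewards = [0.0, 1.0]          # hypothetical next_info.rewards
intrinsic_rewards = [0.03, 0.01]  # hypothetical curiosity bonuses, same agent order
total_rewards = [e + i for e, i in zip(env_rewards, intrinsic_rewards)]
print(total_rewards)  # [0.03, 1.01]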
