
Code clean-up

/develop-generalizationTraining-TrainerController
Arthur Juliani, 7 years ago
Commit cfb7cfef
1 changed file with 12 additions and 14 deletions
  1. python/unitytrainers/ppo/trainer.py (26 changed lines)

python/unitytrainers/ppo/trainer.py


         prev_text_actions = []
         for agent_id in next_info.agents:
             agent_brain_info = self.training_buffer[agent_id].last_brain_info
+            agent_index = agent_brain_info.agents.index(agent_id)
-                visual_observations[i].append(
-                    agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)])
-            vector_observations.append(agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)])
-            text_observations.append(agent_brain_info.text_observations[agent_brain_info.agents.index(agent_id)])
+                visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
+            vector_observations.append(agent_brain_info.vector_observations[agent_index])
+            text_observations.append(agent_brain_info.text_observations[agent_index])
-            memories.append(agent_brain_info.memories[agent_brain_info.agents.index(agent_id)])
-            rewards.append(agent_brain_info.rewards[agent_brain_info.agents.index(agent_id)])
-            local_dones.append(agent_brain_info.local_done[agent_brain_info.agents.index(agent_id)])
-            max_reacheds.append(agent_brain_info.max_reached[agent_brain_info.agents.index(agent_id)])
-            agents.append(agent_brain_info.agents[agent_brain_info.agents.index(agent_id)])
-            prev_vector_actions.append(
-                agent_brain_info.previous_vector_actions[agent_brain_info.agents.index(agent_id)])
-            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_brain_info.agents.index(agent_id)])
+            memories.append(agent_brain_info.memories[agent_index])
+            rewards.append(agent_brain_info.rewards[agent_index])
+            local_dones.append(agent_brain_info.local_done[agent_index])
+            max_reacheds.append(agent_brain_info.max_reached[agent_index])
+            agents.append(agent_brain_info.agents[agent_index])
+            prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
+            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
-                              agents,
-                              local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
+                              agents, local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
         return curr_info

     def generate_intrinsic_rewards(self, curr_info, next_info):
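
The change above is a straightforward clean-up: the repeated agent_brain_info.agents.index(agent_id) lookup, which scans the agents list once per copied field, is now computed once per agent and cached in agent_index. Below is a minimal sketch of the same pattern, assuming a simplified stand-in for BrainInfo; the BrainLike class, the gather function, and their fields are hypothetical placeholders for illustration, not the actual ml-agents API.

# Minimal sketch of the clean-up pattern shown above: cache the result of
# list.index() once per agent instead of recomputing it for every field.

class BrainLike:
    """Hypothetical container; fields are lists ordered the same way as agents."""
    def __init__(self, agents, rewards, local_done):
        self.agents = agents          # agent ids, defines the ordering
        self.rewards = rewards        # reward per agent, same order as agents
        self.local_done = local_done  # done flag per agent, same order as agents


def gather(info, agent_ids):
    """Collect per-agent fields for the requested agent ids."""
    rewards, local_dones = [], []
    for agent_id in agent_ids:
        # Compute the position once and reuse it for every field,
        # instead of calling info.agents.index(agent_id) on each line.
        agent_index = info.agents.index(agent_id)
        rewards.append(info.rewards[agent_index])
        local_dones.append(info.local_done[agent_index])
    return rewards, local_dones


if __name__ == "__main__":
    info = BrainLike(agents=[3, 7, 9],
                     rewards=[0.1, 0.5, -1.0],
                     local_done=[False, True, False])
    print(gather(info, [9, 3]))  # ([-1.0, 0.1], [False, False])

Caching the index avoids the repeated linear scan of the agents list and keeps each append on a single line, which is where the 14-deleted / 12-inserted line difference in this commit comes from.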
