浏览代码

Hotfix 0.3.1b (#656)

* [Fix] Use the stored agent info instead of the previous agent info when bootstrapping the value

* [Bug Fix] Addressed #643

* [Added Line Break]
/develop-generalizationTraining-TrainerController
GitHub 6 年前
当前提交
b2675216
共有 2 个文件被更改,包括 8 次插入、6 次删除
  1. 5
      python/unitytrainers/bc/trainer.py
  2. 9
      python/unitytrainers/ppo/trainer.py

5
python/unitytrainers/bc/trainer.py


else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = info_teacher.text_observations[idx].lower().split(",")
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
lower().split(",")
if next_info_teacher_reset == "true":

9
python/unitytrainers/ppo/trainer.py


"""
info = new_info[self.brain_name]
last_info = current_info[self.brain_name]
agent_id = info.agents[l]
bootstrapping_info = last_info
bootstrapping_info = self.training_buffer[agent_id].last_brain_info
idx = bootstrapping_info.agents.index(agent_id)
idx = l
feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
if self.use_observations:
for i in range(len(bootstrapping_info.visual_observations)):

feed_dict[self.model.memory_in] = bootstrapping_info.memories
if not self.is_continuous_action and self.use_recurrent:
feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
value_next = self.sess.run(self.model.value, feed_dict)[l]
agent_id = info.agents[l]
value_next = self.sess.run(self.model.value, feed_dict)[idx]
self.training_buffer[agent_id]['advantages'].set(
get_gae(

正在加载...
取消
保存