Hotfix 0.3.1b (#656)

* [Fix] Use the stored agent info instead of the previous agent info when bootstrapping the value * [Bug Fix] Addressed #643 * [Added Line Break]
7 年前 · b2675216
--- a/python/unitytrainers/bc/trainer.py
+++ b/python/unitytrainers/bc/trainer.py
            else:
                idx = stored_info_teacher.agents.index(agent_id)
                next_idx = next_info_teacher.agents.index(agent_id)
-                if info_teacher.text_observations[idx] != "":
-                    info_teacher_record, info_teacher_reset = info_teacher.text_observations[idx].lower().split(",")
+                if stored_info_teacher.text_observations[idx] != "":
+                    info_teacher_record, info_teacher_reset = \
+                        stored_info_teacher.text_observations[idx].lower().split(",")
                    next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
                        lower().split(",")
                    if next_info_teacher_reset == "true":
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
        """

        info = new_info[self.brain_name]
-        last_info = current_info[self.brain_name]
+                agent_id = info.agents[l]
-                        bootstrapping_info = last_info
+                        bootstrapping_info = self.training_buffer[agent_id].last_brain_info
+                        idx = bootstrapping_info.agents.index(agent_id)
+                        idx = l
                    feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
                    if self.use_observations:
                        for i in range(len(bootstrapping_info.visual_observations)):
                        feed_dict[self.model.memory_in] = bootstrapping_info.memories
                    if not self.is_continuous_action and self.use_recurrent:
                        feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
-                    value_next = self.sess.run(self.model.value, feed_dict)[l]
-                agent_id = info.agents[l]
+                    value_next = self.sess.run(self.model.value, feed_dict)[idx]

                self.training_buffer[agent_id]['advantages'].set(
                    get_gae(