|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
info = new_info[self.brain_name] |
|
|
|
last_info = current_info[self.brain_name] |
|
|
|
agent_id = info.agents[l] |
|
|
|
bootstrapping_info = last_info |
|
|
|
bootstrapping_info = self.training_buffer[agent_id].last_brain_info |
|
|
|
idx = bootstrapping_info.agents.index(agent_id) |
|
|
|
idx = l |
|
|
|
feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1} |
|
|
|
if self.use_observations: |
|
|
|
for i in range(len(bootstrapping_info.visual_observations)): |
|
|
|
|
|
|
feed_dict[self.model.memory_in] = bootstrapping_info.memories |
|
|
|
if not self.is_continuous_action and self.use_recurrent: |
|
|
|
feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1]) |
|
|
|
value_next = self.sess.run(self.model.value, feed_dict)[l] |
|
|
|
agent_id = info.agents[l] |
|
|
|
value_next = self.sess.run(self.model.value, feed_dict)[idx] |
|
|
|
|
|
|
|
self.training_buffer[agent_id]['advantages'].set( |
|
|
|
get_gae( |
|
|
|