
Fixes for recurrent

/develop-newnormalization
Ervin Teng, 5 years ago
Current commit
5ab2563b
2 files changed, with 2 insertions and 2 deletions
  1. ml-agents/mlagents/trainers/ppo/policy.py (2 changes)
  2. ml-agents/mlagents/trainers/trajectory.py (2 changes)

ml-agents/mlagents/trainers/ppo/policy.py (1 addition, 1 deletion)


 def get_batched_value_estimates(self, batch: AgentBuffer) -> Dict[str, np.ndarray]:
     feed_dict: Dict[tf.Tensor, Any] = {
         self.model.batch_size: batch.num_experiences,
-        self.model.sequence_length: self.sequence_length,
+        self.model.sequence_length: 1,  # We want to feed data in batch-wise, not time-wise.
     }
     if self.use_vec_obs:
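
Pinning sequence_length to 1 means the whole trajectory is fed as a batch of independent steps rather than as recurrent sequences when computing value estimates. A minimal sketch of the rationale, with hypothetical variable names and shapes (not the actual ML-Agents code): with a recurrent sequence length L, the buffer would have to be split into sequences of length L, which only works cleanly when the trajectory length is a multiple of L; with a length of 1, every experience stands alone.

import numpy as np

# Hypothetical illustration: a trajectory of 7 experiences and a trainer
# configured with a recurrent sequence length of 4.
num_experiences = 7
recurrent_seq_len = 4

obs = np.arange(num_experiences, dtype=np.float32).reshape(-1, 1)  # (7, 1)

# Time-wise feeding only divides evenly when the trajectory length is a
# multiple of the sequence length.
print(num_experiences % recurrent_seq_len == 0)   # False -> cannot reshape cleanly

# Batch-wise feeding (sequence_length = 1) is always valid: each experience
# becomes its own length-1 "sequence".
batched = obs.reshape(num_experiences, 1, -1)     # (batch, seq_len=1, obs_dim)
print(batched.shape)                              # (7, 1, 1)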

ml-agents/mlagents/trainers/trajectory.py (1 addition, 1 deletion)


agent_buffer_trajectory["next_vector_in"].append(
next_vec_vis_obs.vector_observations
)
if exp.memory:
if exp.memory is not None:
agent_buffer_trajectory["memory"].append(exp.memory)
agent_buffer_trajectory["masks"].append(1.0)
