
Hotfix memory leak on Python (#3664)

* Hotfix memory leak on Python

* Fixing

* Fixing a bug in the heuristic policy. A decision should not be requested when the agent is done

* [bug-fix] Make Python able to deal with 0-step episodes (#3671)

* adding some comments

Co-authored-by: Ervin T <ervin@unity3d.com>
/develop/add-fire
GitHub · 5 years ago
Current commit: de3fc4e8
6 changed files with 52 additions and 32 deletions
  1. com.unity.ml-agents/Runtime/Agent.cs (3 changes)
  2. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (17 changes)
  3. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (5 changes)
  4. ml-agents/mlagents/trainers/agent_processor.py (31 changes)
  5. ml-agents/mlagents/trainers/policy/tf_policy.py (19 changes)
  6. ml-agents/mlagents/trainers/tests/test_agent_processor.py (9 changes)

com.unity.ml-agents/Runtime/Agent.cs (3 changes)


void NotifyAgentDone(DoneReason doneReason)
{
    m_Info.episodeId = m_EpisodeId;
    m_Info.reward = m_Reward;
    m_Info.done = true;
    m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;

// Second hunk (model assignment path): the agent is marked done with DoneReason.Disabled
// before the policy factory is pointed at the new model.
        // If everything is the same, don't make any changes.
        return;
    }
    NotifyAgentDone(DoneReason.Disabled);
    m_PolicyFactory.model = model;
    m_PolicyFactory.inferenceDevice = inferenceDevice;
    m_PolicyFactory.behaviorName = behaviorName;

com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (17 changes)


{
    if (m_CurrentUnityRlOutput.AgentInfos.ContainsKey(behaviorName))
    {
-       if (output == null)
-       {
-           output = new UnityRLInitializationOutputProto();
-       }
-
-       var brainParameters = m_UnsentBrainKeys[behaviorName];
-       output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true));
+       if (m_CurrentUnityRlOutput.AgentInfos[behaviorName].CalculateSize() > 0)
+       {
+           // Only send the BrainParameters if there is a non empty list of
+           // AgentInfos ready to be sent.
+           // This is to ensure that The Python side will always have a first
+           // observation when receiving the BrainParameters
+           if (output == null)
+           {
+               output = new UnityRLInitializationOutputProto();
+           }
+
+           var brainParameters = m_UnsentBrainKeys[behaviorName];
+           output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true));
+       }
    }
}
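The comment in this hunk carries the key constraint: BrainParameters should only go out together with a non-empty batch of AgentInfos, so the Python side always receives a first observation along with the parameters. Below is a minimal Python sketch of that guard for illustration only; build_init_output, pending_agent_infos and unsent_brain_params are hypothetical names, not the actual RpcCommunicator or mlagents API.

from typing import Dict, List, Optional


def build_init_output(
    pending_agent_infos: Dict[str, List[dict]],
    unsent_brain_params: Dict[str, dict],
) -> Optional[Dict[str, dict]]:
    """Return brain parameters only for behaviors that also have at least one
    AgentInfo queued, so the receiver gets a first observation with them.
    All names here are illustrative, not the real RpcCommunicator API."""
    output: Optional[Dict[str, dict]] = None
    for behavior_name, params in unsent_brain_params.items():
        infos = pending_agent_infos.get(behavior_name, [])
        if not infos:
            # Nothing observed yet for this behavior: hold the parameters back
            # until an exchange that actually carries agent data.
            continue
        if output is None:
            output = {}
        output[behavior_name] = params
    return output


pending = {"Walker": [{"obs": [0.1]}], "Crawler": []}
params = {"Walker": {"action_size": 2}, "Crawler": {"action_size": 4}}
print(build_init_output(pending, params))  # only "Walker" is included

A behavior whose AgentInfos list is still empty keeps its parameters around for a later exchange, which appears to be the role m_UnsentBrainKeys plays on the C# side.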

com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (5 changes)


public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
    StepSensors(sensors);
-   m_LastDecision = m_Heuristic.Invoke();
+   if (!info.done)
+   {
+       m_LastDecision = m_Heuristic.Invoke();
+   }
}

/// <inheritdoc />
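This is the change the commit message describes as "a decision should not be requested when the agent is done". As a rough Python analogue of the same guard (HeuristicLikePolicy and its members are hypothetical names, not part of ML-Agents):

from dataclasses import dataclass
from typing import Callable, List, Optional


@dataclass
class HeuristicLikePolicy:
    """Illustrative stand-in for a heuristic policy; not the ML-Agents C# class."""
    heuristic_fn: Callable[[], List[float]]
    last_decision: Optional[List[float]] = None

    def request_decision(self, done: bool) -> None:
        # Mirror of the C# fix: only query the heuristic while the episode is
        # still running; a done agent does not generate a spurious extra decision.
        if not done:
            self.last_decision = self.heuristic_fn()


policy = HeuristicLikePolicy(heuristic_fn=lambda: [0.0])
policy.request_decision(done=True)   # agent already done: heuristic not invoked
assert policy.last_decision is None
policy.request_decision(done=False)  # normal step: heuristic is invoked
assert policy.last_decision == [0.0]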

ml-agents/mlagents/trainers/agent_processor.py (31 changes)


import sys
-from typing import List, Dict, Deque, TypeVar, Generic, Tuple, Set
+from typing import List, Dict, Deque, TypeVar, Generic, Tuple, Any
from collections import defaultdict, Counter, deque
from mlagents_envs.base_env import BatchedStepResult, StepResult

for _entropy in take_action_outputs["entropy"]:
    self.stats_reporter.add_stat("Policy/Entropy", _entropy)
-terminated_agents: Set[str] = set()
# Make unique agent_ids that are global across workers
action_global_agent_ids = [
    get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids

stored_take_action_outputs = self.last_take_action_outputs.get(
    global_id, None
)
if stored_agent_step is not None and stored_take_action_outputs is not None:
    # We know the step is from the same worker, so use the local agent id.
    obs = stored_agent_step.obs

traj_queue.put(trajectory)
self.experience_buffers[global_id] = []
if curr_agent_step.done:
    # Record episode length for agents which have had at least
    # 1 step. Done after reset ignored.
-   terminated_agents.add(global_id)
elif not curr_agent_step.done:
    self.episode_steps[global_id] += 1

    batched_step_result.agent_id_to_index[_id],
)
-for terminated_id in terminated_agents:
-    self._clean_agent_data(terminated_id)
+# Delete all done agents, regardless of if they had a 0-length episode.
+if curr_agent_step.done:
+    self._clean_agent_data(global_id)
for _gid in action_global_agent_ids:
    # If the ID doesn't have a last step result, the agent just reset,

"""
Removes the data for an Agent.
"""
-del self.experience_buffers[global_id]
-del self.last_take_action_outputs[global_id]
-del self.last_step_result[global_id]
-del self.episode_steps[global_id]
-del self.episode_rewards[global_id]
+self._safe_delete(self.experience_buffers, global_id)
+self._safe_delete(self.last_take_action_outputs, global_id)
+self._safe_delete(self.last_step_result, global_id)
+self._safe_delete(self.episode_steps, global_id)
+self._safe_delete(self.episode_rewards, global_id)

+def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None:
+    """
+    Safe removes data from a dictionary. If not found,
+    don't delete.
+    """
+    if key in my_dictionary:
+        del my_dictionary[key]

def publish_trajectory_queue(
    self, trajectory_queue: "AgentManagerQueue[Trajectory]"
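Replacing the bare del statements with _safe_delete, and cleaning up every done agent immediately, is what makes 0-step episodes survivable: an agent that reports done on its very first step may never have been entered into some of these dictionaries, so an unconditional del would raise KeyError. A minimal sketch of the pattern, assuming a simplified AgentStateStore that stands in for AgentProcessor (the class and its fields are illustrative, not the real trainer code):

from typing import Any, Dict


class AgentStateStore:
    """Simplified, hypothetical version of the per-agent bookkeeping in
    AgentProcessor; only the cleanup pattern from the diff is reproduced."""

    def __init__(self) -> None:
        self.experience_buffers: Dict[str, list] = {}
        self.episode_steps: Dict[str, int] = {}
        self.episode_rewards: Dict[str, float] = {}

    def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None:
        # Remove the key only if it exists; agents with 0-step episodes may
        # never have been registered in every dictionary.
        if key in my_dictionary:
            del my_dictionary[key]

    def clean_agent_data(self, global_id: str) -> None:
        self._safe_delete(self.experience_buffers, global_id)
        self._safe_delete(self.episode_steps, global_id)
        self._safe_delete(self.episode_rewards, global_id)


store = AgentStateStore()
store.episode_steps["worker0-agent7"] = 3
# This agent never touched experience_buffers or episode_rewards;
# cleanup still succeeds without a KeyError.
store.clean_agent_data("worker0-agent7")
assert store.episode_steps == {}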

ml-agents/mlagents/trainers/policy/tf_policy.py (19 changes)


if batched_step_result.n_agents() == 0:
    return ActionInfo.empty()
agents_done = [
    agent
    for agent, done in zip(
        batched_step_result.agent_id, batched_step_result.done
    )
    if done
]
self.remove_memories(agents_done)
self.remove_previous_action(agents_done)
global_agent_ids = [
    get_global_agent_id(worker_id, int(agent_id))
    for agent_id in batched_step_result.agent_id

def create_input_placeholders(self):
    with self.graph.as_default():
-       self.global_step, self.increment_step_op, self.steps_to_increment = (
-           ModelUtils.create_global_steps()
-       )
+       (
+           self.global_step,
+           self.increment_step_op,
+           self.steps_to_increment,
+       ) = ModelUtils.create_global_steps()
        self.visual_in = ModelUtils.create_visual_input_placeholders(
            self.brain.camera_resolutions
        )
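The first tf_policy.py hunk shows the policy discarding its stored recurrent memories and previous actions for agents that report done; holding that per-agent state forever is exactly the kind of unbounded growth a memory-leak hotfix targets. A small sketch of the pruning pattern, assuming a hypothetical MemoryHolder (save_memories/remove_memories mirror the method names in the hunk, but this is not the TFPolicy implementation):

import numpy as np
from typing import Dict, List


class MemoryHolder:
    """Hypothetical stand-in for per-agent memory bookkeeping in a policy."""

    def __init__(self) -> None:
        self.memory_dict: Dict[str, np.ndarray] = {}

    def save_memories(self, agent_ids: List[str], memories: np.ndarray) -> None:
        for idx, agent_id in enumerate(agent_ids):
            self.memory_dict[agent_id] = memories[idx]

    def remove_memories(self, agent_ids: List[str]) -> None:
        # Without this pruning step, entries accumulate for every agent id
        # ever encountered, which is the leak the hotfix addresses.
        for agent_id in agent_ids:
            self.memory_dict.pop(agent_id, None)


holder = MemoryHolder()
holder.save_memories(["a0", "a1"], np.zeros((2, 4)))
# Agent a1 reported done on this step, so its memory is released.
holder.remove_memories(["a1"])
assert list(holder.memory_dict) == ["a0"]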

ml-agents/mlagents/trainers/tests/test_agent_processor.py (9 changes)


    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0
    assert len(processor.last_step_result.keys()) == 0
+   # check that steps with immediate dones don't add to dicts
+   processor.add_experiences(mock_done_step, 0, ActionInfo.empty())
+   assert len(processor.experience_buffers.keys()) == 0
+   assert len(processor.last_take_action_outputs.keys()) == 0
+   assert len(processor.episode_steps.keys()) == 0
+   assert len(processor.episode_rewards.keys()) == 0
+   assert len(processor.last_step_result.keys()) == 0


def test_end_episode():
