
Hotfix memory leak on Python (#3664)

* Hotfix memory leak on Python

* Fixing

* Fixing a bug in the heuristic policy. A decision should not be requested when the agent is done

* [bug-fix] Make Python able to deal with 0-step episodes (#3671)

* adding some comments

Co-authored-by: Ervin T <ervin@unity3d.com>
/develop/add-fire
GitHub · 5 years ago
Current commit: de3fc4e8
6 changed files with 52 additions and 32 deletions
  1. com.unity.ml-agents/Runtime/Agent.cs (3 changes)
  2. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (17 changes)
  3. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (5 changes)
  4. ml-agents/mlagents/trainers/agent_processor.py (31 changes)
  5. ml-agents/mlagents/trainers/policy/tf_policy.py (19 changes)
  6. ml-agents/mlagents/trainers/tests/test_agent_processor.py (9 changes)

com.unity.ml-agents/Runtime/Agent.cs (3 changes)


void NotifyAgentDone(DoneReason doneReason)
{
    m_Info.episodeId = m_EpisodeId;
    m_Info.reward = m_Reward;
    m_Info.done = true;
    m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;

// Second hunk (model assignment path): the agent is marked done with DoneReason.Disabled
// before the policy factory is pointed at the new model.
        // If everything is the same, don't make any changes.
        return;
    }
    NotifyAgentDone(DoneReason.Disabled);
    m_PolicyFactory.model = model;
    m_PolicyFactory.inferenceDevice = inferenceDevice;
    m_PolicyFactory.behaviorName = behaviorName;

com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (17 changes)


{
    if (m_CurrentUnityRlOutput.AgentInfos.ContainsKey(behaviorName))
    {
-       if (output == null)
-       {
-           output = new UnityRLInitializationOutputProto();
-       }
-
-       var brainParameters = m_UnsentBrainKeys[behaviorName];
-       output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true));
+       if (m_CurrentUnityRlOutput.AgentInfos[behaviorName].CalculateSize() > 0)
+       {
+           // Only send the BrainParameters if there is a non empty list of
+           // AgentInfos ready to be sent.
+           // This is to ensure that The Python side will always have a first
+           // observation when receiving the BrainParameters
+           if (output == null)
+           {
+               output = new UnityRLInitializationOutputProto();
+           }
+
+           var brainParameters = m_UnsentBrainKeys[behaviorName];
+           output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true));
+       }
    }
}
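The comment in this hunk carries the key constraint: BrainParameters should only go out together with a non-empty batch of AgentInfos, so the Python side always receives a first observation along with the parameters. Below is a minimal Python sketch of that guard for illustration only; build_init_output, pending_agent_infos and unsent_brain_params are hypothetical names, not the actual RpcCommunicator or mlagents API.

from typing import Dict, List, Optional


def build_init_output(
    pending_agent_infos: Dict[str, List[dict]],
    unsent_brain_params: Dict[str, dict],
) -> Optional[Dict[str, dict]]:
    """Return brain parameters only for behaviors that also have at least one
    AgentInfo queued, so the receiver gets a first observation with them.
    All names here are illustrative, not the real RpcCommunicator API."""
    output: Optional[Dict[str, dict]] = None
    for behavior_name, params in unsent_brain_params.items():
        infos = pending_agent_infos.get(behavior_name, [])
        if not infos:
            # Nothing observed yet for this behavior: hold the parameters back
            # until an exchange that actually carries agent data.
            continue
        if output is None:
            output = {}
        output[behavior_name] = params
    return output


pending = {"Walker": [{"obs": [0.1]}], "Crawler": []}
params = {"Walker": {"action_size": 2}, "Crawler": {"action_size": 4}}
print(build_init_output(pending, params))  # only "Walker" is included

A behavior whose AgentInfos list is still empty keeps its parameters around for a later exchange, which appears to be the role m_UnsentBrainKeys plays on the C# side.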

com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (5 changes)


public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
    StepSensors(sensors);
-   m_LastDecision = m_Heuristic.Invoke();
+   if (!info.done)
+   {
+       m_LastDecision = m_Heuristic.Invoke();
+   }
}

/// <inheritdoc />
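This is the change the commit message describes as "a decision should not be requested when the agent is done". As a rough Python analogue of the same guard (HeuristicLikePolicy and its members are hypothetical names, not part of ML-Agents):

from dataclasses import dataclass
from typing import Callable, List, Optional


@dataclass
class HeuristicLikePolicy:
    """Illustrative stand-in for a heuristic policy; not the ML-Agents C# class."""
    heuristic_fn: Callable[[], List[float]]
    last_decision: Optional[List[float]] = None

    def request_decision(self, done: bool) -> None:
        # Mirror of the C# fix: only query the heuristic while the episode is
        # still running; a done agent does not generate a spurious extra decision.
        if not done:
            self.last_decision = self.heuristic_fn()


policy = HeuristicLikePolicy(heuristic_fn=lambda: [0.0])
policy.request_decision(done=True)   # agent already done: heuristic not invoked
assert policy.last_decision is None
policy.request_decision(done=False)  # normal step: heuristic is invoked
assert policy.last_decision == [0.0]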

ml-agents/mlagents/trainers/agent_processor.py (31 changes)


import sys
-from typing import List, Dict, Deque, TypeVar, Generic, Tuple, Set
+from typing import List, Dict, Deque, TypeVar, Generic, Tuple, Any
from collections import defaultdict, Counter, deque
from mlagents_envs.base_env import BatchedStepResult, StepResult

for _entropy in take_action_outputs["entropy"]:
    self.stats_reporter.add_stat("Policy/Entropy", _entropy)
-terminated_agents: Set[str] = set()
# Make unique agent_ids that are global across workers
action_global_agent_ids = [
    get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids

stored_take_action_outputs = self.last_take_action_outputs.get(
    global_id, None
)
if stored_agent_step is not None and stored_take_action_outputs is not None:
    # We know the step is from the same worker, so use the local agent id.
    obs = stored_agent_step.obs

traj_queue.put(trajectory)
self.experience_buffers[global_id] = []
if curr_agent_step.done:
    # Record episode length for agents which have had at least
    # 1 step. Done after reset ignored.
-   terminated_agents.add(global_id)
elif not curr_agent_step.done:
    self.episode_steps[global_id] += 1

    batched_step_result.agent_id_to_index[_id],
)
-for terminated_id in terminated_agents:
-    self._clean_agent_data(terminated_id)
+# Delete all done agents, regardless of if they had a 0-length episode.
+if curr_agent_step.done:
+    self._clean_agent_data(global_id)
for _gid in action_global_agent_ids:
    # If the ID doesn't have a last step result, the agent just reset,

"""
Removes the data for an Agent.
"""
-del self.experience_buffers[global_id]
-del self.last_take_action_outputs[global_id]
-del self.last_step_result[global_id]
-del self.episode_steps[global_id]
-del self.episode_rewards[global_id]
+self._safe_delete(self.experience_buffers, global_id)
+self._safe_delete(self.last_take_action_outputs, global_id)
+self._safe_delete(self.last_step_result, global_id)
+self._safe_delete(self.episode_steps, global_id)
+self._safe_delete(self.episode_rewards, global_id)

+def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None:
+    """
+    Safe removes data from a dictionary. If not found,
+    don't delete.
+    """
+    if key in my_dictionary:
+        del my_dictionary[key]

def publish_trajectory_queue(
    self, trajectory_queue: "AgentManagerQueue[Trajectory]"
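Replacing the bare del statements with _safe_delete, and cleaning up every done agent immediately, is what makes 0-step episodes survivable: an agent that reports done on its very first step may never have been entered into some of these dictionaries, so an unconditional del would raise KeyError. A minimal sketch of the pattern, assuming a simplified AgentStateStore that stands in for AgentProcessor (the class and its fields are illustrative, not the real trainer code):

from typing import Any, Dict


class AgentStateStore:
    """Simplified, hypothetical version of the per-agent bookkeeping in
    AgentProcessor; only the cleanup pattern from the diff is reproduced."""

    def __init__(self) -> None:
        self.experience_buffers: Dict[str, list] = {}
        self.episode_steps: Dict[str, int] = {}
        self.episode_rewards: Dict[str, float] = {}

    def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None:
        # Remove the key only if it exists; agents with 0-step episodes may
        # never have been registered in every dictionary.
        if key in my_dictionary:
            del my_dictionary[key]

    def clean_agent_data(self, global_id: str) -> None:
        self._safe_delete(self.experience_buffers, global_id)
        self._safe_delete(self.episode_steps, global_id)
        self._safe_delete(self.episode_rewards, global_id)


store = AgentStateStore()
store.episode_steps["worker0-agent7"] = 3
# This agent never touched experience_buffers or episode_rewards;
# cleanup still succeeds without a KeyError.
store.clean_agent_data("worker0-agent7")
assert store.episode_steps == {}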

ml-agents/mlagents/trainers/policy/tf_policy.py (19 changes)


if batched_step_result.n_agents() == 0:
    return ActionInfo.empty()
agents_done = [
    agent
    for agent, done in zip(
        batched_step_result.agent_id, batched_step_result.done
    )
    if done
]
self.remove_memories(agents_done)
self.remove_previous_action(agents_done)
global_agent_ids = [
    get_global_agent_id(worker_id, int(agent_id))
    for agent_id in batched_step_result.agent_id

def create_input_placeholders(self):
    with self.graph.as_default():
-       self.global_step, self.increment_step_op, self.steps_to_increment = (
-           ModelUtils.create_global_steps()
-       )
+       (
+           self.global_step,
+           self.increment_step_op,
+           self.steps_to_increment,
+       ) = ModelUtils.create_global_steps()
        self.visual_in = ModelUtils.create_visual_input_placeholders(
            self.brain.camera_resolutions
        )
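The first tf_policy.py hunk shows the policy discarding its stored recurrent memories and previous actions for agents that report done; holding that per-agent state forever is exactly the kind of unbounded growth a memory-leak hotfix targets. A small sketch of the pruning pattern, assuming a hypothetical MemoryHolder (save_memories/remove_memories mirror the method names in the hunk, but this is not the TFPolicy implementation):

import numpy as np
from typing import Dict, List


class MemoryHolder:
    """Hypothetical stand-in for per-agent memory bookkeeping in a policy."""

    def __init__(self) -> None:
        self.memory_dict: Dict[str, np.ndarray] = {}

    def save_memories(self, agent_ids: List[str], memories: np.ndarray) -> None:
        for idx, agent_id in enumerate(agent_ids):
            self.memory_dict[agent_id] = memories[idx]

    def remove_memories(self, agent_ids: List[str]) -> None:
        # Without this pruning step, entries accumulate for every agent id
        # ever encountered, which is the leak the hotfix addresses.
        for agent_id in agent_ids:
            self.memory_dict.pop(agent_id, None)


holder = MemoryHolder()
holder.save_memories(["a0", "a1"], np.zeros((2, 4)))
# Agent a1 reported done on this step, so its memory is released.
holder.remove_memories(["a1"])
assert list(holder.memory_dict) == ["a0"]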

ml-agents/mlagents/trainers/tests/test_agent_processor.py (9 changes)


    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0
    assert len(processor.last_step_result.keys()) == 0
+   # check that steps with immediate dones don't add to dicts
+   processor.add_experiences(mock_done_step, 0, ActionInfo.empty())
+   assert len(processor.experience_buffers.keys()) == 0
+   assert len(processor.last_take_action_outputs.keys()) == 0
+   assert len(processor.episode_steps.keys()) == 0
+   assert len(processor.episode_rewards.keys()) == 0
+   assert len(processor.last_step_result.keys()) == 0


def test_end_episode():
