浏览代码

Decoupling IPolicy from Agent (#3203)

* initial commit

* Fixed the compilation errors

* fixing the tests

* Addressing the comment about the brain parameters

* Fixing typo

* Made timers more accurate

* addressing comments

* Better memory allocation

* Added some docstrings

* Adding better sensor validation

* Wrapped in #if DEBUG and also wrapped GenerateSensorData in a timer

* Timer changes
/asymm-envs
GitHub 5 年前
当前提交
f97bcf1c
共有 24 个文件被更改,包括 363 次插入328 次删除
  1. 2
      UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
  2. 89
      UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs
  3. 33
      UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs
  4. 1
      UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
  5. 56
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  6. 6
      UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs
  7. 7
      UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs
  8. 10
      UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
  9. 100
      UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
  10. 9
      UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs
  11. 61
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs
  12. 56
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
  13. 46
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
  14. 8
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs
  15. 15
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs
  16. 6
      UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs
  17. 45
      UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs
  18. 14
      UnitySDK/Assets/ML-Agents/Scripts/Policy/HeuristicPolicy.cs
  19. 5
      UnitySDK/Assets/ML-Agents/Scripts/Policy/IPolicy.cs
  20. 45
      UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs
  21. 6
      UnitySDK/Assets/ML-Agents/Scripts/Sensor/WriteAdapter.cs
  22. 16
      UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs
  23. 44
      UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorShapeValidator.cs
  24. 11
      UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorShapeValidator.cs.meta

2
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs


storedVectorActions = new[] { 0f, 1f },
};
demoStore.Record(agentInfo);
demoStore.Record(agentInfo, new System.Collections.Generic.List<Sensor.Observation>());
demoStore.Close();
}

89
UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs


using System.Reflection;
using Barracuda;
using MLAgents.InferenceBrain;
using System;
namespace MLAgents.Tests
{

}
}
/// <summary>
/// Creates two GameObjects ("goA" and "goB"), attaches a TestAgent to each,
/// and returns the agents in creation order.
/// </summary>
List<Agent> GetFakeAgentInfos()
{
    var fakeAgents = new List<Agent>();
    foreach (var objectName in new[] { "goA", "goB" })
    {
        // One host GameObject per agent; the component is added right after creation.
        var host = new GameObject(objectName);
        fakeAgents.Add(host.AddComponent<TestAgent>());
    }
    return fakeAgents;
}
[Test]
public void Construction()
{

shape = new long[] { 2, 3 },
data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
};
var agentInfos = GetFakeAgentInfos();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos;
var action0 = new AgentAction();
var action1 = new AgentAction();
var callbacks = new List<AgentIdActionPair>()
{
new AgentIdActionPair{agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair{agentId = 1, action = (a) => action1 = a}
};
applier.Apply(inputTensor, callbacks);
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);
var action = agent.GetAction();
Assert.AreEqual(action.vectorActions[0], 1);
Assert.AreEqual(action.vectorActions[1], 2);
Assert.AreEqual(action.vectorActions[2], 3);
Assert.AreEqual(action0.vectorActions[0], 1);
Assert.AreEqual(action0.vectorActions[1], 2);
Assert.AreEqual(action0.vectorActions[2], 3);
agent = agents[1] as TestAgent;
Assert.NotNull(agent);
action = agent.GetAction();
Assert.AreEqual(action.vectorActions[0], 4);
Assert.AreEqual(action.vectorActions[1], 5);
Assert.AreEqual(action.vectorActions[2], 6);
Assert.AreEqual(action1.vectorActions[0], 4);
Assert.AreEqual(action1.vectorActions[1], 5);
Assert.AreEqual(action1.vectorActions[2], 6);
}
[Test]

5,
new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var agentInfos = GetFakeAgentInfos();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);
var action = agent.GetAction();
Assert.AreEqual(action.vectorActions[0], 1);
Assert.AreEqual(action.vectorActions[1], 1);
agent = agents[1] as TestAgent;
Assert.NotNull(agent);
action = agent.GetAction();
Assert.AreEqual(action.vectorActions[0], 1);
Assert.AreEqual(action.vectorActions[1], 2);
alloc.Dispose();
}
[Test]
public void ApplyValueEstimate()
{
var inputTensor = new TensorProxy()
var action0 = new AgentAction();
var action1 = new AgentAction();
var callbacks = new List<AgentIdActionPair>()
shape = new long[] { 2, 1 },
data = new Tensor(2, 1, new[] { 0.5f, 8f })
new AgentIdActionPair{agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair{agentId = 1, action = (a) => action1 = a}
var agentInfos = GetFakeAgentInfos();
var applier = new ValueEstimateApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos;
applier.Apply(inputTensor, callbacks);
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);
var action = agent.GetAction();
Assert.AreEqual(action.value, 0.5f);
Assert.AreEqual(action0.vectorActions[0], 1);
Assert.AreEqual(action0.vectorActions[1], 1);
agent = agents[1] as TestAgent;
Assert.NotNull(agent);
action = agent.GetAction();
Assert.AreEqual(action.value, 8);
Assert.AreEqual(action1.vectorActions[0], 1);
Assert.AreEqual(action1.vectorActions[1], 2);
alloc.Dispose();
}
}
}

33
UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs


{
public class EditModeTestInternalBrainTensorGenerator
{
static IEnumerable<Agent> GetFakeAgents()
static List<Agent> GetFakeAgents()
{
var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<Academy>();

agentA.Info = infoA;
agentB.Info = infoB;
return agents;
}

generator.AddSensorIndex(0);
generator.AddSensorIndex(1);
generator.AddSensorIndex(2);
generator.Generate(inputTensor, batchSize, agentInfos);
var agent0 = agentInfos[0];
var agent1 = agentInfos[1];
var inputs = new List<AgentInfoSensorsPair>
{
new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
};
generator.Generate(inputTensor, batchSize, inputs);
Assert.IsNotNull(inputTensor.data);
Assert.AreEqual(inputTensor.data[0, 0], 1);
Assert.AreEqual(inputTensor.data[0, 2], 3);

var agentInfos = GetFakeAgents();
var alloc = new TensorCachingAllocator();
var generator = new PreviousActionInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
var agent0 = agentInfos[0];
var agent1 = agentInfos[1];
var inputs = new List<AgentInfoSensorsPair>
{
new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
};
generator.Generate(inputTensor, batchSize, inputs);
Assert.IsNotNull(inputTensor.data);
Assert.AreEqual(inputTensor.data[0, 0], 1);
Assert.AreEqual(inputTensor.data[0, 1], 2);

var agentInfos = GetFakeAgents();
var alloc = new TensorCachingAllocator();
var generator = new ActionMaskInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
var agent0 = agentInfos[0];
var agent1 = agentInfos[1];
var inputs = new List<AgentInfoSensorsPair>
{
new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
};
generator.Generate(inputTensor, batchSize, inputs);
Assert.IsNotNull(inputTensor.data);
Assert.AreEqual(inputTensor.data[0, 0], 1);
Assert.AreEqual(inputTensor.data[0, 4], 1);

1
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


{
return new float[0];
}
}
public class TestSensor : ISensor

56
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


/// </summary>
public struct AgentInfo
{
/// <summary>
/// Most recent observations.
/// </summary>
public List<Observation> observations;
/// <summary>
/// Keeps track of the last vector action taken by the Brain.

m_Info.storedVectorActions = new float[param.vectorActionSize.Length];
}
}
m_Info.observations = new List<Observation>();
}
/// <summary>

{
// Get all attached sensor components
SensorComponent[] attachedSensorComponents;
if(m_PolicyFactory.useChildSensors)
if (m_PolicyFactory.useChildSensors)
{
attachedSensorComponents = GetComponentsInChildren<SensorComponent>();
}

Debug.Assert(!sensors[i].GetName().Equals(sensors[i + 1].GetName()), "Sensor names must be unique.");
}
#endif
// Create a buffer for writing uncompressed (i.e. float) sensor data to
int numFloatObservations = 0;
for (var i = 0; i < sensors.Count; i++)
{
if (sensors[i].GetCompressionType() == SensorCompressionType.None)
{
numFloatObservations += sensors[i].ObservationSize();
}
}
m_VectorSensorBuffer = new float[numFloatObservations];
}
/// <summary>

}
m_Info.storedVectorActions = m_Action.vectorActions;
m_Info.observations.Clear();
m_ActionMasker.ResetMask();
UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))

m_Info.maxStepReached = m_MaxStepReached;
m_Info.id = m_Id;
m_Brain.RequestDecision(this);
m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);
// This is a bit of a hack - if we're in inference mode, observations won't be generated
// But we need these to be generated for the recorder. So generate them here.
if (m_Info.observations.Count == 0)
if (m_VectorSensorBuffer == null)
GenerateSensorData();
// Create a buffer for writing uncompressed (i.e. float) sensor data to
m_VectorSensorBuffer = new float[sensors.GetSensorFloatObservationSize()];
m_Recorder.WriteExperience(m_Info);
// This is a bit of a hack - if we're in inference mode, observations won't be generated
// But we need these to be generated for the recorder. So generate them here.
var observations = new List<Observation>();
GenerateSensorData(sensors, m_VectorSensorBuffer, m_WriteAdapter, observations);
m_Recorder.WriteExperience(m_Info, observations);
}
}

}
/// <summary>
/// Generate data for each sensor and store it on the Agent's AgentInfo.
/// Generate data for each sensor and store it in the observations input.
public void GenerateSensorData()
/// <param name="sensors"> List of ISensors that will be used to generate the data.</param>
/// <param name="buffer"> A float array that will be used as buffer when generating the observations. Must
/// be at least the same length as the total number of uncompressed floats in the observations</param>
/// <param name="adapter"> The WriteAdapter that will be used to write the ISensor data to the observations</param>
/// <param name="observations"> A list of observation outputs. This argument will be modified by this method.</param>
public static void GenerateSensorData(List<ISensor> sensors, float[] buffer, WriteAdapter adapter, List<Observation> observations)
{
int floatsWritten = 0;
// Generate data for all Sensors

if (sensor.GetCompressionType() == SensorCompressionType.None)
{
// TODO handle in communicator code instead
m_WriteAdapter.SetTarget(m_VectorSensorBuffer, sensor.GetObservationShape(), floatsWritten);
var numFloats = sensor.Write(m_WriteAdapter);
adapter.SetTarget(buffer, sensor.GetObservationShape(), floatsWritten);
var numFloats = sensor.Write(adapter);
FloatData = new ArraySegment<float>(m_VectorSensorBuffer, floatsWritten, numFloats),
FloatData = new ArraySegment<float>(buffer, floatsWritten, numFloats),
m_Info.observations.Add(floatObs);
observations.Add(floatObs);
floatsWritten += numFloats;
}
else

Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
m_Info.observations.Add(compressedObs);
observations.Add(compressedObs);
}
}
}

6
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs


using System.IO.Abstractions;
using System.Text.RegularExpressions;
using UnityEngine;
using System.Collections.Generic;
using MLAgents.Sensor;
namespace MLAgents
{

/// <summary>
/// Forwards AgentInfo to Demonstration Store.
/// </summary>
public void WriteExperience(AgentInfo info)
public void WriteExperience(AgentInfo info, List<Observation> observations)
m_DemoStore.Record(info);
m_DemoStore.Record(info, observations);
}
public void Close()

7
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs


using System.IO;
using System.IO.Abstractions;
using Google.Protobuf;
using UnityEngine;
using System.Collections.Generic;
using MLAgents.Sensor;
namespace MLAgents
{

/// <summary>
/// Write AgentInfo experience to file.
/// </summary>
public void Record(AgentInfo info)
public void Record(AgentInfo info, List<Observation> observations)
{
// Increment meta-data counters.
m_MetaData.numberExperiences++;

}
// Write AgentInfo to file.
var agentProto = info.ToInfoActionPairProto();
var agentProto = info.ToInfoActionPairProto(observations);
agentProto.WriteDelimitedTo(m_Writer);
}

10
UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs


/// Converts an AgentInfo to a protobuf generated AgentInfoActionPairProto
/// </summary>
/// <returns>The protobuf version of the AgentInfoActionPairProto.</returns>
public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai, List<Observation> observations)
var agentInfoProto = ai.ToAgentInfoProto();
var agentInfoProto = ai.ToAgentInfoProto(observations);
var agentActionProto = new AgentActionProto
{

/// Converts an AgentInfo to a protobuf generated AgentInfoProto
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
public static AgentInfoProto ToAgentInfoProto(this AgentInfo ai)
public static AgentInfoProto ToAgentInfoProto(this AgentInfo ai, List<Observation> observations)
{
var agentInfoProto = new AgentInfoProto
{

agentInfoProto.ActionMask.AddRange(ai.actionMasks);
}
if (ai.observations != null)
if (observations != null)
foreach (var obs in ai.observations)
foreach (var obs in observations)
{
agentInfoProto.Observations.Add(obs.ToProto());
}

100
UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs


using MLAgents.CommunicatorObjects;
using System.IO;
using Google.Protobuf;
using MLAgents.Sensor;
namespace MLAgents
{

/// <summary>
/// Pairs an agent id with the callback that is invoked with the AgentAction
/// received for that agent.
/// </summary>
public struct IdCallbackPair
{
/// <summary>Id of the agent (taken from AgentInfo.id) the callback belongs to.</summary>
public int AgentId;
/// <summary>Callback invoked with the agent's AgentAction once it is available.</summary>
public Action<AgentAction> Callback;
}
public event QuitCommandHandler QuitCommandReceived;
public event ResetCommandHandler ResetCommandReceived;

/// The default number of agents in the scene
const int k_NumAgents = 32;
/// Keeps track of the agents of each brain on the current step
Dictionary<string, List<Agent>> m_CurrentAgents =
new Dictionary<string, List<Agent>>();
List<string> m_BehaviorNames = new List<string>();
bool m_NeedCommunicateThisStep;
float[] m_VectorObservationBuffer = new float[0];
List<Observation> m_ObservationBuffer = new List<Observation>();
WriteAdapter m_WriteAdapter = new WriteAdapter();
Dictionary<string, SensorShapeValidator> m_SensorShapeValidators = new Dictionary<string, SensorShapeValidator>();
Dictionary<string, List<IdCallbackPair>> m_ActionCallbacks = new Dictionary<string, List<IdCallbackPair>>();
Dictionary<string, Dictionary<Agent, AgentAction>> m_LastActionsReceived =
new Dictionary<string, Dictionary<Agent, AgentAction>>();
Dictionary<string, Dictionary<int, AgentAction>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, AgentAction>>();
// Brains that we have sent over the communicator with agents.
HashSet<string> m_SentBrainKeys = new HashSet<string>();

/// <param name="brainParameters">Brain parameters needed to send to the trainer.</param>
public void SubscribeBrain(string brainKey, BrainParameters brainParameters)
{
if (m_CurrentAgents.ContainsKey(brainKey))
if (m_BehaviorNames.Contains(brainKey))
m_CurrentAgents[brainKey] = new List<Agent>(k_NumAgents);
m_BehaviorNames.Add(brainKey);
m_CurrentUnityRlOutput.AgentInfos.Add(
brainKey,
new UnityRLOutputProto.Types.ListAgentInfoProto()

public void DecideBatch()
{
if (m_CurrentAgents.Values.All(l => l.Count == 0))
if (!m_NeedCommunicateThisStep)
foreach (var brainKey in m_CurrentAgents.Keys)
{
using (TimerStack.Instance.Scoped("AgentInfo.ToProto"))
{
if (m_CurrentAgents[brainKey].Count > 0)
{
foreach (var agent in m_CurrentAgents[brainKey])
{
// Update the sensor data on the AgentInfo
agent.GenerateSensorData();
var agentInfoProto = agent.Info.ToAgentInfoProto();
m_CurrentUnityRlOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
}
m_NeedCommunicateThisStep = false;
}
}
}
foreach (var brainKey in m_CurrentAgents.Keys)
{
m_CurrentAgents[brainKey].Clear();
}
}
/// <summary>

/// <param name="agent">Agent info.</param>
public void PutObservations(string brainKey, Agent agent)
public void PutObservations(string brainKey, AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
m_CurrentAgents[brainKey].Add(agent);
if (!m_ActionCallbacks.ContainsKey(brainKey))
{
int numFloatObservations = sensors.GetSensorFloatObservationSize();
if (m_VectorObservationBuffer.Length < numFloatObservations)
{
m_VectorObservationBuffer = new float[numFloatObservations];
}
}
# if DEBUG
if (!m_SensorShapeValidators.ContainsKey(brainKey))
{
m_SensorShapeValidators[brainKey] = new SensorShapeValidator();
}
m_SensorShapeValidators[brainKey].ValidateSensors(sensors);
#endif
using (TimerStack.Instance.Scoped("GenerateSensorData"))
{
Agent.GenerateSensorData(sensors, m_VectorObservationBuffer, m_WriteAdapter, m_ObservationBuffer);
}
using (TimerStack.Instance.Scoped("AgentInfo.ToProto"))
{
var agentInfoProto = info.ToAgentInfoProto(m_ObservationBuffer);
m_CurrentUnityRlOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
}
m_ObservationBuffer.Clear();
m_NeedCommunicateThisStep = true;
if (!m_ActionCallbacks.ContainsKey(brainKey))
{
m_ActionCallbacks[brainKey] = new List<IdCallbackPair>();
}
m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.id, Callback = action });
}
/// <summary>

m_LastActionsReceived.Clear();
foreach (var brainName in rlInput.AgentActions.Keys)
{
if (!m_CurrentAgents[brainName].Any())
if (!m_ActionCallbacks[brainName].Any())
{
continue;
}

}
var agentActions = rlInput.AgentActions[brainName].ToAgentActionList();
var numAgents = m_CurrentAgents[brainName].Count;
var agentActionDict = new Dictionary<Agent, AgentAction>(numAgents);
var numAgents = m_ActionCallbacks[brainName].Count;
var agentActionDict = new Dictionary<int, AgentAction>(numAgents);
var agent = m_CurrentAgents[brainName][i];
agentActionDict[agent] = agentAction;
agent.UpdateAgentAction(agentAction);
var agentId = m_ActionCallbacks[brainName][i].AgentId;
agentActionDict[agentId] = agentAction;
m_ActionCallbacks[brainName][i].Callback.Invoke(agentAction);
m_ActionCallbacks[brainName].Clear();
public Dictionary<Agent, AgentAction> GetActions(string key)
public Dictionary<int, AgentAction> GetActions(string key)
{
return m_LastActionsReceived[key];
}

9
UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs


using System.Collections.Generic;
using UnityEngine;
using MLAgents.CommunicatorObjects;
using MLAgents.Sensor;
namespace MLAgents
{

/// Sends the observations of one Agent.
/// </summary>
/// <param name="brainKey">Batch Key.</param>
/// <param name="agent">Agent info.</param>
void PutObservations(string brainKey, Agent agent);
/// <param name="info">Agent info.</param>
/// <param name="sensors">The list of ISensors of the Agent.</param>
/// <param name="action">The action that will be called once the next AgentAction is ready.</param>
void PutObservations(string brainKey, AgentInfo info, List<ISensor> sensors, Action<AgentAction> action);
/// <summary>
/// Signals the ICommunicator that the Agents are now ready to receive their action

/// </summary>
/// <param name="key">A key to identify which actions to get</param>
/// <returns></returns>
Dictionary<Agent, AgentAction> GetActions(string key);
Dictionary<int, AgentAction> GetActions(string key);
/// <summary>
/// Registers a side channel to the communicator. The side channel will exchange

61
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs


/// </summary>
public class ContinuousActionOutputApplier : TensorApplier.IApplier
{
public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
foreach (var agent in agents)
foreach (var idActionPair in actions)
var action = new float[actionSize];
var actionValue = new float[actionSize];
action[j] = tensorProxy.data[agentIndex, j];
actionValue[j] = tensorProxy.data[agentIndex, j];
agent.UpdateVectorAction(action);
idActionPair.action.Invoke(new AgentAction { vectorActions = actionValue });
agentIndex++;
}
}

m_Allocator = allocator;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
var agentsArray = agents as List<Agent> ?? agents.ToList();
var batchSize = agentsArray.Count;
var actions = new float[batchSize, m_ActionSize.Length];
var idActionPairList = actions as List<AgentIdActionPair> ?? actions.ToList();
var batchSize = idActionPairList.Count;
var actionValues = new float[batchSize, m_ActionSize.Length];
var startActionIndices = Utilities.CumSum(m_ActionSize);
for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
{

for (var ii = 0; ii < batchSize; ii++)
{
actions[ii, actionIndex] = outputTensor.data[ii, 0];
actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
foreach (var agent in agentsArray)
foreach (var idActionPair in idActionPairList)
var action = new float[m_ActionSize.Length];
var actionVal = new float[m_ActionSize.Length];
action[j] = actions[agentIndex, j];
actionVal[j] = actionValues[agentIndex, j];
agent.UpdateVectorAction(action);
idActionPair.action.Invoke(new AgentAction { vectorActions = actionVal });
agentIndex++;
}
}

{
m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
foreach (var agent in agents)
foreach (var idActionPair in actions)
if (!m_Memories.TryGetValue(agent.Info.id, out memory)
if (!m_Memories.TryGetValue(idActionPair.agentId, out memory)
|| memory.Count < memorySize)
{
memory = new List<float>();

m_Memories[agent.Info.id] = memory;
m_Memories[idActionPair.agentId] = memory;
agentIndex++;
}
}

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
foreach (var agent in agents)
foreach (var idActionPair in actions)
if (!m_Memories.TryGetValue(agent.Info.id, out memory)
if (!m_Memories.TryGetValue(idActionPair.agentId, out memory)
|| memory.Count < memorySize * m_MemoriesCount)
{
memory = new List<float>();

memory[memorySize * m_MemoryIndex + j] = tensorProxy.data[agentIndex, j];
}
m_Memories[agent.Info.id] = memory;
m_Memories[idActionPair.agentId] = memory;
/// <summary>
/// Applier for the Value Estimate output tensor. Row i, column 0 of the tensor
/// is forwarded as the value estimate of the i-th agent in the batch.
/// </summary>
public class ValueEstimateApplier : TensorApplier.IApplier
{
    /// <summary>
    /// Passes each agent its value estimate, read from the tensor row that
    /// matches the agent's position in the enumeration.
    /// </summary>
    /// <param name="tensorProxy">Tensor holding one value estimate per row.</param>
    /// <param name="agents">Agents that receive the estimates, in batch order.</param>
    public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)
    {
        var row = 0;
        foreach (var recipient in agents)
        {
            var estimate = tensorProxy.data[row, 0];
            recipient.UpdateValueAction(estimate);
            row += 1;
        }
    }
}
}

56
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs


m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
}

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
tensorProxy.shape = new long[0];
tensorProxy.data?.Dispose();

m_SensorIndices.Add(sensorIndex);
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
foreach (var agent in agents)
foreach (var info in infos)
var sensor = agent.sensors[sensorIndex];
var sensor = info.sensors[sensorIndex];
m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var numWritten = sensor.Write(m_WriteAdapter);
tensorOffset += numWritten;

agentIndex++;
}
}
}
/// <summary>

}
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
foreach (var agent in agents)
foreach (var infoSensorPair in infos)
var info = agent.Info;
var info = infoSensorPair.agentInfo;
if (agent.Info.done)
if (info.done)
m_Memories.Remove(agent.Info.id);
m_Memories.Remove(info.id);
if (!m_Memories.TryGetValue(agent.Info.id, out memory))
if (!m_Memories.TryGetValue(info.id, out memory))
{
for (var j = 0; j < memorySize; j++)
{

}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
foreach (var agent in agents)
foreach (var infoSensorPair in infos)
var info = infoSensorPair.agentInfo;
if (agent.Info.done)
if (info.done)
m_Memories.Remove(agent.Info.id);
m_Memories.Remove(info.id);
if (!m_Memories.TryGetValue(agent.Info.id, out memory))
if (!m_Memories.TryGetValue(info.id, out memory))
{
for (var j = 0; j < memorySize; j++)

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
foreach (var agent in agents)
foreach (var infoSensorPair in infos)
var info = agent.Info;
var info = infoSensorPair.agentInfo;
var pastAction = info.storedVectorActions;
for (var j = 0; j < actionSize; j++)
{

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
foreach (var agent in agents)
foreach (var infoSensorPair in infos)
var agentInfo = agent.Info;
var agentInfo = infoSensorPair.agentInfo;
var maskList = agentInfo.actionMasks;
for (var j = 0; j < maskSize; j++)
{

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal);

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
foreach (var agent in agents)
foreach (var infoSensorPair in infos)
var sensor = agent.sensors[m_SensorIndex];
var sensor = infoSensorPair.sensors[m_SensorIndex];
m_WriteAdapter.SetTarget(tensorProxy, agentIndex, 0);
sensor.Write(m_WriteAdapter);
agentIndex++;

46
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs


using System.Collections.Generic;
using Barracuda;
using UnityEngine.Profiling;
using System;
using MLAgents.Sensor;
/// <summary>
/// Groups an agent's AgentInfo with the list of ISensors for that agent, so
/// both can be handed together to the tensor generators.
/// </summary>
public struct AgentInfoSensorsPair
{
/// <summary>The AgentInfo supplied for the agent.</summary>
public AgentInfo agentInfo;
/// <summary>The sensors supplied alongside the AgentInfo.</summary>
public List<ISensor> sensors;
}
/// <summary>
/// Pairs an agent id with the callback that the tensor appliers invoke with
/// the AgentAction produced for that agent.
/// </summary>
public struct AgentIdActionPair
{
/// <summary>Id of the agent (taken from AgentInfo.id).</summary>
public int agentId;
/// <summary>Callback invoked with the AgentAction produced for this agent.</summary>
public Action<AgentAction> action;
}
List<Agent> m_Agents = new List<Agent>();
List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
List<AgentIdActionPair> m_ActionFuncs = new List<AgentIdActionPair>();
ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TensorApplier m_TensorApplier;

IReadOnlyList<TensorProxy> m_InferenceInputs;
IReadOnlyList<TensorProxy> m_InferenceOutputs;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();
SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();
bool m_VisualObservationsInitialized;

return outputs;
}
public void PutObservations(Agent agent)
public void PutObservations(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
m_Agents.Add(agent);
#if DEBUG
m_SensorShapeValidator.ValidateSensors(sensors);
#endif
m_Infos.Add(new AgentInfoSensorsPair
{
agentInfo = info,
sensors = sensors
});
m_ActionFuncs.Add(new AgentIdActionPair { action = action, agentId = info.id });
var currentBatchSize = m_Agents.Count;
var currentBatchSize = m_Infos.Count;
var firstAgent = m_Agents[0];
m_TensorGenerator.InitializeObservations(firstAgent, m_TensorAllocator);
var firstInfo = m_Infos[0];
m_TensorGenerator.InitializeObservations(firstInfo.sensors, m_TensorAllocator);
m_VisualObservationsInitialized = true;
}

// Prepare the input tensors to be fed into the engine
m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Agents);
m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Infos);
Profiler.EndSample();
Profiler.BeginSample($"MLAgents.{m_Model.name}.PrepareBarracudaInputs");

Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors");
// Update the outputs
m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_Agents);
m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_ActionFuncs);
m_Agents.Clear();
m_Infos.Clear();
m_ActionFuncs.Clear();
}
public bool HasModel(NNModel other, InferenceDevice otherInferenceDevice)

8
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs


using System.Collections.Generic;
using Barracuda;
using System;
namespace MLAgents.InferenceBrain
{

/// <param name="agents">
/// List of Agents that will receive the values of the Tensor.
/// </param>
void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents);
void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
m_Dict[TensorNames.ValueEstimateOutput] = new ValueEstimateApplier();
if (bp.vectorActionSpaceType == SpaceType.Continuous)
{
m_Dict[TensorNames.ActionOutput] = new ContinuousActionOutputApplier();

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<Agent> agents)
IEnumerable<TensorProxy> tensors, IEnumerable<AgentIdActionPair> actions)
{
foreach (var tensor in tensors)
{

$"Unknown tensorProxy expected as output : {tensor.name}");
}
m_Dict[tensor.name].Apply(tensor, agents);
m_Dict[tensor.name].Apply(tensor, actions);
}
}
}

15
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs


using System.Collections.Generic;
using Barracuda;
using MLAgents.Sensor;
namespace MLAgents.InferenceBrain
{

/// </summary>
/// <param name="tensorProxy"> The tensor whose data and shape will be modified</param>
/// <param name="batchSize"> The number of agents present in the current batch</param>
/// <param name="agents"> List of Agents containing the
/// <param name="infos"> List of AgentInfos containing the
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents);
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos);
}
readonly Dictionary<string, IGenerator> m_Dict = new Dictionary<string, IGenerator>();

m_Dict[TensorNames.ValueEstimateOutput] = new BiDimensionalOutputGenerator(allocator);
}
public void InitializeObservations(Agent agent, ITensorAllocator allocator)
public void InitializeObservations(List<ISensor> sensors, ITensorAllocator allocator)
{
// Loop through the sensors on a representative agent.
// For vector observations, add the index to the (single) VectorObservationGenerator

for (var sensorIndex = 0; sensorIndex < agent.sensors.Count; sensorIndex++)
for (var sensorIndex = 0; sensorIndex < sensors.Count; sensorIndex++)
var sensor = agent.sensors[sensorIndex];
var sensor = sensors[sensorIndex];
var shape = sensor.GetObservationShape();
// TODO generalize - we currently only have vector or visual, but can't handle "2D" observations
var isVectorSensor = (shape.Length == 1);

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated generator.</exception>
public void GenerateTensors(
IEnumerable<TensorProxy> tensors, int currentBatchSize, IEnumerable<Agent> agents)
IEnumerable<TensorProxy> tensors, int currentBatchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
foreach (var tensor in tensors)
{

$"Unknown tensorProxy expected as input : {tensor.name}");
}
m_Dict[tensor.name].Generate(tensor, currentBatchSize, agents);
m_Dict[tensor.name].Generate(tensor, currentBatchSize, infos);
}
}
}

6
UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs


InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
return;
}
if (target == null)
{

var displayValues = s_DisplayTransformValues[target];
var index = 0;
var orderedKeys = displayValues.Keys.OrderBy(x => - displayValues[x].time);
var orderedKeys = displayValues.Keys.OrderBy(x => -displayValues[x].time);
foreach (var key in orderedKeys)
{
s_KeyStyle.alignment = TextAnchor.MiddleRight;

45
UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs


using Barracuda;
using System.Collections.Generic;
using MLAgents.InferenceBrain;
using System;
using MLAgents.Sensor;
namespace MLAgents
{

NNModel model,
InferenceDevice inferenceDevice)
{
var aca = Object.FindObjectOfType<Academy>();
var aca = GameObject.FindObjectOfType<Academy>();
aca.LazyInitialization();
var modelRunner = aca.GetOrCreateModelRunner(model, brainParameters, inferenceDevice);
m_ModelRunner = modelRunner;

public void RequestDecision(Agent agent)
public void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
#if DEBUG
ValidateAgentSensorShapes(agent);
#endif
m_ModelRunner?.PutObservations(agent);
m_ModelRunner?.PutObservations(info, sensors, action);
}
/// <inheritdoc />

}
/// <summary>
/// Checks that the Agent's Sensors have the same shapes as those of the other Agents
/// using the same Brain. If this is the first Agent being checked, its Sensor shapes
/// are saved and used as the reference for every later check.
/// </summary>
/// <param name="agent">The Agent to check</param>
void ValidateAgentSensorShapes(Agent agent)
{
    if (m_SensorShapes == null)
    {
        m_SensorShapes = new List<int[]>(agent.sensors.Count);
        // First agent, save the sensor sizes
        foreach (var sensor in agent.sensors)
        {
            m_SensorShapes.Add(sensor.GetObservationShape());
        }
    }
    else
    {
        // Check for compatibility with the other Agents' Sensors
        // TODO make sure this only checks once per agent
        Debug.Assert(m_SensorShapes.Count == agent.sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {agent.sensors.Count}");
        // A failed assertion is reported but execution continues (NOTE(review): assumes
        // Debug is UnityEngine.Debug, which logs rather than throws), so both loops are
        // bounded by the shorter side to avoid indexing past the end after a mismatch.
        for (var i = 0; i < m_SensorShapes.Count && i < agent.sensors.Count; i++)
        {
            var cachedShape = m_SensorShapes[i];
            var sensorShape = agent.sensors[i].GetObservationShape();
            Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
            for (var j = 0; j < cachedShape.Length && j < sensorShape.Length; j++)
            {
                Debug.Assert(cachedShape[j] == sensorShape[j], "Sensor sizes must match.");
            }
        }
    }
}
public void Dispose()

14
UnitySDK/Assets/ML-Agents/Scripts/Policy/HeuristicPolicy.cs


using UnityEngine;
using MLAgents.Sensor;
using System.Collections.Generic;
using System;
namespace MLAgents

public class HeuristicPolicy : IPolicy
{
Func<float[]> m_Heuristic;
Agent m_Agent;
Action<AgentAction> m_ActionFunc;
/// <inheritdoc />
public HeuristicPolicy(Func<float[]> heuristic)

/// <inheritdoc />
public void RequestDecision(Agent agent)
public void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
m_Agent = agent;
m_ActionFunc = action;
if (m_Agent != null)
if (m_ActionFunc != null)
m_Agent.UpdateVectorAction(m_Heuristic.Invoke());
m_ActionFunc.Invoke(new AgentAction { vectorActions = m_Heuristic.Invoke() });
m_ActionFunc = null;
}
}

5
UnitySDK/Assets/ML-Agents/Scripts/Policy/IPolicy.cs


using System;
using UnityEngine;
using System.Collections.Generic;
using MLAgents.Sensor;
namespace MLAgents
{

/// batching of requests.
/// </summary>
/// <param name="agent"></param>
void RequestDecision(Agent agent);
void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action);
/// <summary>
/// Signals the Policy that if the Decision has not been taken yet,

45
UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using MLAgents.Sensor;
using System;
namespace MLAgents
{

string behaviorName)
{
m_BehaviorName = behaviorName;
var aca = Object.FindObjectOfType<Academy>();
var aca = GameObject.FindObjectOfType<Academy>();
aca.LazyInitialization();
m_Communicator = aca.Communicator;
aca.Communicator.SubscribeBrain(m_BehaviorName, brainParameters);

public void RequestDecision(Agent agent)
public void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
#if DEBUG
ValidateAgentSensorShapes(agent);
#endif
m_Communicator?.PutObservations(m_BehaviorName, agent);
m_Communicator?.PutObservations(m_BehaviorName, info, sensors, action);
}
/// <inheritdoc />

}
/// <summary>
/// Checks that the Agent's Sensors have the same shapes as those of the other Agents
/// using the same Brain. If this is the first Agent being checked, its Sensor shapes
/// are saved and used as the reference for every later check.
/// </summary>
/// <param name="agent">The Agent to check</param>
void ValidateAgentSensorShapes(Agent agent)
{
    if (m_SensorShapes == null)
    {
        m_SensorShapes = new List<int[]>(agent.sensors.Count);
        // First agent, save the sensor sizes
        foreach (var sensor in agent.sensors)
        {
            m_SensorShapes.Add(sensor.GetObservationShape());
        }
    }
    else
    {
        // Check for compatibility with the other Agents' Sensors
        // TODO make sure this only checks once per agent
        Debug.Assert(m_SensorShapes.Count == agent.sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {agent.sensors.Count}");
        // A failed assertion is reported but execution continues (NOTE(review): assumes
        // Debug is UnityEngine.Debug, which logs rather than throws), so both loops are
        // bounded by the shorter side to avoid indexing past the end after a mismatch.
        for (var i = 0; i < m_SensorShapes.Count && i < agent.sensors.Count; i++)
        {
            var cachedShape = m_SensorShapes[i];
            var sensorShape = agent.sensors[i].GetObservationShape();
            Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
            for (var j = 0; j < cachedShape.Length && j < sensorShape.Length; j++)
            {
                Debug.Assert(cachedShape[j] == sensorShape[j], "Sensor sizes must match.");
            }
        }
    }
}
public void Dispose()

6
UnitySDK/Assets/ML-Agents/Scripts/Sensor/WriteAdapter.cs


{
if (h < 0 || h >= m_TensorShape.height)
{
throw new IndexOutOfRangeException($"height value {h} must be in range [0, {m_TensorShape.height-1}]");
throw new IndexOutOfRangeException($"height value {h} must be in range [0, {m_TensorShape.height - 1}]");
throw new IndexOutOfRangeException($"width value {w} must be in range [0, {m_TensorShape.width-1}]");
throw new IndexOutOfRangeException($"width value {w} must be in range [0, {m_TensorShape.width - 1}]");
throw new IndexOutOfRangeException($"channel value {ch} must be in range [0, {m_TensorShape.channels-1}]");
throw new IndexOutOfRangeException($"channel value {ch} must be in range [0, {m_TensorShape.channels - 1}]");
}
var index = m_TensorShape.Index(m_Batch, h, w, ch + m_Offset);

16
UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs


dst.Add(item);
}
}
/// <summary>
/// Adds up the observation sizes of every sensor in the list whose compression type
/// is <see cref="SensorCompressionType.None"/>; sensors reporting any other
/// compression type are left out of the total.
/// </summary>
/// <param name="sensors">The sensors to inspect.</param>
/// <returns>The summed ObservationSize() of the uncompressed sensors.</returns>
public static int GetSensorFloatObservationSize(this List<ISensor> sensors)
{
    var total = 0;
    foreach (var sensor in sensors)
    {
        // Only uncompressed sensors contribute to the float count.
        if (sensor.GetCompressionType() != SensorCompressionType.None)
        {
            continue;
        }
        total += sensor.ObservationSize();
    }
    return total;
}
}
}

44
UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorShapeValidator.cs


using System.Collections.Generic;
using UnityEngine;
namespace MLAgents.Sensor
{
/// <summary>
/// Remembers the observation shapes of the first list of sensors it is given and
/// checks every later list against those remembered shapes.
/// </summary>
public class SensorShapeValidator
{
    // Shapes recorded from the first validated list; stays null until then.
    private List<int[]> m_SensorShapes;

    /// <summary>
    /// Checks that the given list of sensors has the same shapes as the previously
    /// validated one. If this is the first list being checked, its sensor shapes are
    /// saved as the reference instead. Mismatches are reported through Debug.Assert.
    /// </summary>
    /// <param name="sensors">The sensors whose observation shapes are checked.</param>
    public void ValidateSensors(List<ISensor> sensors)
    {
        if (m_SensorShapes == null)
        {
            m_SensorShapes = new List<int[]>(sensors.Count);
            // First agent, save the sensor sizes
            foreach (var sensor in sensors)
            {
                m_SensorShapes.Add(sensor.GetObservationShape());
            }
        }
        else
        {
            // Check for compatibility with the other Agents' Sensors
            // TODO make sure this only checks once per agent
            Debug.Assert(m_SensorShapes.Count == sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {sensors.Count}");
            // UnityEngine's Debug.Assert logs the failure and lets execution continue, so
            // both loops are bounded by the shorter side; otherwise a shorter sensor list
            // or shape would throw an out-of-range exception right after the mismatch
            // was reported.
            for (var i = 0; i < m_SensorShapes.Count && i < sensors.Count; i++)
            {
                var cachedShape = m_SensorShapes[i];
                var sensorShape = sensors[i].GetObservationShape();
                Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
                for (var j = 0; j < cachedShape.Length && j < sensorShape.Length; j++)
                {
                    Debug.Assert(cachedShape[j] == sensorShape[j], "Sensor sizes must match.");
                }
            }
        }
    }
}
}

11
UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorShapeValidator.cs.meta


fileFormatVersion: 2
guid: a7b5a4560ee254be497321527f92c174
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存