
[MLA-1762] reduce memory allocations from DiscreteActionOutputApplier (#4922)

/bullet-hell-barracuda-test-1.3.1
GitHub · 4 years ago
Current commit: 725e4363
4 files changed, 69 insertions and 276 deletions
  1. com.unity.ml-agents/CHANGELOG.md (1 line changed)
  2. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (129 lines changed)
  3. com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs (17 lines changed)
  4. com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs (198 lines changed)

com.unity.ml-agents/CHANGELOG.md (1 line changed)


 - Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
 - Removed several memory allocations that happened during inference. On a test scene, this
   reduced the amount of memory allocated by approximately 25%. (#4887)
+- Removed several memory allocations that happened during inference with discrete actions. (#4922)
 - Properly catch permission errors when writing timer files. (#4921)
#### ml-agents / ml-agents-envs / gym-unity (Python)

com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (129 lines changed)


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;

 {
     readonly int[] m_ActionSize;
     readonly Multinomial m_Multinomial;
-    readonly ITensorAllocator m_Allocator;
+    readonly int[] m_StartActionIndices;
+    readonly float[] m_CdfBuffer;

-        m_Allocator = allocator;
+        m_StartActionIndices = Utilities.CumSum(m_ActionSize);
+
+        // Scratch space for computing the cumulative distribution function.
+        // In order to reuse it, make it the size of the largest branch.
+        var largestBranch = Mathf.Max(m_ActionSize);
+        m_CdfBuffer = new float[largestBranch];

-        //var tensorDataProbabilities = tensorProxy.Data as float[,];
-        var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
-        var batchSize = idActionPairList.Count;
-        var actionValues = new float[batchSize, m_ActionSize.Length];
-        var startActionIndices = Utilities.CumSum(m_ActionSize);
-        for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
-        {
-            var nBranchAction = m_ActionSize[actionIndex];
-            var actionProbs = new TensorProxy()
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                shape = new long[] { batchSize, nBranchAction },
-                data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
-            };
-            for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
-            {
-                for (var branchActionIndex = 0;
-                    branchActionIndex < nBranchAction;
-                    branchActionIndex++)
-                {
-                    actionProbs.data[batchIndex, branchActionIndex] =
-                        tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
-                }
-            }
-            var outputTensor = new TensorProxy()
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                shape = new long[] { batchSize, 1 },
-                data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
-            };
-            Eval(actionProbs, outputTensor, m_Multinomial);
-            for (var ii = 0; ii < batchSize; ii++)
-            {
-                actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
-            }
-            actionProbs.data.Dispose();
-            outputTensor.data.Dispose();
-        }
         var agentIndex = 0;
         for (var i = 0; i < actionIds.Count; i++)
         {

                 var discreteBuffer = actionBuffer.DiscreteActions;
                 for (var j = 0; j < m_ActionSize.Length; j++)
                 {
-                    discreteBuffer[j] = (int)actionValues[agentIndex, j];
+                    ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
+                    discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
                 }
             }
             agentIndex++;

     /// <summary>
-    /// Draw samples from a multinomial distribution based on log-probabilities specified
-    /// in tensor src. The samples will be saved in the dst tensor.
+    /// Compute the cumulative distribution function for a given agent's action
+    /// given the log-probabilities.
+    /// The results are stored in m_CdfBuffer, which is the size of the largest action's number of branches.
     /// </summary>
-    /// <param name="src">2-D tensor with shape batch_size x num_classes</param>
-    /// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
-    /// <param name="multinomial">Multinomial object used to sample values</param>
-    /// <exception cref="NotImplementedException">
-    /// Multinomial doesn't support integer tensors
-    /// </exception>
-    /// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
-    /// <exception cref="ArgumentNullException">
-    /// At least one of the tensors is not allocated
-    /// </exception>
-    public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
-    {
-        if (src.DataType != typeof(float))
-        {
-            throw new NotImplementedException("Only float tensors are currently supported");
-        }
-        if (src.valueType != dst.valueType)
-        {
-            throw new ArgumentException(
-                "Source and destination tensors have different types!");
-        }
-        if (src.data == null || dst.data == null)
-        {
-            throw new ArgumentNullException();
-        }
-        if (src.data.batch != dst.data.batch)
-        {
-            throw new ArgumentException("Batch size for input and output data is different!");
-        }
-        var cdf = new float[src.data.channels];
-        for (var batch = 0; batch < src.data.batch; ++batch)
-        {
-            // Find the class maximum
-            var maxProb = float.NegativeInfinity;
-            for (var cls = 0; cls < src.data.channels; ++cls)
-            {
-                maxProb = Mathf.Max(src.data[batch, cls], maxProb);
-            }
-            // Sum the log probabilities and compute CDF
-            var sumProb = 0.0f;
-            for (var cls = 0; cls < src.data.channels; ++cls)
-            {
-                sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
-                cdf[cls] = sumProb;
-            }
-            // Generate the samples
-            for (var sample = 0; sample < dst.data.channels; ++sample)
-            {
-                dst.data[batch, sample] = multinomial.Sample(cdf);
-            }
-        }
-    }
+    /// <param name="logProbs"></param>
+    /// <param name="batch">Index of the agent being considered</param>
+    /// <param name="channelOffset">Offset into the tensor's channel.</param>
+    /// <param name="branchSize"></param>
+    internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
+    {
+        // Find the class maximum
+        var maxProb = float.NegativeInfinity;
+        for (var cls = 0; cls < branchSize; ++cls)
+        {
+            maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
+        }
+        // Sum the log probabilities and compute CDF
+        var sumProb = 0.0f;
+        for (var cls = 0; cls < branchSize; ++cls)
+        {
+            sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
+            m_CdfBuffer[cls] = sumProb;
+        }
+    }
 }
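To make the new code path concrete: the applier now turns one branch's log-probabilities into an unnormalized CDF inside a preallocated buffer and samples an index from it, instead of copying data into freshly allocated tensors. Below is a minimal standalone sketch of that same computation (not part of the commit); it uses plain System.Math instead of Unity's Mathf, and the names ComputeCdfInto, Sample and Main are illustrative only.

using System;

static class CdfSamplingSketch
{
    // Fill `cdf` with the unnormalized cumulative distribution of softmax(logProbs).
    // Subtracting the maximum first keeps Exp() from overflowing, as in ComputeCdf above.
    static void ComputeCdfInto(float[] logProbs, float[] cdf)
    {
        var maxLogProb = float.NegativeInfinity;
        for (var i = 0; i < logProbs.Length; i++)
        {
            maxLogProb = Math.Max(maxLogProb, logProbs[i]);
        }
        var sum = 0.0f;
        for (var i = 0; i < logProbs.Length; i++)
        {
            sum += (float)Math.Exp(logProbs[i] - maxLogProb);
            cdf[i] = sum; // monotonically increasing; the last entry is the total mass
        }
    }

    // Sample an index in [0, cdf.Length) proportionally to the probability mass.
    static int Sample(Random rng, float[] cdf)
    {
        var p = (float)rng.NextDouble() * cdf[cdf.Length - 1];
        var cls = 0;
        while (cdf[cls] < p)
        {
            ++cls;
        }
        return cls;
    }

    static void Main()
    {
        // Log-probabilities of a 3-way branch with probabilities 0.1, 0.2, 0.7.
        var logProbs = new[] { (float)Math.Log(0.1f), (float)Math.Log(0.2f), (float)Math.Log(0.7f) };
        var cdf = new float[logProbs.Length]; // reusable scratch buffer, as in the commit
        ComputeCdfInto(logProbs, cdf);
        // cdf is now approximately { 0.1, 0.3, 1.0 }, up to a common scale factor.

        var rng = new Random(2018);
        var counts = new int[3];
        for (var i = 0; i < 10000; i++)
        {
            counts[Sample(rng, cdf)]++;
        }
        // Expect roughly 1000 / 2000 / 7000 draws per index.
        Console.WriteLine(string.Join(", ", counts));
    }
}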

com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs (17 lines changed)


         /// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
         /// the array will be 1.0.
         /// </param>
-        /// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
-        public int Sample(float[] cmf)
+        /// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
+        /// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
+        public int Sample(float[] cmf, int branchSize)
         {
-            var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
+            var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
             var cls = 0;
             while (cmf[cls] < p)
             {
                 ++cls;
             }

             return cls;
         }
+
+        /// <summary>
+        /// Samples from the Multinomial distribution defined by the provided cumulative
+        /// mass function.
+        /// </summary>
+        /// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
+        public int Sample(float[] cmf)
+        {
+            return Sample(cmf, cmf.Length);
+        }
     }
 }
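A short usage sketch of the new two-argument overload (not from the commit): one scratch buffer sized for the largest branch can be reused across every branch, and only the first branchSize entries are read, so sampling allocates nothing per decision. Since Multinomial is an internal utility of the package, code like this would have to live inside com.unity.ml-agents or its test assembly; the class and method names below are hypothetical.

using Unity.MLAgents.Inference.Utils;

internal static class MultinomialUsageSketch
{
    internal static int SampleBranch()
    {
        // One oversized scratch buffer serves every branch; only the first
        // `branchSize` entries are read by the two-argument Sample overload.
        var multinomial = new Multinomial(2018);
        var cdfBuffer = new float[5];

        // Unscaled CDF for a 3-action branch with probabilities 0.1, 0.2, 0.7.
        cdfBuffer[0] = 0.1f;
        cdfBuffer[1] = 0.3f;
        cdfBuffer[2] = 1.0f;

        // Entries beyond index branchSize - 1 are simply ignored.
        return multinomial.Sample(cdfBuffer, 3);
    }
}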

com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs (198 lines changed)


 using System;
 using System.Collections.Generic;
 using NUnit.Framework;
 using Unity.Barracuda;
 using UnityEngine;
+using Unity.MLAgents.Actuators;
 using Unity.MLAgents.Inference;
 using Unity.MLAgents.Inference.Utils;

 namespace Unity.MLAgents.Tests
 {
     public class DiscreteActionOutputApplierTest
     {
+        [Test]
+        public void TestDiscreteApply()
+        {
+            var actionSpec = ActionSpec.MakeDiscrete(3, 2);
+            const float smallLogProb = -1000.0f;
+            const float largeLogProb = -1.0f;
+
+            var logProbs = new TensorProxy
+            {
+                data = new Tensor(
+                    2,
+                    5,
+                    new[]
+                    {
+                        smallLogProb, smallLogProb, largeLogProb, // Agent 0, branch 0
+                        smallLogProb, largeLogProb, // Agent 0, branch 1
+                        largeLogProb, smallLogProb, smallLogProb, // Agent 1, branch 0
+                        largeLogProb, smallLogProb, // Agent 1, branch 1
+                    }),
+                valueType = TensorProxy.TensorType.FloatingPoint
+            };
+
+            var applier = new DiscreteActionOutputApplier(actionSpec, 2020, null);
+            var agentIds = new List<int> { 42, 1337 };
+            var actionBuffers = new Dictionary<int, ActionBuffers>();
+            actionBuffers[42] = new ActionBuffers(actionSpec);
+            actionBuffers[1337] = new ActionBuffers(actionSpec);
+
+            applier.Apply(logProbs, agentIds, actionBuffers);
+            Assert.AreEqual(2, actionBuffers[42].DiscreteActions[0]);
+            Assert.AreEqual(1, actionBuffers[42].DiscreteActions[1]);
+
+            Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[0]);
+            Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[1]);
+        }
-        [Test]
-        public void TestEvalP()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                data = new Tensor(1, 3, new[] { 0.1f, 0.2f, 0.7f }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                data = new Tensor(1, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-            float[] reference = { 2, 2, 1 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestEvalLogits()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                data = new Tensor(
-                    1,
-                    3,
-                    new[] { Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50 }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                data = new Tensor(1, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-            float[] reference = { 2, 2, 2 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestEvalBatching()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                data = new Tensor(2, 3, new[]
-                {
-                    Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50,
-                    Mathf.Log(0.3f) - 25, Mathf.Log(0.4f) - 25, Mathf.Log(0.3f) - 25
-                }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                data = new Tensor(2, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-            float[] reference = { 2, 2, 2, 0, 1, 0 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestSrcInt()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.Integer
-            };
-            Assert.Throws<NotImplementedException>(
-                () => DiscreteActionOutputApplier.Eval(src, null, m));
-        }
-
-        [Test]
-        public void TestDstInt()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.Integer
-            };
-            Assert.Throws<ArgumentException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestSrcDataNull()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            Assert.Throws<ArgumentNullException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestDstDataNull()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(0, 1)
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            Assert.Throws<ArgumentNullException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestUnequalBatchSize()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(1, 1)
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(2, 1)
-            };
-            Assert.Throws<ArgumentException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
     }
 }
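Why the assertions above do not depend on the sampling seed (a worked check, not part of the commit): with log-probabilities of -1000 versus -1, exp(-1000 - (-1)) underflows to zero in single precision, so the computed CDF is effectively one-hot and essentially every draw lands on the branch index that holds largeLogProb. The sketch below reproduces that for agent 0, branch 0 of the test data.

using System;

static class TestExpectationSketch
{
    static void Main()
    {
        // Agent 0, branch 0 in the test above: { smallLogProb, smallLogProb, largeLogProb }.
        var logits = new[] { -1000.0f, -1000.0f, -1.0f };
        var max = Math.Max(logits[0], Math.Max(logits[1], logits[2]));
        var cdf = new float[logits.Length];
        var sum = 0.0f;
        for (var i = 0; i < logits.Length; i++)
        {
            sum += (float)Math.Exp(logits[i] - max); // exp(-999) underflows to 0
            cdf[i] = sum;
        }
        // Prints "0, 0, 1": any p drawn in (0, 1] selects index 2, matching the assertion.
        Console.WriteLine(string.Join(", ", cdf));
    }
}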