
[MLA-1762] reduce memory allocations from DiscreteActionOutputApplier (#4922)

/bullet-hell-barracuda-test-1.3.1
GitHub · 4 years ago
Current commit: 725e4363
4 files changed, 69 insertions and 276 deletions
  1. com.unity.ml-agents/CHANGELOG.md (1 line changed)
  2. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (129 lines changed)
  3. com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs (17 lines changed)
  4. com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs (198 lines changed)

com.unity.ml-agents/CHANGELOG.md (1 line changed)


 - Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
 - Removed several memory allocations that happened during inference. On a test scene, this
   reduced the amount of memory allocated by approximately 25%. (#4887)
+- Removed several memory allocations that happened during inference with discrete actions. (#4922)
 - Properly catch permission errors when writing timer files. (#4921)
#### ml-agents / ml-agents-envs / gym-unity (Python)

com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (129 lines changed)


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;

 {
     readonly int[] m_ActionSize;
     readonly Multinomial m_Multinomial;
-    readonly ITensorAllocator m_Allocator;
+    readonly int[] m_StartActionIndices;
+    readonly float[] m_CdfBuffer;

-        m_Allocator = allocator;
+        m_StartActionIndices = Utilities.CumSum(m_ActionSize);
+
+        // Scratch space for computing the cumulative distribution function.
+        // In order to reuse it, make it the size of the largest branch.
+        var largestBranch = Mathf.Max(m_ActionSize);
+        m_CdfBuffer = new float[largestBranch];

-        //var tensorDataProbabilities = tensorProxy.Data as float[,];
-        var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
-        var batchSize = idActionPairList.Count;
-        var actionValues = new float[batchSize, m_ActionSize.Length];
-        var startActionIndices = Utilities.CumSum(m_ActionSize);
-        for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
-        {
-            var nBranchAction = m_ActionSize[actionIndex];
-            var actionProbs = new TensorProxy()
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                shape = new long[] { batchSize, nBranchAction },
-                data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
-            };
-            for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
-            {
-                for (var branchActionIndex = 0;
-                    branchActionIndex < nBranchAction;
-                    branchActionIndex++)
-                {
-                    actionProbs.data[batchIndex, branchActionIndex] =
-                        tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
-                }
-            }
-            var outputTensor = new TensorProxy()
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                shape = new long[] { batchSize, 1 },
-                data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
-            };
-            Eval(actionProbs, outputTensor, m_Multinomial);
-            for (var ii = 0; ii < batchSize; ii++)
-            {
-                actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
-            }
-            actionProbs.data.Dispose();
-            outputTensor.data.Dispose();
-        }
         var agentIndex = 0;
         for (var i = 0; i < actionIds.Count; i++)
         {

                 var discreteBuffer = actionBuffer.DiscreteActions;
                 for (var j = 0; j < m_ActionSize.Length; j++)
                 {
-                    discreteBuffer[j] = (int)actionValues[agentIndex, j];
+                    ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
+                    discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
                 }
             }
             agentIndex++;

     /// <summary>
-    /// Draw samples from a multinomial distribution based on log-probabilities specified
-    /// in tensor src. The samples will be saved in the dst tensor.
+    /// Compute the cumulative distribution function for a given agent's action
+    /// given the log-probabilities.
+    /// The results are stored in m_CdfBuffer, which is the size of the largest action's number of branches.
     /// </summary>
-    /// <param name="src">2-D tensor with shape batch_size x num_classes</param>
-    /// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
-    /// <param name="multinomial">Multinomial object used to sample values</param>
-    /// <exception cref="NotImplementedException">
-    /// Multinomial doesn't support integer tensors
-    /// </exception>
-    /// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
-    /// <exception cref="ArgumentNullException">
-    /// At least one of the tensors is not allocated
-    /// </exception>
-    public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
-    {
-        if (src.DataType != typeof(float))
-        {
-            throw new NotImplementedException("Only float tensors are currently supported");
-        }
-        if (src.valueType != dst.valueType)
-        {
-            throw new ArgumentException(
-                "Source and destination tensors have different types!");
-        }
-        if (src.data == null || dst.data == null)
-        {
-            throw new ArgumentNullException();
-        }
-        if (src.data.batch != dst.data.batch)
-        {
-            throw new ArgumentException("Batch size for input and output data is different!");
-        }
-        var cdf = new float[src.data.channels];
-        for (var batch = 0; batch < src.data.batch; ++batch)
-        {
-            // Find the class maximum
-            var maxProb = float.NegativeInfinity;
-            for (var cls = 0; cls < src.data.channels; ++cls)
-            {
-                maxProb = Mathf.Max(src.data[batch, cls], maxProb);
-            }
-            // Sum the log probabilities and compute CDF
-            var sumProb = 0.0f;
-            for (var cls = 0; cls < src.data.channels; ++cls)
-            {
-                sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
-                cdf[cls] = sumProb;
-            }
-            // Generate the samples
-            for (var sample = 0; sample < dst.data.channels; ++sample)
-            {
-                dst.data[batch, sample] = multinomial.Sample(cdf);
-            }
-        }
-    }
+    /// <param name="logProbs"></param>
+    /// <param name="batch">Index of the agent being considered</param>
+    /// <param name="channelOffset">Offset into the tensor's channel.</param>
+    /// <param name="branchSize"></param>
+    internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
+    {
+        // Find the class maximum
+        var maxProb = float.NegativeInfinity;
+        for (var cls = 0; cls < branchSize; ++cls)
+        {
+            maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
+        }
+        // Sum the log probabilities and compute CDF
+        var sumProb = 0.0f;
+        for (var cls = 0; cls < branchSize; ++cls)
+        {
+            sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
+            m_CdfBuffer[cls] = sumProb;
+        }
+    }
 }
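To make the new code path concrete: the applier now turns one branch's log-probabilities into an unnormalized CDF inside a preallocated buffer and samples an index from it, instead of copying data into freshly allocated tensors. Below is a minimal standalone sketch of that same computation (not part of the commit); it uses plain System.Math instead of Unity's Mathf, and the names ComputeCdfInto, Sample and Main are illustrative only.

using System;

static class CdfSamplingSketch
{
    // Fill `cdf` with the unnormalized cumulative distribution of softmax(logProbs).
    // Subtracting the maximum first keeps Exp() from overflowing, as in ComputeCdf above.
    static void ComputeCdfInto(float[] logProbs, float[] cdf)
    {
        var maxLogProb = float.NegativeInfinity;
        for (var i = 0; i < logProbs.Length; i++)
        {
            maxLogProb = Math.Max(maxLogProb, logProbs[i]);
        }
        var sum = 0.0f;
        for (var i = 0; i < logProbs.Length; i++)
        {
            sum += (float)Math.Exp(logProbs[i] - maxLogProb);
            cdf[i] = sum; // monotonically increasing; the last entry is the total mass
        }
    }

    // Sample an index in [0, cdf.Length) proportionally to the probability mass.
    static int Sample(Random rng, float[] cdf)
    {
        var p = (float)rng.NextDouble() * cdf[cdf.Length - 1];
        var cls = 0;
        while (cdf[cls] < p)
        {
            ++cls;
        }
        return cls;
    }

    static void Main()
    {
        // Log-probabilities of a 3-way branch with probabilities 0.1, 0.2, 0.7.
        var logProbs = new[] { (float)Math.Log(0.1f), (float)Math.Log(0.2f), (float)Math.Log(0.7f) };
        var cdf = new float[logProbs.Length]; // reusable scratch buffer, as in the commit
        ComputeCdfInto(logProbs, cdf);
        // cdf is now approximately { 0.1, 0.3, 1.0 }, up to a common scale factor.

        var rng = new Random(2018);
        var counts = new int[3];
        for (var i = 0; i < 10000; i++)
        {
            counts[Sample(rng, cdf)]++;
        }
        // Expect roughly 1000 / 2000 / 7000 draws per index.
        Console.WriteLine(string.Join(", ", counts));
    }
}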

com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs (17 lines changed)


         /// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
         /// the array will be 1.0.
         /// </param>
-        /// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
-        public int Sample(float[] cmf)
+        /// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
+        /// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
+        public int Sample(float[] cmf, int branchSize)
         {
-            var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
+            var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
             var cls = 0;
             while (cmf[cls] < p)
             {
                 ++cls;
             }

             return cls;
         }
+
+        /// <summary>
+        /// Samples from the Multinomial distribution defined by the provided cumulative
+        /// mass function.
+        /// </summary>
+        /// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
+        public int Sample(float[] cmf)
+        {
+            return Sample(cmf, cmf.Length);
+        }
     }
 }
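A short usage sketch of the new two-argument overload (not from the commit): one scratch buffer sized for the largest branch can be reused across every branch, and only the first branchSize entries are read, so sampling allocates nothing per decision. Since Multinomial is an internal utility of the package, code like this would have to live inside com.unity.ml-agents or its test assembly; the class and method names below are hypothetical.

using Unity.MLAgents.Inference.Utils;

internal static class MultinomialUsageSketch
{
    internal static int SampleBranch()
    {
        // One oversized scratch buffer serves every branch; only the first
        // `branchSize` entries are read by the two-argument Sample overload.
        var multinomial = new Multinomial(2018);
        var cdfBuffer = new float[5];

        // Unscaled CDF for a 3-action branch with probabilities 0.1, 0.2, 0.7.
        cdfBuffer[0] = 0.1f;
        cdfBuffer[1] = 0.3f;
        cdfBuffer[2] = 1.0f;

        // Entries beyond index branchSize - 1 are simply ignored.
        return multinomial.Sample(cdfBuffer, 3);
    }
}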

com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs (198 lines changed)


 using System;
 using System.Collections.Generic;
 using NUnit.Framework;
 using Unity.Barracuda;
 using UnityEngine;
+using Unity.MLAgents.Actuators;
 using Unity.MLAgents.Inference;
 using Unity.MLAgents.Inference.Utils;

 namespace Unity.MLAgents.Tests
 {
     public class DiscreteActionOutputApplierTest
     {
+        [Test]
+        public void TestDiscreteApply()
+        {
+            var actionSpec = ActionSpec.MakeDiscrete(3, 2);
+            const float smallLogProb = -1000.0f;
+            const float largeLogProb = -1.0f;
+
+            var logProbs = new TensorProxy
+            {
+                data = new Tensor(
+                    2,
+                    5,
+                    new[]
+                    {
+                        smallLogProb, smallLogProb, largeLogProb, // Agent 0, branch 0
+                        smallLogProb, largeLogProb, // Agent 0, branch 1
+                        largeLogProb, smallLogProb, smallLogProb, // Agent 1, branch 0
+                        largeLogProb, smallLogProb, // Agent 1, branch 1
+                    }),
+                valueType = TensorProxy.TensorType.FloatingPoint
+            };
+
+            var applier = new DiscreteActionOutputApplier(actionSpec, 2020, null);
+            var agentIds = new List<int> { 42, 1337 };
+            var actionBuffers = new Dictionary<int, ActionBuffers>();
+            actionBuffers[42] = new ActionBuffers(actionSpec);
+            actionBuffers[1337] = new ActionBuffers(actionSpec);
+
+            applier.Apply(logProbs, agentIds, actionBuffers);
+            Assert.AreEqual(2, actionBuffers[42].DiscreteActions[0]);
+            Assert.AreEqual(1, actionBuffers[42].DiscreteActions[1]);
+
+            Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[0]);
+            Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[1]);
+        }
-        [Test]
-        public void TestEvalP()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                data = new Tensor(1, 3, new[] { 0.1f, 0.2f, 0.7f }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                data = new Tensor(1, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-            float[] reference = { 2, 2, 1 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestEvalLogits()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                data = new Tensor(
-                    1,
-                    3,
-                    new[] { Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50 }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                data = new Tensor(1, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-            float[] reference = { 2, 2, 2 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestEvalBatching()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                data = new Tensor(2, 3, new[]
-                {
-                    Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50,
-                    Mathf.Log(0.3f) - 25, Mathf.Log(0.4f) - 25, Mathf.Log(0.3f) - 25
-                }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                data = new Tensor(2, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-            float[] reference = { 2, 2, 2, 0, 1, 0 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestSrcInt()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.Integer
-            };
-            Assert.Throws<NotImplementedException>(
-                () => DiscreteActionOutputApplier.Eval(src, null, m));
-        }
-
-        [Test]
-        public void TestDstInt()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.Integer
-            };
-            Assert.Throws<ArgumentException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestSrcDataNull()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            Assert.Throws<ArgumentNullException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestDstDataNull()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(0, 1)
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-            Assert.Throws<ArgumentNullException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestUnequalBatchSize()
-        {
-            var m = new Multinomial(2018);
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(1, 1)
-            };
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(2, 1)
-            };
-            Assert.Throws<ArgumentException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
     }
 }
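Why the assertions above do not depend on the sampling seed (a worked check, not part of the commit): with log-probabilities of -1000 versus -1, exp(-1000 - (-1)) underflows to zero in single precision, so the computed CDF is effectively one-hot and essentially every draw lands on the branch index that holds largeLogProb. The sketch below reproduces that for agent 0, branch 0 of the test data.

using System;

static class TestExpectationSketch
{
    static void Main()
    {
        // Agent 0, branch 0 in the test above: { smallLogProb, smallLogProb, largeLogProb }.
        var logits = new[] { -1000.0f, -1000.0f, -1.0f };
        var max = Math.Max(logits[0], Math.Max(logits[1], logits[2]));
        var cdf = new float[logits.Length];
        var sum = 0.0f;
        for (var i = 0; i < logits.Length; i++)
        {
            sum += (float)Math.Exp(logits[i] - max); // exp(-999) underflows to 0
            cdf[i] = sum;
        }
        // Prints "0, 0, 1": any p drawn in (0, 1] selects index 2, matching the assertion.
        Console.WriteLine(string.Join(", ", cdf));
    }
}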