浏览代码

Moving the Tensor Applier around (#5185)

Co-authored-by: Ruo-Ping Dong <ruoping.dong@unity3d.com>
/ai-hw-2021
GitHub 4 年前
当前提交
00fc501e
共有 8 个文件被更改,包括 158 次插入和 124 次删除
  1. 2
      Project/Packages/manifest.json
  2. 93
      com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
  3. 18
      com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
  4. 3
      com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
  5. 17
      com.unity.ml-agents/Runtime/Training/TrainingModelRunner.cs
  6. 8
      com.unity.ml-agents/Runtime/Training.meta
  7. 130
      com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs
  8. 11
      com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs.meta

2
Project/Packages/manifest.json


"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "3.6.1",
"com.unity.collab-proxy": "1.3.9",
"com.unity.ide.rider": "2.0.7",
"com.unity.ide.visualstudio": "2.0.7",
"com.unity.ide.vscode": "1.2.3",

"com.unity.nuget.newtonsoft-json": "2.0.0",
"com.unity.test-framework": "1.1.22",
"com.unity.textmeshpro": "3.0.1",
"com.unity.timeline": "1.4.6",
"com.unity.ugui": "1.0.0",
"com.unity.xr.legacyinputhelpers": "2.1.7",

93
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


}
}
/// <summary>
/// Applier that selects, for each agent, the argmax of the model's output logits
/// and writes it into the first discrete action slot.
/// </summary>
internal class MaxActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes the argmax action for each listed agent into <paramref name="lastActions"/>.
    /// </summary>
    /// <param name="tensorProxy">Output tensor; the last shape dimension is the action space size.</param>
    /// <param name="actionIds">Agent ids, in the same order as the tensor's batch rows.</param>
    /// <param name="lastActions">AgentId to ActionBuffers map receiving the chosen action.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var agentIndex = 0;
        var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var i = 0; i < actionIds.Count; i++)
        {
            var agentId = actionIds[i];
            if (lastActions.ContainsKey(agentId))
            {
                var actionBuffer = lastActions[agentId];
                if (actionBuffer.IsEmpty())
                {
                    actionBuffer = new ActionBuffers(m_ActionSpec);
                    lastActions[agentId] = actionBuffer;
                }
                var discreteBuffer = actionBuffer.DiscreteActions;
                // Argmax over the logits. Fixes two defects in the previous version:
                // maxValue was never updated inside the loop (so maxIndex tracked the
                // last positive entry, not the maximum), and casting logits to int
                // truncated fractional/negative values.
                var maxIndex = 0;
                var maxValue = float.MinValue;
                for (var j = 0; j < actionSpaceSize; j++)
                {
                    var value = tensorProxy.data[agentIndex, j];
                    if (value > maxValue)
                    {
                        maxValue = value;
                        maxIndex = j;
                    }
                }
                discreteBuffer[0] = maxIndex;
            }
            // Note: the batch row advances even for agents missing from lastActions,
            // keeping tensor rows aligned with actionIds. Preserved from the original.
            agentIndex++;
        }
    }
}
/// <summary>
/// Applier that converts a discretized action output back into two continuous actions.
/// The argmax over the logits is interpreted as a flattened (row, col) index on an
/// m_NumDiscretization x m_NumDiscretization grid, and each axis is mapped to [-1, 1].
/// </summary>
internal class ContinuousFromDiscreteOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;
    readonly int m_NumDiscretization;

    public ContinuousFromDiscreteOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator, int numDiscretization)
    {
        m_ActionSpec = actionSpec;
        m_NumDiscretization = numDiscretization;
    }

    /// <summary>
    /// Writes two continuous actions, decoded from the argmax bin, for each listed agent.
    /// </summary>
    /// <param name="tensorProxy">Output tensor; the last shape dimension is the (discretized) action space size.</param>
    /// <param name="actionIds">Agent ids, in the same order as the tensor's batch rows.</param>
    /// <param name="lastActions">AgentId to ActionBuffers map receiving the decoded actions.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var agentIndex = 0;
        var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var i = 0; i < actionIds.Count; i++)
        {
            var agentId = actionIds[i];
            if (lastActions.ContainsKey(agentId))
            {
                var actionBuffer = lastActions[agentId];
                if (actionBuffer.IsEmpty())
                {
                    actionBuffer = new ActionBuffers(m_ActionSpec);
                    lastActions[agentId] = actionBuffer;
                }
                var continuousBuffer = actionBuffer.ContinuousActions;
                // Argmax over the logits. Fixes the previous version, which never
                // updated maxValue and truncated the logits to int.
                var maxIndex = 0;
                var maxValue = float.MinValue;
                for (var j = 0; j < actionSpaceSize; j++)
                {
                    var value = tensorProxy.data[agentIndex, j];
                    if (value > maxValue)
                    {
                        maxValue = value;
                        maxIndex = j;
                    }
                }
                // Decode the flattened bin index into (row, col) and map each axis from
                // [0, N-1] to [-1, 1]. The previous expression used pure integer
                // arithmetic with "/2" instead of "*2", which collapsed nearly every
                // result to a constant.
                // NOTE(review): assumes the trainer discretizes each axis uniformly over
                // [-1, 1] — confirm against the discretization on the training side.
                var row = maxIndex / m_NumDiscretization;
                var col = maxIndex % m_NumDiscretization;
                continuousBuffer[0] = 2f * row / (m_NumDiscretization - 1) - 1f;
                continuousBuffer[1] = 2f * col / (m_NumDiscretization - 1) - 1f;
            }
            agentIndex++;
        }
    }
}
/// <summary>
/// The Applier for the Discrete Action output tensor. Uses multinomial to sample discrete
/// actions from the logits contained in the tensor.

18
com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs


return names.ToArray();
}
/// <summary>
/// Returns the sorted names of the tensors produced by the training graph,
/// or an empty array when no model is loaded.
/// </summary>
/// <param name="model">The Barracuda model to inspect; may be null.</param>
/// <returns>Sorted array of training output tensor names.</returns>
public static string[] GetTrainingOutputNames(this Model model)
{
    if (model == null)
    {
        return new string[0];
    }
    // NOTE(review): "OuputLoss" is the constant's actual (misspelled) name upstream.
    var names = new List<string>
    {
        TensorNames.TrainingStateOut,
        TensorNames.OuputLoss,
        TensorNames.TrainingOutput,
    };
    names.Sort();
    return names.ToArray();
}
/// <summary>
/// Check if the model has continuous action outputs.
/// </summary>

3
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


}
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
// m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
m_Dict[tensorName] = new MaxActionOutputApplier(actionSpec, seed, allocator);
m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
}
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);

17
com.unity.ml-agents/Runtime/Training/TrainingModelRunner.cs


ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TrainingTensorGenerator m_TrainingTensorGenerator;
TensorApplier m_TensorApplier;
TrainingForwardTensorApplier m_TensorApplier;
string[] m_TrainingOutputNames;
IReadOnlyList<TensorProxy> m_TrainingInputs;
IReadOnlyList<TensorProxy> m_InferenceInputs;
List<TensorProxy> m_TrainingOutputs;

seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TrainingTensorGenerator = new TrainingTensorGenerator(
seed, m_TensorAllocator, config.learningRate, config.gamma, barracudaModel);
m_TensorApplier = new TensorApplier(
actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TrainingForwardTensorApplier(
actionSpec, seed, m_TensorAllocator, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_TrainingOutputs = new List<TensorProxy>();
m_Buffer = buffer;

void InitializeTrainingState()
{
var initState = m_Model.GetTensorByName(TensorNames.InitialTrainingState);
m_TrainingState = new TensorProxy{
m_TrainingState = new TensorProxy
{
name = TensorNames.InitialTrainingState,
valueType = TensorProxy.TensorType.FloatingPoint,
data = initState,

// Execute the Model
m_Engine.Execute(m_InputsByName);
FetchBarracudaOutputs(m_TrainingOutputNames);
FetchBarracudaOutputs(new string[] { TensorNames.TrainingOutput });
// Update the outputs
m_TensorApplier.ApplyTensors(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);

// Execute the Model
m_Engine.Execute(m_InputsByName);
FetchBarracudaOutputs(m_TrainingOutputNames);
// m_TensorApplier.UpdateModel(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);
FetchBarracudaOutputs(new string[] { TensorNames.TrainingStateOut });
m_TrainingState = m_TrainingOutputs[0];
}
public ActionBuffers GetAction(int agentId)

8
com.unity.ml-agents/Runtime/Training.meta


fileFormatVersion: 2
guid: 676cc58c5738749bf836e799a89c7c94
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

130
com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using UnityEngine;
namespace Unity.MLAgents.Inference
{
/// <summary>
/// Mapping between the output tensor names and the method that will use the
/// output tensors and the Agents present in the batch to update their action, memories and
/// value estimates.
/// A TensorApplier implements a Dictionary of strings (node names) to an Action.
/// This action takes as input the tensor and the Dictionary of Agent to AgentInfo for
/// the current batch.
/// </summary>
/// <summary>
/// Mapping between output tensor names and the appliers that use those tensors to
/// update agent actions during a training forward pass. Only supports a single
/// discrete action branch; the sole registered applier consumes
/// TensorNames.TrainingOutput.
/// </summary>
internal class TrainingForwardTensorApplier
{
    readonly Dictionary<string, TensorApplier.IApplier> m_Dict = new Dictionary<string, TensorApplier.IApplier>();

    /// <summary>
    /// Returns a new TrainingForwardTensorApplier object.
    /// </summary>
    /// <param name="actionSpec"> Description of the actions for the Agent.</param>
    /// <param name="seed"> The seed the Appliers will be initialized with.</param>
    /// <param name="allocator"> Tensor allocator</param>
    /// <param name="barracudaModel">The Barracuda model; when null, no appliers are registered.</param>
    /// <exception cref="System.Exception">Thrown when the action spec has continuous
    /// actions or more than one discrete branch.</exception>
    public TrainingForwardTensorApplier(
        ActionSpec actionSpec,
        int seed,
        ITensorAllocator allocator,
        object barracudaModel = null)
    {
        // If model is null, no inference to run and exception is thrown before reaching here.
        if (barracudaModel == null)
        {
            return;
        }
        if (actionSpec.NumContinuousActions > 0)
        {
            throw new System.Exception("Cannot do continuous actions");
        }
        if (actionSpec.NumDiscreteActions != 1)
        {
            throw new System.Exception("Cannot do multi discrete actions, only single discrete");
        }
        // The previous version cast barracudaModel to Model into an unused local;
        // the applier below does not need the model, so the dead assignment is removed.
        m_Dict[TensorNames.TrainingOutput] = new MaxActionOutputApplier(actionSpec, seed, allocator);
    }

    /// <summary>
    /// Updates the state of the agents based on the data present in the tensor.
    /// </summary>
    /// <param name="tensors"> Enumerable of tensors containing the data.</param>
    /// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
    /// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
    /// <exception cref="UnityAgentsException"> One of the tensor does not have an
    /// associated applier.</exception>
    public void ApplyTensors(
        IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
        {
            var tensor = tensors[tensorIndex];
            if (!m_Dict.ContainsKey(tensor.name))
            {
                throw new UnityAgentsException(
                    $"Unknown tensorProxy expected as output : {tensor.name}");
            }
            m_Dict[tensor.name].Apply(tensor, actionIds, lastActions);
        }
    }
}
/// <summary>
/// Applier that selects, for each agent, the argmax of the model's output logits
/// and writes it into the first discrete action slot.
/// NOTE(review): this duplicates the MaxActionOutputApplier added to ApplierImpl.cs
/// in the same namespace in this commit — that is a compile conflict; confirm one
/// of the two definitions is removed.
/// </summary>
internal class MaxActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes the argmax action for each listed agent into <paramref name="lastActions"/>.
    /// </summary>
    /// <param name="tensorProxy">Output tensor; the last shape dimension is the action space size.</param>
    /// <param name="actionIds">Agent ids, in the same order as the tensor's batch rows.</param>
    /// <param name="lastActions">AgentId to ActionBuffers map receiving the chosen action.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var agentIndex = 0;
        var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var i = 0; i < actionIds.Count; i++)
        {
            var agentId = actionIds[i];
            if (lastActions.ContainsKey(agentId))
            {
                var actionBuffer = lastActions[agentId];
                if (actionBuffer.IsEmpty())
                {
                    actionBuffer = new ActionBuffers(m_ActionSpec);
                    lastActions[agentId] = actionBuffer;
                }
                var discreteBuffer = actionBuffer.DiscreteActions;
                // Argmax over the logits. Fixes two defects in the previous version:
                // maxValue was never updated inside the loop (so maxIndex tracked the
                // last positive entry, not the maximum), and casting logits to int
                // truncated fractional/negative values.
                var maxIndex = 0;
                var maxValue = float.MinValue;
                for (var j = 0; j < actionSpaceSize; j++)
                {
                    var value = tensorProxy.data[agentIndex, j];
                    if (value > maxValue)
                    {
                        maxValue = value;
                        maxIndex = j;
                    }
                }
                discreteBuffer[0] = maxIndex;
            }
            agentIndex++;
        }
    }
}
}

11
com.unity.ml-agents/Runtime/Training/TrainingForwardTensorApplier.cs.meta


fileFormatVersion: 2
guid: a2677467266ab48cfb01c5d873d043a9
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存