Compare commits

...
This merge request contains changes that conflict with the target branch.
/Project/Packages/manifest.json
/Project/ProjectSettings/ProjectVersion.txt
/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
/com.unity.ml-agents/Runtime/Academy.cs
/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs

1 commit

Author SHA1 Message Commit date
vincentpierre a9ca4a7d Moving the tensor applier arround 4 years ago
Showing 23 changed files, with 1,106 additions and 35 deletions
  1. Project/Packages/manifest.json (9 changes)
  2. Project/ProjectSettings/ProjectVersion.txt (4 changes)
  3. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (41 changes)
  4. com.unity.ml-agents/Runtime/Academy.cs (14 changes)
  5. com.unity.ml-agents/Runtime/Inference/TensorProxy.cs (18 changes)
  6. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (3 changes)
  7. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (1 change)
  8. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (21 changes)
  9. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (1 change)
  10. com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs (59 changes)
  11. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (9 changes)
  12. com.unity.ml-agents/Runtime/ReplayBuffer.cs.meta (11 changes)
  13. com.unity.ml-agents/Runtime/Trainer.cs.meta (11 changes)
  14. com.unity.ml-agents/Runtime/ReplayBuffer.cs (87 changes)
  15. com.unity.ml-agents/Runtime/Trainer.cs (79 changes)
  16. com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs (267 changes)
  17. com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs.meta (11 changes)
  18. com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs (130 changes)
  19. com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs.meta (11 changes)
  20. com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs.meta (11 changes)
  21. com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs (84 changes)
  22. com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs.meta (11 changes)
  23. com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs (248 changes)

Project/Packages/manifest.json (9 changes)


"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "3.6.1",
"com.unity.collab-proxy": "1.2.16",
"com.unity.ide.rider": "1.1.4",
"com.unity.collab-proxy": "1.3.9",
"com.unity.ide.rider": "2.0.7",
"com.unity.ide.visualstudio": "2.0.7",
"com.unity.ide.vscode": "1.2.3",
"com.unity.ml-agents": "file:../../com.unity.ml-agents",
"com.unity.ml-agents.extensions": "file:../../com.unity.ml-agents.extensions",

"com.unity.textmeshpro": "2.0.1",
"com.unity.timeline": "1.2.6",
"com.unity.textmeshpro": "3.0.1",
"com.unity.timeline": "1.4.6",
"com.unity.ugui": "1.0.0",
"com.unity.xr.legacyinputhelpers": "2.1.7",
"com.unity.modules.ai": "1.0.0",

Project/ProjectSettings/ProjectVersion.txt (4 changes)


- m_EditorVersion: 2019.4.20f1
- m_EditorVersionWithRevision: 2019.4.20f1 (6dd1c08eedfa)
+ m_EditorVersion: 2020.3.0f1
+ m_EditorVersionWithRevision: 2020.3.0f1 (c7b5465681fb)

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (41 changes)


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1036225416237908}
- m_Material: {fileID: 13400000, guid: 56162663048874fd4b10e065f9cf78b7, type: 2}
+ m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!54 &54597526346971362
Rigidbody:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1321468028730240
GameObject:
m_ObjectHideFlags: 0

VectorObservationSize: 8
NumStackedVectorObservations: 1
m_ActionSpec:
- m_NumContinuousActions: 2
- BranchSizes:
- VectorActionSize: 02000000
+ m_NumContinuousActions: 0
+ BranchSizes: 0a0000000a000000
+ VectorActionSize: 0a0000000a000000
- VectorActionSpaceType: 1
+ VectorActionSpaceType: 0
- m_Model: {fileID: 11400000, guid: 20a7b83be6b0c493d9271c65c897eb9b, type: 3}
+ m_Model: {fileID: 5022602860645237092, guid: 35d5202e6dbc04a50934f20df199b47f, type: 3}
- m_BehaviorType: 0
+ m_BehaviorType: 3
m_BehaviorName: 3DBall
TeamId: 0
m_UseChildSensors: 1

m_ClearFlags: 2
m_BackGroundColor: {r: 0.46666667, g: 0.5647059, b: 0.60784316, a: 1}
m_projectionMatrixMode: 1
- m_GateFitMode: 2
+ m_FOVAxisMode: 0
+ m_GateFitMode: 2
m_FocalLength: 50
m_NormalizedViewPortRect:
serializedVersion: 2

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1854695166504686
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1859240399150782
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}
--- !u!1 &1999020414315134
GameObject:
m_ObjectHideFlags: 0

m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RayTracingMode: 2
m_RayTraceProcedural: 0
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:

m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_ReceiveGI: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
m_AdditionalVertexStreams: {fileID: 0}

com.unity.ml-agents/Runtime/Academy.cs (14 changes)


bool m_Initialized;
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
List<Trainer> m_Trainers = new List<Trainer>();
// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;

// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
internal event Action TrainerUpdate;
/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.

{
AgentAct?.Invoke();
}
TrainerUpdate?.Invoke();
}
}

m_InferenceSeed++;
}
return modelRunner;
}
internal Trainer GetOrCreateTrainer(string behaviorName, ActionSpec actionSpec, NNModel model)
{
var trainer = m_Trainers.Find(x => x.BehaviorName == behaviorName);
if (trainer == null)
{
trainer = new Trainer(behaviorName, actionSpec, model);
m_Trainers.Add(trainer);
}
return trainer;
}
/// <summary>

com.unity.ml-agents/Runtime/Inference/TensorProxy.cs (18 changes)


using System;
using System.Linq;
using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Inference.Utils;

for (var i = 0; i < tensorProxy.data.length; i++)
{
tensorProxy.data[i] = (float)randomNormal.NextDouble();
}
}
public static void CopyTensor(TensorProxy source, TensorProxy target)
{
for (var b = 0; b < source.data.batch; b++)
{
for (var i = 0; i < source.data.height; i++)
{
for (var j = 0; j < source.data.width; j++)
{
for(var k = 0; k < source.data.channels; k++)
{
target.data[b, i, j, k] = source.data[b, i, j, k];
}
}
}
}
}
}

com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (3 changes)


}
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
- m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
+ // m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
+ m_Dict[tensorName] = new MaxActionOutputApplier(actionSpec, seed, allocator);
}
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);
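
Note: the hunk above swaps the stochastic DiscreteActionOutputApplier (which, per the ApplierImpl.cs doc comment further down, samples discrete actions from the logits with a multinomial) for a greedy MaxActionOutputApplier. For contrast, a minimal standalone sketch of the two selection rules over one agent's logits (illustrative only, not the package's implementation):

using System;
using System.Linq;

static class ActionSelection
{
    // Greedy: index of the largest logit (what MaxActionOutputApplier computes).
    public static int ArgMax(float[] logits)
    {
        var best = 0;
        for (var i = 1; i < logits.Length; i++)
        {
            if (logits[i] > logits[best]) best = i;
        }
        return best;
    }

    // Multinomial: index sampled with probability softmax(logits).
    public static int Sample(float[] logits, Random rng)
    {
        var max = logits.Max();
        var exps = logits.Select(l => Math.Exp(l - max)).ToArray();
        var r = rng.NextDouble() * exps.Sum();
        var acc = 0.0;
        for (var i = 0; i < exps.Length; i++)
        {
            acc += exps[i];
            if (r <= acc) return i;
        }
        return exps.Length - 1;
    }
}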

com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (1 change)


using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;
using UnityEngine;
namespace Unity.MLAgents.Inference
{

com.unity.ml-agents/Runtime/Inference/TensorNames.cs (21 changes)


using System.Collections.Generic;
using System.Linq;
using System;
namespace Unity.MLAgents.Inference
{
/// <summary>

// Deprecated TensorNames entries for backward compatibility
public const string IsContinuousControlDeprecated = "is_continuous_control";
public const string ActionOutputDeprecated = "action";
public const string ActionOutputDeprecated = "action_";
// Tensors for in-editor training
public const string Observations = "input";
public const string ActionInput = "action";
public const string RewardInput = "reward";
public const string DoneInput = "done";
public const string Gamma = "gamma";
public const string NextObservations = "next_state";
public const string LearningRate = "lr";
public const string TrainingStateIn = "training_state.1";
public const string TrainingOutput = "output";
public const string OuputLoss = "loss";
public const string TrainingStateOut = "training_state";
public const string InitialTrainingState = "initial_training_state";
/// <summary>
/// Returns the name of the visual observation with a given index

com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (1 change)


}
}
/// <summary>
/// The Applier for the Discrete Action output tensor. Uses multinomial to sample discrete
/// actions from the logits contained in the tensor.

com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs (59 changes)


using System.Linq;
using Unity.Barracuda;
using FailedCheck = Unity.MLAgents.Inference.BarracudaModelParamLoader.FailedCheck;
using UnityEngine;
namespace Unity.MLAgents.Inference
{

/// <returns>The api version of the model</returns>
public static int GetVersion(this Model model)
{
- return (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
+ // return (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
+ return 3;
}
/// <summary>

valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
tensors.Sort((el1, el2) => el1.name.CompareTo(el2.name));
return tensors;
}
public static IReadOnlyList<TensorProxy> GetTrainingInputTensors(this Model model)
{
var tensors = new List<TensorProxy>();
if (model == null)
return tensors;
foreach (var input in model.inputs)
{
tensors.Add(new TensorProxy
{
name = input.name,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = input.shape.Select(i => (long)i).ToArray()
});
}

public static bool CheckExpectedTensors(this Model model, List<FailedCheck> failedModelChecks)
{
// Check the presence of model version
- var modelApiVersionTensor = model.GetTensorByName(TensorNames.VersionNumber);
- if (modelApiVersionTensor == null)
- {
- failedModelChecks.Add(
- FailedCheck.Warning($"Required constant \"{TensorNames.VersionNumber}\" was not found in the model file.")
- );
- return false;
- }
+ // var modelApiVersionTensor = model.GetTensorByName(TensorNames.VersionNumber);
+ // if (modelApiVersionTensor == null)
+ // {
+ // failedModelChecks.Add(
+ // FailedCheck.Warning($"Required constant \"{TensorNames.VersionNumber}\" was not found in the model file.")
+ // );
+ // return false;
+ // }
- var memorySizeTensor = model.GetTensorByName(TensorNames.MemorySize);
- if (memorySizeTensor == null)
- {
- failedModelChecks.Add(
- FailedCheck.Warning($"Required constant \"{TensorNames.MemorySize}\" was not found in the model file.")
- );
- return false;
- }
+ // var memorySizeTensor = model.GetTensorByName(TensorNames.MemorySize);
+ // if (memorySizeTensor == null)
+ // {
+ // failedModelChecks.Add(
+ // FailedCheck.Warning($"Required constant \"{TensorNames.MemorySize}\" was not found in the model file.")
+ // );
+ // return false;
+ // }
// Check the presence of action output tensor
if (!model.outputs.Contains(TensorNames.ActionOutputDeprecated) &&

com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (9 changes)


/// The Agent will always use inference with the provided
/// neural network model.
/// </summary>
- InferenceOnly
+ InferenceOnly,
+ /// <summary>
+ /// C# training
+ /// </summary>
+ InEditorTraining
}
/// <summary>

}
return new BarracudaPolicy(actionSpec, actuatorManager, m_Model, m_InferenceDevice, m_BehaviorName);
}
case BehaviorType.InEditorTraining:
return new TrainingPolicy(actionSpec, m_BehaviorName, m_Model);
case BehaviorType.Default:
if (Academy.Instance.IsCommunicatorOn)
{

com.unity.ml-agents/Runtime/ReplayBuffer.cs.meta (11 changes)


fileFormatVersion: 2
guid: be3c5834a200742ed983cd073dd69f9a
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Trainer.cs.meta (11 changes)


fileFormatVersion: 2
guid: 8dd9e7f1621bd487998fd883b2518733
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/ReplayBuffer.cs (87 changes)


// Buffer for C# training
using System;
using System.Linq;
using Unity.Barracuda;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
namespace Unity.MLAgents
{
internal struct Transition
{
public IReadOnlyList<TensorProxy> state;
public ActionBuffers action;
public float reward;
public bool done;
public IReadOnlyList<TensorProxy> nextState;
}
internal class ReplayBuffer
{
List<Transition> m_Buffer;
int m_CurrentIndex;
int m_MaxSize;
public ReplayBuffer(int maxSize)
{
m_Buffer = new List<Transition>();
m_Buffer.Capacity = maxSize;
m_MaxSize = maxSize;
}
public int Count
{
get => m_Buffer.Count;
}
public void Push(AgentInfo info, IReadOnlyList<TensorProxy> state, IReadOnlyList<TensorProxy> nextState)
{
if (m_Buffer.Count < m_MaxSize)
{
m_Buffer.Add(new Transition() {state=state, action=info.storedActions, reward=info.reward, done=info.done, nextState=nextState});
}
else
{
m_Buffer[m_CurrentIndex] = new Transition() {state=state, action=info.storedActions, reward=info.reward, done=info.done, nextState=nextState};
}
m_CurrentIndex += 1;
m_CurrentIndex = m_CurrentIndex % m_MaxSize;
}
public List<Transition> SampleBatch(int batchSize)
{
var indexList = SampleIndex(batchSize);
var samples = new List<Transition>(batchSize);
for (var i = 0; i < batchSize; i++)
{
samples.Add(m_Buffer[indexList[i]]);
}
return samples;
}
public List<Transition> SampleDummyBatch(int batchSize)
{
var indexList = SampleIndex(batchSize);
var samples = new List<Transition>(batchSize);
for (var i = 0; i < batchSize; i++)
{
samples.Add(m_Buffer[m_CurrentIndex-1]);
}
return samples;
}
private List<int> SampleIndex(int batchSize)
{
Random random = new Random();
HashSet<int> index = new HashSet<int>();
while (index.Count < batchSize)
{
index.Add(random.Next(m_Buffer.Count));
}
return index.ToList();
}
}
}
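
ReplayBuffer is a fixed-capacity ring buffer with uniform random sampling: Push appends until maxSize is reached, then overwrites the oldest slot. The same pattern in a self-contained generic sketch (illustrative only; the real class stores Transition structs and is driven by TrainingPolicy further down):

using System;
using System.Collections.Generic;
using System.Linq;

class RingBuffer<T>
{
    readonly List<T> m_Items = new List<T>();
    readonly int m_MaxSize;
    int m_Next;

    public RingBuffer(int maxSize) { m_MaxSize = maxSize; }

    public int Count => m_Items.Count;

    public void Push(T item)
    {
        if (m_Items.Count < m_MaxSize)
            m_Items.Add(item);
        else
            m_Items[m_Next] = item; // overwrite the oldest entry
        m_Next = (m_Next + 1) % m_MaxSize;
    }

    // Uniform sampling without replacement; batchSize must not exceed Count.
    public List<T> Sample(int batchSize, Random rng)
    {
        var picked = new HashSet<int>();
        while (picked.Count < batchSize)
            picked.Add(rng.Next(m_Items.Count));
        return picked.Select(i => m_Items[i]).ToList();
    }
}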

com.unity.ml-agents/Runtime/Trainer.cs (79 changes)


// Trainer for C# training. One trainer per behavior.
using System;
using Unity.MLAgents.Actuators;
using Unity.Barracuda;
using UnityEngine;
namespace Unity.MLAgents
{
internal class TrainerConfig
{
public int bufferSize = 100;
public int batchSize = 4;
public float gamma = 0.99f;
public float learningRate = 0.0005f;
public int updateTargetFreq = 200;
}
internal class Trainer: IDisposable
{
ReplayBuffer m_Buffer;
TrainingModelRunner m_ModelRunner;
TrainingModelRunner m_TargetModelRunner;
string m_behaviorName;
TrainerConfig m_Config;
int m_TrainingStep;
public Trainer(string behaviorName, ActionSpec actionSpec, NNModel model, int seed=0, TrainerConfig config=null)
{
m_Config = config ?? new TrainerConfig();
m_behaviorName = behaviorName;
m_Buffer = new ReplayBuffer(m_Config.bufferSize);
m_ModelRunner = new TrainingModelRunner(actionSpec, model, m_Buffer, m_Config, seed);
m_TargetModelRunner = new TrainingModelRunner(actionSpec, model, m_Buffer, m_Config, seed);
// copy weights from model to target model
// m_TargetModelRunner.model.weights = m_ModelRunner.model.weights
Academy.Instance.TrainerUpdate += Update;
}
public string BehaviorName
{
get => m_behaviorName;
}
public ReplayBuffer Buffer
{
get => m_Buffer;
}
public TrainingModelRunner TrainerModelRunner
{
get => m_ModelRunner;
}
public void Dispose()
{
Academy.Instance.TrainerUpdate -= Update;
}
public void Update()
{
if (m_Buffer.Count < m_Config.batchSize * 2)
{
return;
}
var samples = m_Buffer.SampleBatch(m_Config.batchSize);
m_ModelRunner.UpdateModel(samples);
// Update target network
if (m_TrainingStep % m_Config.updateTargetFreq == 0)
{
// copy weights
}
m_TrainingStep += 1;
}
}
}
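
Trainer.Update() samples a batch once the buffer holds 2 * batchSize transitions, and leaves the periodic target-network copy as a TODO. In a standard DQN setup that missing step is either a hard copy every updateTargetFreq steps or a soft (Polyak) update; a sketch over plain weight arrays (an assumed representation; Barracuda does not expose model weights this way out of the box):

using System;

static class TargetNetwork
{
    // Hard update: target <- online.
    public static void HardUpdate(float[][] online, float[][] target)
    {
        for (var i = 0; i < online.Length; i++)
        {
            Array.Copy(online[i], target[i], online[i].Length);
        }
    }

    // Soft update: target <- tau * online + (1 - tau) * target.
    public static void SoftUpdate(float[][] online, float[][] target, float tau)
    {
        for (var i = 0; i < online.Length; i++)
        {
            for (var j = 0; j < online[i].Length; j++)
            {
                target[i][j] = tau * online[i][j] + (1f - tau) * target[i][j];
            }
        }
    }
}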

com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs (267 changes)


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Sensors;
using Unity.MLAgents;
using UnityEngine;
namespace Unity.MLAgents.Inference
{
internal class TrainingTensorGenerator
{
public interface ITrainingGenerator
{
void Generate(
TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState);
}
readonly Dictionary<string, ITrainingGenerator> m_Dict = new Dictionary<string, ITrainingGenerator>();
public TrainingTensorGenerator(
int seed,
ITensorAllocator allocator,
float learning_rate,
float gamma,
object barracudaModel = null
)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
// Generator for Inputs
var obsGen = new CopyObservationTensorsGenerator(allocator);
obsGen.SetSensorIndex(0);
m_Dict[TensorNames.Observations] = obsGen;
var nextObsGen = new CopyNextObservationTensorsGenerator(allocator);
nextObsGen.SetSensorIndex(0);
m_Dict[TensorNames.NextObservations] = nextObsGen;
m_Dict[TensorNames.ActionInput] = new ActionInputGenerator(allocator);
m_Dict[TensorNames.RewardInput] = new RewardInputGenerator(allocator);
m_Dict[TensorNames.DoneInput] = new DoneInputGenerator(allocator);
m_Dict[TensorNames.LearningRate] = new ConstantGenerator(allocator,learning_rate);
m_Dict[TensorNames.Gamma] = new ConstantGenerator(allocator, gamma);
m_Dict[TensorNames.BatchSizePlaceholder] = new TrainingBatchSizeGenerator(allocator);
m_Dict[TensorNames.TrainingStateIn] = new TrainingStateGenerator(allocator);
}
/// <summary>
/// Populates the data of the tensor inputs given the data contained in the current batch
/// of transitions.
/// </summary>
/// <param name="tensors"> Enumerable of tensors that will be modified.</param>
/// <param name="currentBatchSize"> The number of elements in the current batch.
/// </param>
/// <param name="transitions"> List of transitions containing the data used to
/// fill the tensors.</param>
/// <param name="trainingState"> The model's recurrent training-state tensor.</param>
/// <param name="training"> Whether the tensors are generated for a training pass.</param>
/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated generator.</exception>
public void GenerateTensors(
IReadOnlyList<TensorProxy> tensors, int currentBatchSize, IList<Transition> transitions, TensorProxy trainingState, bool training=false)
{
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
{
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(
$"Unknown tensorProxy expected as input : {tensor.name}");
}
if ((tensor.name == TensorNames.Observations || tensor.name == TensorNames.BatchSizePlaceholder) && training == false)
{
continue;
}
m_Dict[tensor.name].Generate(tensor, currentBatchSize, transitions, trainingState);
}
}
public static void CopyTensorToBatch(TensorProxy source, TensorProxy target, int batchIndex)
{
for (var i = 0; i < source.Height; i++)
{
for (var j = 0; j < source.Width; j++)
{
for(var k = 0; k < source.Channels; k++)
{
target.data[batchIndex, i, j, k] = source.data[0, i, j, k];
}
}
}
}
}
internal class ActionInputGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public ActionInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
var actions = transitions[index].action.DiscreteActions;
for (var j = 0; j < actions.Length; j++)
{
tensorProxy.data[index, j] = actions[j];
}
}
}
}
internal class RewardInputGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public RewardInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
tensorProxy.data[index, 0] = transitions[index].reward;
}
}
}
internal class DoneInputGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public DoneInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
tensorProxy.data[index, 0] = transitions[index].done ? 1f : 0f;
}
}
}
internal class CopyObservationTensorsGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
int m_SensorIndex;
public CopyObservationTensorsGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void SetSensorIndex(int index)
{
m_SensorIndex = index;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
TrainingTensorGenerator.CopyTensorToBatch(transitions[index].state[m_SensorIndex], tensorProxy, index);
}
}
}
internal class CopyNextObservationTensorsGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
int m_SensorIndex;
public CopyNextObservationTensorsGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void SetSensorIndex(int index)
{
m_SensorIndex = index;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
for (var index = 0; index < batchSize; index++)
{
TrainingTensorGenerator.CopyTensorToBatch(transitions[index].nextState[m_SensorIndex], tensorProxy, index);
}
}
}
internal class ConstantGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
float m_Const;
public ConstantGenerator(ITensorAllocator allocator, float c)
{
m_Allocator = allocator;
m_Const = c;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
// A constant input (e.g. gamma or the learning rate) is a single-element
// tensor regardless of the batch size, so allocate and write it once.
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
tensorProxy.data[0] = m_Const;
}
}
internal class TrainingBatchSizeGenerator : TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public TrainingBatchSizeGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
tensorProxy.data[0] = batchSize;
}
}
internal class TrainingStateGenerator: TrainingTensorGenerator.ITrainingGenerator
{
readonly ITensorAllocator m_Allocator;
public TrainingStateGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
// The training state is shared across the batch, so a single copy fills the tensor.
TensorUtils.CopyTensor(trainingState, tensorProxy);
}
}
}
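
The reward, done, gamma and next-observation inputs registered above are exactly the ingredients of a DQN-style temporal-difference target; the loss itself presumably lives inside the training graph rather than in C#. For reference, the target for a single transition under that assumption (illustrative, not code from the branch):

static class TdTarget
{
    // y = reward                                   if the episode ended
    // y = reward + gamma * max_a Q(nextState, a)   otherwise
    public static float Compute(float reward, bool done, float gamma, float[] nextQ)
    {
        if (done) return reward;
        var max = nextQ[0];
        for (var i = 1; i < nextQ.Length; i++)
        {
            if (nextQ[i] > max) max = nextQ[i];
        }
        return reward + gamma * max;
    }
}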

com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs.meta (11 changes)


fileFormatVersion: 2
guid: cca690e21a2fe49b49f636cd4e76e0b4
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs (130 changes)


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using UnityEngine;
namespace Unity.MLAgents.Inference
{
/// <summary>
/// Mapping between the output tensor names and the method that will use the
/// output tensors and the Agents present in the batch to update their action, memories and
/// value estimates.
/// A TensorApplier implements a Dictionary of strings (node names) to an Action.
/// This action takes as input the tensor and the Dictionary of Agent to AgentInfo for
/// the current batch.
/// </summary>
internal class TrainingForwardTensorApplier
{
readonly Dictionary<string, TensorApplier.IApplier> m_Dict = new Dictionary<string, TensorApplier.IApplier>();
/// <summary>
/// Returns a new TensorAppliers object.
/// </summary>
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
/// <param name="seed"> The seed the Appliers will be initialized with.</param>
/// <param name="allocator"> Tensor allocator</param>
/// <param name="memories">Dictionary of AgentInfo.id to memory used to pass to the inference model.</param>
/// <param name="barracudaModel"></param>
public TrainingForwardTensorApplier(
ActionSpec actionSpec,
int seed,
ITensorAllocator allocator,
object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
if (actionSpec.NumContinuousActions > 0)
{
throw new System.Exception("Cannot do continuous actions");
}
if (actionSpec.NumDiscreteActions != 1)
{
throw new System.Exception("Cannot do multi discrete actions, only single discrete");
}
var model = (Model)barracudaModel;
m_Dict[TensorNames.TrainingOutput] = new MaxActionOutputApplier(actionSpec, seed, allocator);
}
/// <summary>
/// Updates the state of the agents based on the data present in the tensor.
/// </summary>
/// <param name="tensors"> Enumerable of tensors containing the data.</param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
/// <exception cref="UnityAgentsException"> One of the tensor does not have an
/// associated applier.</exception>
public void ApplyTensors(
IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
{
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(
$"Unknown tensorProxy expected as output : {tensor.name}");
}
m_Dict[tensor.name].Apply(tensor, actionIds, lastActions);
}
}
}
internal class MaxActionOutputApplier : TensorApplier.IApplier
{
readonly ActionSpec m_ActionSpec;
public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
{
m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
{
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
}
var discreteBuffer = actionBuffer.DiscreteActions;
// Greedy selection: take the index of the largest action value.
var maxIndex = 0;
var maxValue = float.MinValue;
for (var j = 0; j < actionSpaceSize; j++)
{
var value = tensorProxy.data[agentIndex, j];
if (value > maxValue)
{
maxValue = value;
maxIndex = j;
}
}
discreteBuffer[0] = maxIndex;
}
agentIndex++;
}
}
}
}

com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs.meta (11 changes)


fileFormatVersion: 2
guid: eaafcce9c7c794667bc726e40e420824
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs.meta (11 changes)


fileFormatVersion: 2
guid: 03ace8815cd804ee994a5068f618b845
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs (84 changes)


// Policy for C# training
using Unity.Barracuda;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies
{
internal class TrainingPolicy : IPolicy
{
protected TrainingModelRunner m_ModelRunner;
ActionBuffers m_LastActionBuffer;
int m_AgentId;
ActionSpec m_ActionSpec;
string m_BehaviorName;
AgentInfo m_LastInfo;
IReadOnlyList<TensorProxy> m_LastObservations;
ReplayBuffer m_buffer;
IReadOnlyList<TensorProxy> m_CurrentObservations;
/// <inheritdoc />
public TrainingPolicy(
ActionSpec actionSpec,
string behaviorName,
NNModel model
)
{
var trainer = Academy.Instance.GetOrCreateTrainer(behaviorName, actionSpec, model);
m_ModelRunner = trainer.TrainerModelRunner;
m_buffer = trainer.Buffer;
m_CurrentObservations = m_ModelRunner.GetInputTensors();
m_BehaviorName = behaviorName;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
m_AgentId = info.episodeId;
m_ModelRunner.PutObservations(info, sensors);
m_ModelRunner.GetObservationTensors(m_CurrentObservations, info, sensors);
if (m_LastObservations != null)
{
m_buffer.Push(m_LastInfo, m_LastObservations, m_CurrentObservations);
}
else if (m_buffer.Count == 0)
{
// hack
m_buffer.Push(info, m_CurrentObservations, m_CurrentObservations);
}
m_LastInfo = info;
m_LastObservations = m_CurrentObservations;
if (info.done)
{
m_buffer.Push(info, m_CurrentObservations, m_CurrentObservations); // dummy next_state
m_LastObservations = null;
}
}
/// <inheritdoc />
public ref readonly ActionBuffers DecideAction()
{
m_ModelRunner.DecideBatch();
m_LastActionBuffer = m_ModelRunner.GetAction(m_AgentId);
return ref m_LastActionBuffer;
}
public void Dispose()
{
}
}
}
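
RequestDecision assembles transitions across consecutive calls: the observations captured on the previous call become the transition's state, the fresh ones its nextState, and the previous AgentInfo supplies the matching action, reward and done flag. As a timeline (a reading of the code above; step indices are illustrative):

// call t:   GetObservationTensors -> obs_t
//           Push(info_{t-1}, obs_{t-1}, obs_t)         // info carries action a_{t-1} and reward
//           if info_t.done: Push(info_t, obs_t, obs_t) // dummy next state, then start over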

com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs.meta (11 changes)


fileFormatVersion: 2
guid: 30a25b3276c294e5eb07b57fc1af4bdb
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs (248 changes)


// ModelRunner for C# training.
using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using UnityEngine;
using Unity.MLAgents.Inference.Utils;
namespace Unity.MLAgents
{
internal class TrainingModelRunner
{
List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
Dictionary<int, ActionBuffers> m_LastActionsReceived = new Dictionary<int, ActionBuffers>();
List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
TensorProxy m_TrainingState;
ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TrainingTensorGenerator m_TrainingTensorGenerator;
TrainingForwardTensorApplier m_TensorApplier;
Model m_Model;
IWorker m_Engine;
bool m_Verbose = false;
string[] m_OutputNames;
IReadOnlyList<TensorProxy> m_TrainingInputs;
List<TensorProxy> m_TrainingOutputs;
Dictionary<string, Tensor> m_InputsByName;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();
bool m_ObservationsInitialized;
bool m_TrainingObservationsInitialized;
ReplayBuffer m_Buffer;
/// <summary>
/// Initializes the Brain with the Model that it will use when selecting actions for
/// the agents
/// </summary>
/// <param name="model"> The Barracuda model to load </param>
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
/// <param name="inferenceDevice"> Inference execution device. CPU is the fastest
/// option for most of ML Agents models. </param>
/// <param name="seed"> The seed that will be used to initialize the RandomNormal
/// and Multinomial objects used when running inference.</param>
/// <exception cref="UnityAgentsException">Throws an error when the model is null
/// </exception>
public TrainingModelRunner(
ActionSpec actionSpec,
NNModel model,
ReplayBuffer buffer,
TrainerConfig config,
int seed = 0)
{
Model barracudaModel;
m_TensorAllocator = new TensorCachingAllocator();
// barracudaModel = Barracuda.SomeModelBuilder.CreateModel();
barracudaModel = ModelLoader.Load(model);
m_Model = barracudaModel;
WorkerFactory.Type executionDevice = WorkerFactory.Type.CSharpBurst;
m_Engine = WorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);
m_TrainingInputs = barracudaModel.GetTrainingInputTensors();
m_OutputNames = barracudaModel.GetOutputNames();
InitializeTrainingState(barracudaModel);
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TrainingTensorGenerator = new TrainingTensorGenerator(
seed, m_TensorAllocator, config.learningRate, config.gamma, barracudaModel);
m_TensorApplier = new TrainingForwardTensorApplier(
actionSpec, seed, m_TensorAllocator, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_TrainingOutputs = new List<TensorProxy>();
m_Buffer = buffer;
}
void InitializeTrainingState(Model barracudaModel)
{
m_TrainingState = new TensorProxy
{
data = barracudaModel.GetTensorByName(TensorNames.InitialTrainingState)
};
}
void PrepareBarracudaInputs(IReadOnlyList<TensorProxy> infInputs)
{
m_InputsByName.Clear();
for (var i = 0; i < infInputs.Count; i++)
{
var inp = infInputs[i];
m_InputsByName[inp.name] = inp.data;
}
}
public void Dispose()
{
if (m_Engine != null)
m_Engine.Dispose();
m_TensorAllocator?.Reset(false);
}
void FetchBarracudaOutputs(string[] names)
{
m_TrainingOutputs.Clear();
foreach (var n in names)
{
var output = m_Engine.PeekOutput(n);
m_TrainingOutputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
}
}
public void PutObservations(AgentInfo info, List<ISensor> sensors)
{
m_Infos.Add(new AgentInfoSensorsPair
{
agentInfo = info,
sensors = sensors
});
// We add the episodeId to this list to maintain the order in which the decisions were requested
m_OrderedAgentsRequestingDecisions.Add(info.episodeId);
if (!m_LastActionsReceived.ContainsKey(info.episodeId))
{
m_LastActionsReceived[info.episodeId] = ActionBuffers.Empty;
}
if (info.done)
{
// If the agent is done, we remove the key from the last action dictionary since no action
// should be taken.
m_LastActionsReceived.Remove(info.episodeId);
}
}
public void GetObservationTensors(IReadOnlyList<TensorProxy> tensors, AgentInfo info, List<ISensor> sensors)
{
if (!m_ObservationsInitialized)
{
// Initialize the observation tensors from this agent's sensors on first use.
m_TensorGenerator.InitializeObservations(sensors, m_TensorAllocator);
m_ObservationsInitialized = true;
}
var infoSensorPair = new AgentInfoSensorsPair
{
agentInfo = info,
sensors = sensors
};
m_TensorGenerator.GenerateTensors(tensors, 1, new List<AgentInfoSensorsPair> { infoSensorPair });
}
public IReadOnlyList<TensorProxy> GetInputTensors()
{
return m_Model.GetInputTensors();
}
public void DecideBatch()
{
var currentBatchSize = m_Infos.Count;
if (currentBatchSize == 0)
{
return;
}
if (!m_ObservationsInitialized)
{
// Just grab the first agent in the collection (any will suffice, really).
// We check for an empty Collection above, so this will always return successfully.
var firstInfo = m_Infos[0];
m_TensorGenerator.InitializeObservations(firstInfo.sensors, m_TensorAllocator);
m_ObservationsInitialized = true;
}
// Prepare the input tensors to be fed into the engine
m_TensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, m_Infos);
m_TrainingTensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, m_Buffer.SampleDummyBatch(currentBatchSize), m_TrainingState);
PrepareBarracudaInputs(m_TrainingInputs);
// Execute the Model
m_Engine.Execute(m_InputsByName);
FetchBarracudaOutputs(m_OutputNames);
// Update the outputs
m_TensorApplier.ApplyTensors(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);
m_Infos.Clear();
m_OrderedAgentsRequestingDecisions.Clear();
}
public void UpdateModel(List<Transition> transitions)
{
var currentBatchSize = transitions.Count;
if (currentBatchSize == 0)
{
return;
}
m_TrainingTensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, transitions, m_TrainingState, true);
PrepareBarracudaInputs(m_TrainingInputs);
// Execute the Model
m_Engine.Execute(m_InputsByName);
// Update the model
FetchBarracudaOutputs(new string[] { TensorNames.TrainingStateOut });
m_TrainingState = m_TrainingOutputs[0];
}
public ActionBuffers GetAction(int agentId)
{
if (m_LastActionsReceived.ContainsKey(agentId))
{
return m_LastActionsReceived[agentId];
}
return ActionBuffers.Empty;
}
// void PrintTensor(TensorProxy tensor)
// {
// Debug.Log($"Print tensor {tensor.name}");
// for (var b = 0; b < tensor.data.batch; b++)
// {
// var message = new List<float>();
// for (var i = 0; i < tensor.data.height; i++)
// {
// for (var j = 0; j < tensor.data.width; j++)
// {
// for(var k = 0; k < tensor.data.channels; k++)
// {
// message.Add(tensor.data[b, i, j, k]);
// }
// }
// }
// Debug.Log(string.Join(", ", message));
// }
// }
}
}
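
Taken together, the new classes wire up this per-step loop (a summary of the code above, not documentation from the branch):

// 1. TrainingPolicy.RequestDecision -> TrainingModelRunner.PutObservations(...)
//                                      and ReplayBuffer.Push(...) for the completed transition
// 2. TrainingPolicy.DecideAction    -> TrainingModelRunner.DecideBatch()
//                                      (forward pass; greedy actions via MaxActionOutputApplier)
// 3. Academy's TrainerUpdate event  -> Trainer.Update()
//                                      -> ReplayBuffer.SampleBatch(batchSize)
//                                      -> TrainingModelRunner.UpdateModel(batch)
//                                         (training pass; refreshes the training_state tensor)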