Merge branch 'soccer-2v1' into asymm-envs

5 年前 · b4f52c88
--- a/.yamato/gym-interface-test.yml
+++ b/.yamato/gym-interface-test.yml
  commands:
    - pip install pyyaml
    - python -u -m ml-agents.tests.yamato.setup_venv
-    - ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=Project/testPlayer-Basic
+    - ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
  dependencies:
    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
  triggers:
--- a/.yamato/protobuf-generation-test.yml
+++ b/.yamato/protobuf-generation-test.yml
        - "protobuf-definitions/*.md"
        - "protobuf-definitions/**/*.md"
  artifacts:
-    dist:
+    patch:
-        - "artifacts/*"
+        - "artifacts/*.*"
--- a/.yamato/python-ll-api-test.yml
+++ b/.yamato/python-ll-api-test.yml
    - python -u -m ml-agents.tests.yamato.setup_venv
    - ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
  dependencies:
-    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }} --env=Project/testPlayer
+    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
    changes:
--- a/.yamato/standalone-build-test.yml
+++ b/.yamato/standalone-build-test.yml
        - "com.unity.ml-agents/*.md"
        - "com.unity.ml-agents/**/*.md"
  artifacts:
+    logs:
+      paths:
+        - "artifacts/standalone_build.txt"
-        - "Project/testPlayer*/**"
+        - "artifacts/testPlayer*/**"
 {% endfor %}
--- a/.yamato/training-int-tests.yml
+++ b/.yamato/training-int-tests.yml
    # Backwards-compatibility tests.
    # If we make a breaking change to the communication protocol, these will need
    # to be disabled until the next release.
-    - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
-    - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
+    # - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
+    # - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
  dependencies:
    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
  triggers:
        - "com.unity.ml-agents/*.md"
        - "com.unity.ml-agents/**/*.md"
  artifacts:
-    unit:
+    logs:
+      paths:
+        - "artifacts/standalone_build.txt"
+    standalonebuild:
-        - "artifacts/**"
+        # Keep the casing in sync with the testPlayer* build output name used elsewhere in CI.
+        - "artifacts/testPlayer*/**"
 {% endfor %}
--- a/README.md
+++ b/README.md
 |:-------:|:------:|:-------------:|:-------:|:------------:|
 | **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
 | **0.15.1** | **March 30, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip)** |
-| **0.15.0** | **March 18, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip)** |
+| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
 | **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |
 | **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
 | **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 - Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
 - The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
 - Added ability to start training (initialize model weights) from a previous run ID. (#3710)
+ - The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
+ - The offset logic was removed from DecisionRequester.
+ - Timer files now contain a dictionary of metadata, including things like the package version numbers.
+ - SideChannel IncomingMessages methods now take an optional default argument, which is used when trying to read more data than the message contains.
 - The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used.
 - Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
 - Running `mlagents-learn` with the same `--run-id` twice will no longer overwrite the existing files. (#3705)
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
        /// on each side, although we may allow some flexibility in the future.
        /// This should be incremented whenever a change is made to the communication protocol.
        /// </summary>
-        const string k_ApiVersion = "0.15.0";
+        const string k_ApiVersion = "0.16.0";
-        internal const string k_PackageVersion = "0.15.0-preview";
+        internal const string k_PackageVersion = "0.15.1-preview";

        const int k_EditorTrainingPort = 5004;

        // This will mark the Agent as Done if it has reached its maxSteps.
        internal event Action AgentIncrementStep;

-        // Signals to all the agents at each environment step along with the
-        // Academy's maxStepReached, done and stepCount values. The agents rely
-        // on this event to update their own values of max step reached and done
-        // in addition to aligning on the step count of the global episode.
-        internal event Action<int> AgentSetStatus;
+
+        /// <summary>
+        /// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
+        /// This is a good time for an <see cref="Agent"/> to decide if it would like to
+        /// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
+        /// for this step.  Any other pre-step setup could be done during this event as well.
+        /// The argument passed to handlers is the Academy's step count before it is
+        /// incremented for the step.
+        /// </summary>
+        public event Action<int> AgentPreStep;

        // Signals to all the agents at each environment step so they can send
        // their state to their Policy if they have requested a decision.
        /// </summary>
        void InitializeEnvironment()
        {
+            TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
+            TimerStack.Instance.AddMetadata("package_version", k_PackageVersion);
+
            EnableAutomaticStepping();

            SideChannelUtils.RegisterSideChannel(new EngineConfigurationChannel());
        {
            DecideAction = () => {};
            DestroyAction = () => {};
-            AgentSetStatus = i => {};
+            AgentPreStep = i => {};
            AgentSendState = () => {};
            AgentAct = () => {};
            AgentForceReset = () => {};
                ForcedFullReset();
            }

-            AgentSetStatus?.Invoke(m_StepCount);
+            AgentPreStep?.Invoke(m_StepCount);

            m_StepCount += 1;
            m_TotalStepCount += 1;
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
            m_Info.reward = m_Reward;
            m_Info.done = true;
            m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
+            if (collectObservationsSensor != null)
+            {
+                // Make sure the latest observations are being passed to training.
+                collectObservationsSensor.Reset();
+                CollectObservations(collectObservationsSensor);
+            }
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors);
--- a/com.unity.ml-agents/Runtime/DecisionRequester.cs
+++ b/com.unity.ml-agents/Runtime/DecisionRequester.cs
+using System;
 using UnityEngine;
 using UnityEngine.Serialization;

    /// at regular intervals.
    /// </summary>
    [AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
-    internal class DecisionRequester : MonoBehaviour
+    [RequireComponent(typeof(Agent))]
+    public class DecisionRequester : MonoBehaviour
-        /// that the Agent will request a decision every 5 Academy steps.
-        /// </summary>
+        /// that the Agent will request a decision every 5 Academy steps.
+        /// </summary>
        [Range(1, 20)]
        [Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
                 "of 5 means that the Agent will request a decision every 5 Academy steps.")]
        [FormerlySerializedAs("RepeatAction")]
        public bool TakeActionsBetweenDecisions = true;

-        /// <summary>
-        /// Whether or not the Agent decisions should start at an offset (different for each agent).
-        /// This does not affect <see cref="DecisionPeriod"/>. Turning this on will distribute
-        /// the decision-making computations for all the agents across multiple Academy steps.
-        /// This can be valuable in scenarios where you have many agents in the scene, particularly
-        /// during the inference phase.
-        /// </summary>
-        [Tooltip("Whether or not Agent decisions should start at an offset.")]
-        public bool offsetStep;
-
+        [NonSerialized]
-        int m_Offset;
-            m_Offset = offsetStep ? gameObject.GetInstanceID() : 0;
-            Academy.Instance.AgentSetStatus += MakeRequests;
+            Debug.Assert(m_Agent != null, "Agent component was not found on this gameObject and is required.");
+            Academy.Instance.AgentPreStep += MakeRequests;
        }

        void OnDestroy()
-                Academy.Instance.AgentSetStatus -= MakeRequests;
+                Academy.Instance.AgentPreStep -= MakeRequests;
-        void MakeRequests(int count)
+        /// <summary>
+        /// Method that hooks into the Academy in order to inform the Agent on whether or not it should request a
+        /// decision, and whether or not it should take actions between decisions.
+        /// </summary>
+        /// <param name="academyStepCount">The current step count of the academy.</param>
+        void MakeRequests(int academyStepCount)
-            if ((count + m_Offset) % DecisionPeriod == 0)
+            if (academyStepCount % DecisionPeriod == 0)
            {
                m_Agent?.RequestDecision();
            }
--- a/com.unity.ml-agents/Runtime/SideChannels/IncomingMessage.cs
+++ b/com.unity.ml-agents/Runtime/SideChannels/IncomingMessage.cs
 using System.Collections.Generic;
+using System.Runtime.CompilerServices;
 using System;
 using System.IO;
 using System.Text;
        }

        /// <summary>
-        /// Read a boolan value from the message.
+        /// Read a boolean value from the message.
+        /// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
-        public bool ReadBoolean()
+        public bool ReadBoolean(bool defaultValue = false)
-            return m_Reader.ReadBoolean();
+            return CanReadMore() ? m_Reader.ReadBoolean() : defaultValue;
+        /// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
-        public int ReadInt32()
+        public int ReadInt32(int defaultValue = 0)
-            return m_Reader.ReadInt32();
+            return CanReadMore() ? m_Reader.ReadInt32() : defaultValue;
+        /// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
-        public float ReadFloat32()
+        public float ReadFloat32(float defaultValue = 0.0f)
-            return m_Reader.ReadSingle();
+            return CanReadMore() ? m_Reader.ReadSingle() : defaultValue;
+        /// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
-        public string ReadString()
+        public string ReadString(string defaultValue = default)
+            if (!CanReadMore())
+            {
+                return defaultValue;
+            }
+
            var strLength = ReadInt32();
            var str = Encoding.ASCII.GetString(m_Reader.ReadBytes(strLength));
            return str;
        /// Reads a list of floats from the message. The length of the list is stored in the message.
        /// </summary>
+        /// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
-        public IList<float> ReadFloatList()
+        public IList<float> ReadFloatList(IList<float> defaultValue = default)
+            if (!CanReadMore())
+            {
+                return defaultValue;
+            }
+
            var len = ReadInt32();
            var output = new float[len];
            for (var i = 0; i < len; i++)
        {
            m_Reader?.Dispose();
            m_Stream?.Dispose();
+        }
+
+        /// <summary>
+        /// Whether or not there is more data left in the stream that can be read.
+        /// </summary>
+        /// <returns>True if the stream position is before the end of the stream; false otherwise.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        bool CanReadMore()
+        {
+            return m_Stream.Position < m_Stream.Length;
        }
    }
 }
--- a/com.unity.ml-agents/Runtime/Timer.cs
+++ b/com.unity.ml-agents/Runtime/Timer.cs
        Dictionary<string, TimerNode> m_Children;

        /// <summary>
-        /// Gauge Nodes to measure arbitrary values.
-        /// </summary>
-        [DataMember(Name = "gauges", EmitDefaultValue = false)]
-        Dictionary<string, GaugeNode> m_Gauges;
-
-        /// <summary>
        /// Custom sampler used to add timings to the profiler.
        /// </summary>
        CustomSampler m_Sampler;
            set {}  // Serialization needs this, but unused.
        }

-        public Dictionary<string, GaugeNode> Gauges
-        {
-            get { return m_Gauges; }
-        }
-
        /// <summary>
        /// Total seconds spent in this block, excluding its children.
        /// </summary>
                // The root node doesn't have a sampler since that could interfere with the profiler.
                m_NumCalls = 1;
                m_TickStart = DateTime.Now.Ticks;
-                m_Gauges = new Dictionary<string, GaugeNode>();
            }
            else
            {
        }
    }

+    /// <summary>
+    /// TimerNode used as the root of the timer hierarchy. In addition to the base node data,
+    /// it holds a string-to-string metadata dictionary and the gauge nodes, both of which are
+    /// marked as data members for serialization.
+    /// </summary>
+    [DataContract]
+    internal class RootNode : TimerNode
+    {
+        // Timer output format version
+        internal const string k_timerFormatVersion = "0.1.0";
+
+        /// <summary>
+        /// Key/value metadata for this timer tree; serialized under the name "metadata".
+        /// </summary>
+        [DataMember(Name = "metadata", Order = 0)]
+        Dictionary<string, string> m_Metadata = new Dictionary<string, string>();
+
+        /// <summary>
+        /// Gauge Nodes to measure arbitrary values.
+        /// </summary>
+        [DataMember(Name = "gauges", EmitDefaultValue = false)]
+        Dictionary<string, GaugeNode> m_Gauges = new Dictionary<string, GaugeNode>();
+
+        /// <summary>
+        /// Creates the root node and records the initial metadata: the timer format version,
+        /// the start time in Unix seconds, the Unity version and the command line arguments
+        /// (joined with spaces).
+        /// </summary>
+        /// <param name="name">Name of the node, forwarded to the base constructor.</param>
+        public RootNode(string name="root") : base(name, true)
+        {
+            m_Metadata.Add("timer_format_version", k_timerFormatVersion);
+            m_Metadata.Add("start_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");
+            m_Metadata.Add("unity_version", Application.unityVersion);
+            m_Metadata.Add("command_line_arguments", String.Join(" ", Environment.GetCommandLineArgs()));
+        }
+
+        /// <summary>
+        /// Sets the metadata value for a key, replacing any value already stored for that key.
+        /// </summary>
+        /// <param name="key">The metadata key.</param>
+        /// <param name="value">The value to store for the key.</param>
+        public void AddMetadata(string key, string value)
+        {
+            m_Metadata[key] = value;
+        }
+
+        /// <summary>
+        /// The gauge nodes stored on this root node, keyed by name.
+        /// </summary>
+        public Dictionary<string, GaugeNode> Gauges
+        {
+            get { return m_Gauges; }
+        }
+
+        /// <summary>
+        /// The metadata dictionary stored on this root node.
+        /// </summary>
+        public Dictionary<string, string> Metadata
+        {
+            get { return m_Metadata; }
+        }
+    }
+
    /// <summary>
    /// Tracks the most recent value of a metric. This is analogous to gauges in statsd.
    /// </summary>
        static readonly TimerStack k_Instance = new TimerStack();

        Stack<TimerNode> m_Stack;
-        TimerNode m_RootNode;
+        RootNode m_RootNode;
+        Dictionary<string, string> m_Metadata;

        // Explicit static constructor to tell C# compiler
        // not to mark type as beforefieldinit
        public void Reset(string name = "root")
        {
            m_Stack = new Stack<TimerNode>();
-            m_RootNode = new TimerNode(name, true);
+            m_RootNode = new RootNode(name);
            m_Stack.Push(m_RootNode);
        }

            get { return k_Instance; }
        }

-        internal TimerNode RootNode
+        internal RootNode RootNode
        {
            get { return m_RootNode; }
        }
                    m_RootNode.Gauges[name] = new GaugeNode(value);
                }
            }
+        }
+
+        /// <summary>
+        /// Adds a metadata entry to the root node of this timer stack, replacing any value
+        /// already stored for the same key.
+        /// </summary>
+        /// <param name="key">The metadata key.</param>
+        /// <param name="value">The value to store for the key.</param>
+        public void AddMetadata(string key, string value)
+        {
+            m_RootNode.AddMetadata(key, value);
        }

        void Push(string name)
        /// <param name="stream">The stream that the serialized root timer node is written to.</param>
        public void SaveJsonTimers(Stream stream)
        {
+            // Add some final metadata info
+            AddMetadata("scene_name", SceneManager.GetActiveScene().name);
+            AddMetadata("end_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");
+
-            var ser = new DataContractJsonSerializer(typeof(TimerNode), jsonSettings);
+            var ser = new DataContractJsonSerializer(typeof(RootNode), jsonSettings);
            ser.WriteObject(stream, m_RootNode);
        }
    }
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
 using System.CodeDom;
+using System;
 using UnityEngine;
 using NUnit.Framework;
 using System.Reflection;
 {
    internal class TestPolicy : IPolicy
    {
-        public void RequestDecision(AgentInfo info, List<ISensor> sensors) {}
+        // Optional callback invoked at the end of each RequestDecision call, if one is set.
+        public Action OnRequestDecision;
+        // Adapter handed to every sensor's GetObservationProto call in RequestDecision.
+        private WriteAdapter m_Adapter = new WriteAdapter();
+        // Calls GetObservationProto on each sensor, then invokes OnRequestDecision.
+        public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
+            foreach(var sensor in sensors){
+                sensor.GetObservationProto(m_Adapter);
+            }
+            OnRequestDecision?.Invoke();
+        }

        public float[] DecideAction() { return new float[0]; }

        {
            collectObservationsCalls += 1;
            collectObservationsCallsForEpisode += 1;
-            sensor.AddObservation(0f);
+            sensor.AddObservation(collectObservationsCallsForEpisode);
        }

        public override void OnActionReceived(float[] vectorAction)
                aca.EnvironmentStep();
            }
        }
+
+        // Checks the stacked vector observations around the end of an episode: the values seen
+        // by the policy when a decision is requested during EndEpisode, and an all-zero stack
+        // once EndEpisode has returned.
+        [Test]
+        public void AssertStackingReset()
+        {
+            var agentGo1 = new GameObject("TestAgent");
+            agentGo1.AddComponent<TestAgent>();
+            var behaviorParameters = agentGo1.GetComponent<BehaviorParameters>();
+            // Stack three vector observations.
+            behaviorParameters.brainParameters.numStackedVectorObservations = 3;
+            var agent1 = agentGo1.GetComponent<TestAgent>();
+            var aca = Academy.Instance;
+            agent1.LazyInitialize();
+            var policy = new TestPolicy();
+            agent1.SetPolicy(policy);
+
+            // Find the StackingSensor among the agent's sensors (the last match is kept).
+            StackingSensor sensor = null;
+            foreach(ISensor s in agent1.sensors){
+                if (s is  StackingSensor){
+                    sensor = s as StackingSensor;
+                }
+            }
+
+            Assert.NotNull(sensor);
+
+            // Request a decision and step the Academy 20 times.
+            for (int i = 0; i < 20; i++)
+            {
+                agent1.RequestDecision();
+                aca.EnvironmentStep();
+
+            }
+
+            // The callback runs inside TestPolicy.RequestDecision; it is set just before
+            // EndEpisode, so it is presumably triggered by the decision request made there.
+            // NOTE(review): the expected values {18, 19, 21} are not derivable from this test
+            // alone - confirm against the observation/stacking order in Agent.
+            policy.OnRequestDecision = () =>  SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
+            agent1.EndEpisode();
+            // After the episode has ended the stacked observations are expected to be all zeros.
+            SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 0f});
+        }
    }

    [TestFixture]
                    expectedCollectObsCallsForEpisode = 0;
                    expectedAgentStepCount = 0;
                    expectedSensorResetCalls++;
+                    expectedCollectObsCalls += 1;
                }
                aca.EnvironmentStep();

--- a/com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
+++ b/com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
 using MLAgents.Sensors;
 using NUnit.Framework;
 using UnityEngine;
+using UnityEngine.TestTools;

 namespace MLAgentsExamples
 {
            sensorComponent.observationStacks = 2;

            sensorComponent.CreateSensor();
-        }
-
-        class PublicApiAgent : Agent
-        {
-            public int numHeuristicCalls;
-
-            public override float[] Heuristic()
-            {
-                numHeuristicCalls++;
-                return base.Heuristic();
-            }
-        }
-
-        // Simple SensorComponent that sets up a StackingSensor
-        class StackingComponent : SensorComponent
-        {
-            public SensorComponent wrappedComponent;
-            public int numStacks;
-
-            public override ISensor CreateSensor()
-            {
-                var wrappedSensor = wrappedComponent.CreateSensor();
-                return new StackingSensor(wrappedSensor, numStacks);
-            }
-
-            public override int[] GetObservationShape()
-            {
-                int[] shape = (int[]) wrappedComponent.GetObservationShape().Clone();
-                for (var i = 0; i < shape.Length; i++)
-                {
-                    shape[i] *= numStacks;
-                }
-
-                return shape;
-            }
-        }
-
-
-        [Test]
-        public void CheckSetupAgent()
-        {
-            var gameObject = new GameObject();
-
-            var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
-            behaviorParams.brainParameters.vectorObservationSize = 3;
-            behaviorParams.brainParameters.numStackedVectorObservations = 2;
-            behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
-            behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
-            behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
-            behaviorParams.behaviorName = "TestBehavior";
-            behaviorParams.TeamId = 42;
-            behaviorParams.useChildSensors = true;
-
-            var agent = gameObject.AddComponent<PublicApiAgent>();
-            // Make sure we can set the behavior type correctly after the agent is added
-            behaviorParams.behaviorType = BehaviorType.InferenceOnly;
-            // Can't actually create an Agent with InferenceOnly and no model, so change back
-            behaviorParams.behaviorType = BehaviorType.Default;
-
-            // TODO -  not internal yet
-            // var decisionRequester = gameObject.AddComponent<DecisionRequester>();
-            // decisionRequester.DecisionPeriod = 2;
-
-            var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
-            sensorComponent.sensorName = "ray3d";
-            sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
-            sensorComponent.raysPerDirection = 3;
-
-            // Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
-            // This isn't necessarily practical, just to ensure that it can be done
-            var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
-            wrappingSensorComponent.wrappedComponent = sensorComponent;
-            wrappingSensorComponent.numStacks = 3;
-
-            // ISensor isn't set up yet.
-            Assert.IsNull(sensorComponent.raySensor);
-
-            agent.LazyInitialize();
-            // Make sure we can set the behavior type correctly after the agent is initialized
-            // (this creates a new policy).
-            behaviorParams.behaviorType = BehaviorType.HeuristicOnly;
-
-            // Initialization should set up the sensors
-            Assert.IsNotNull(sensorComponent.raySensor);
-
-            // Let's change the inference device
-            var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
-            agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);
-
-            agent.AddReward(1.0f);
-
-            agent.RequestAction();
-            agent.RequestDecision();
-
-            Academy.Instance.AutomaticSteppingEnabled = false;
-            Academy.Instance.EnvironmentStep();
-
-            var actions = agent.GetAction();
-            // default Heuristic implementation should return zero actions.
-            Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
-            Assert.AreEqual(1, agent.numHeuristicCalls);
        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/SideChannelTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/SideChannelTests.cs
            Assert.AreEqual(stringVal, incomingMsg.ReadString());
            Assert.AreEqual(floatListVal, incomingMsg.ReadFloatList());
        }
+
+        [Test]
+        public void TestMessageReadDefaults()
+        {
+            // Make sure reading past the end of a message will apply defaults.
+            // Nothing is written to the outgoing message, so the incoming message built from
+            // its bytes is expected to contain no data to read.
+            IncomingMessage incomingMsg;
+            using (var outgoingMsg = new OutgoingMessage())
+            {
+                incomingMsg = new IncomingMessage(outgoingMsg.ToByteArray());
+            }
+
+            // For each Read* method: first the parameter's own default, then a caller-supplied one.
+            Assert.AreEqual(false, incomingMsg.ReadBoolean());
+            Assert.AreEqual(true, incomingMsg.ReadBoolean(defaultValue: true));
+
+            Assert.AreEqual(0, incomingMsg.ReadInt32());
+            Assert.AreEqual(42, incomingMsg.ReadInt32(defaultValue: 42));
+
+            Assert.AreEqual(0.0f, incomingMsg.ReadFloat32());
+            Assert.AreEqual(1337.0f, incomingMsg.ReadFloat32(defaultValue: 1337.0f));
+
+            Assert.AreEqual(default(string), incomingMsg.ReadString());
+            Assert.AreEqual("foo", incomingMsg.ReadString(defaultValue: "foo"));
+
+            Assert.AreEqual(default(float[]), incomingMsg.ReadFloatList());
+            Assert.AreEqual(new float[] { 1001, 1002 }, incomingMsg.ReadFloatList(new float[] { 1001, 1002 }));
+        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/TimerTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/TimerTest.cs
                    using (myTimer.Scoped("bar"))
                    {
                        myTimer.SetGauge("my_gauge", i);
+                        myTimer.AddMetadata("i", $"{i}");
                    }
                }
            }
            Assert.AreEqual(0, gauge.minValue);
            Assert.AreEqual(4, gauge.maxValue);
            Assert.AreEqual(4, gauge.value);
+            Assert.AreEqual("4", myTimer.RootNode.Metadata["i"]);

            var fooChildren = rootChildren["foo"].Children;
            Assert.That(fooChildren, Contains.Key("bar"));
--- a/com.unity.ml-agents/package.json
+++ b/com.unity.ml-agents/package.json
 {
-	"name": "com.unity.ml-agents",
-	"displayName":"ML Agents",
-	"version": "0.15.0-preview",
-	"unity": "2018.4",
-	"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
-	"dependencies": {
-		"com.unity.barracuda": "0.6.1-preview"
-	}
+  "name": "com.unity.ml-agents",
+  "displayName": "ML Agents",
+  "version": "0.15.1-preview",
+  "unity": "2018.4",
+  "description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
+  "dependencies": {
+    "com.unity.barracuda": "0.6.1-preview"
+  }
 }
--- a/docs/Getting-Started.md
+++ b/docs/Getting-Started.md
 - For a "Hello World" introduction to creating your own Learning Environment,
  check out the [Making a New Learning
  Environment](Learning-Environment-Create-New.md) page.
- For a series of YouTube video tutorials, checkout the
-  [Machine Learning Agents PlayList](https://www.youtube.com/playlist?list=PLX2vGYjWbI0R08eWQkO7nQkGiicHAX7IX)
-  page.
--- a/docs/ML-Agents-Overview.md
+++ b/docs/ML-Agents-Overview.md
  [Training With Environment Parameter Randomization](Training-Environment-Parameter-Randomization.md)
  to learn more about this feature.

- **Cloud Training on AWS** - To facilitate using the ML-Agents toolkit on
-  Amazon Web Services (AWS) machines, we provide a
-  [guide](Training-on-Amazon-Web-Service.md) on how to set-up EC2 instances in
-  addition to a public pre-configured Amazon Machine Image (AMI).
-
- **Cloud Training on Microsoft Azure** - To facilitate using the ML-Agents
-  toolkit on Azure machines, we provide a
-  [guide](Training-on-Microsoft-Azure.md) on how to set-up virtual machine
-  instances in addition to a pre-configured data science image.
-
 ## Summary and Next Steps

 To briefly summarize: The ML-Agents toolkit enables games and simulations built
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 * Replace `Academy.RegisterSideChannel` with `SideChannelUtils.RegisterSideChannel()`.
 * Replace `Academy.UnregisterSideChannel` with `SideChannelUtils.UnregisterSideChannel`.

+
 ## Migrating from 0.14 to 0.15

 ### Important changes
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
 * `--debug`: Specify this option to enable debug-level logging for some parts of the code.
 * `--cpu`: Forces training using CPU only.
 * Engine Configuration :
-  * `--width' : The width of the executable window of the environment(s) in pixels
+  * `--width` : The width of the executable window of the environment(s) in pixels
  (ignored for editor training) (Default 84)
  * `--height` : The height of the executable window of the environment(s) in pixels
  (ignored for editor training). (Default 84)
--- a/docs/images/3dball_big.png
+++ b/docs/images/3dball_big.png
--- a/docs/images/3dball_small.png
+++ b/docs/images/3dball_small.png
--- a/docs/images/curriculum.png
+++ b/docs/images/curriculum.png
--- a/docs/images/ml-agents-LSTM.png
+++ b/docs/images/ml-agents-LSTM.png
--- a/docs/images/monitor.png
+++ b/docs/images/monitor.png
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
    # Currently we require strict equality between the communication protocol
    # on each side, although we may allow some flexibility in the future.
    # This should be incremented whenever a change is made to the communication protocol.
-    API_VERSION = "0.15.0"
+    API_VERSION = "0.16.0"

    # Default port that the editor listens on. If an environment executable
    # isn't specified, this port will be used.
--- a/ml-agents-envs/mlagents_envs/side_channel/incoming_message.py
+++ b/ml-agents-envs/mlagents_envs/side_channel/incoming_message.py
        self.buffer = buffer
        self.offset = offset

-    def read_bool(self) -> bool:
+    def read_bool(self, default_value: bool = False) -> bool:
+        :param default_value: Default value to use if the end of the message is reached.
+        :return: The value read from the message, or the default value if the end was reached.
+        if self._at_end_of_buffer():
+            return default_value
+
-    def read_int32(self) -> int:
+    def read_int32(self, default_value: int = 0) -> int:
+        :param default_value: Default value to use if the end of the message is reached.
+        :return: The value read from the message, or the default value if the end was reached.
+        if self._at_end_of_buffer():
+            return default_value
+
-    def read_float32(self) -> float:
+    def read_float32(self, default_value: float = 0.0) -> float:
+        :param default_value: Default value to use if the end of the message is reached.
+        :return: The value read from the message, or the default value if the end was reached.
+        if self._at_end_of_buffer():
+            return default_value
+
-    def read_float32_list(self) -> List[float]:
+    def read_float32_list(self, default_value: List[float] = None) -> List[float]:
+        :param default_value: Default value to use if the end of the message is reached.
+        :return: The value read from the message, or the default value if the end was reached.
+        if self._at_end_of_buffer():
+            return [] if default_value is None else default_value
+
        list_len = self.read_int32()
        output = []
        for _ in range(list_len):
-    def read_string(self) -> str:
+    def read_string(self, default_value: str = "") -> str:
+        :param default_value: Default value to use if the end of the message is reached.
+        :return: The value read from the message, or the default value if the end was reached.
+        if self._at_end_of_buffer():
+            return default_value
+
        encoded_str_len = self.read_int32()
        val = self.buffer[self.offset : self.offset + encoded_str_len].decode("ascii")
        self.offset += encoded_str_len
        Get a copy of the internal bytes used by the message.
        """
        return bytearray(self.buffer)
+
+    def _at_end_of_buffer(self) -> bool:
+        """
+        Report whether the read offset has reached or passed the end of the buffer.
+        :return: True if no bytes remain at or after the current offset.
+        """
+        remaining = len(self.buffer) - self.offset
+        return remaining <= 0
--- a/ml-agents-envs/mlagents_envs/tests/test_side_channel.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_side_channel.py
        read_vals.append(msg_in.read_bool())
    assert vals == read_vals

+    # Test reading with defaults
+    assert msg_in.read_bool() is False
+    assert msg_in.read_bool(default_value=True) is True
+

 def test_message_int32():
    val = 1337
    read_val = msg_in.read_int32()
    assert val == read_val

+    # Test reading with defaults
+    assert 0 == msg_in.read_int32()
+    assert val == msg_in.read_int32(default_value=val)
+

 def test_message_float32():
    val = 42.0
    # These won't be exactly equal in general, since python floats are 64-bit.
    assert val == read_val

+    # Test reading with defaults
+    assert 0.0 == msg_in.read_float32()
+    assert val == msg_in.read_float32(default_value=val)
+

 def test_message_string():
    val = "mlagents!"
    read_val = msg_in.read_string()
    assert val == read_val

+    # Test reading with defaults
+    assert "" == msg_in.read_string()
+    assert val == msg_in.read_string(default_value=val)
+

 def test_message_float_list():
    val = [1.0, 3.0, 9.0]
    read_val = msg_in.read_float32_list()
    # These won't be exactly equal in general, since python floats are 64-bit.
    assert val == read_val
+
+    # Test reading with defaults
+    assert [] == msg_in.read_float32_list()
+    assert val == msg_in.read_float32_list(default_value=val)
--- a/ml-agents-envs/mlagents_envs/tests/test_timers.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_timers.py
                }
            },
            "gauges": {"my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}},
+            "metadata": {
+                "timer_format_version": timers.TIMER_FORMAT_VERSION,
+                "start_time_seconds": mock.ANY,
+                "end_time_seconds": mock.ANY,
+                "python_version": mock.ANY,
+                "command_line_arguments": mock.ANY,
+            },
        }

        assert timer_tree == expected_tree
--- a/ml-agents-envs/mlagents_envs/timers.py
+++ b/ml-agents-envs/mlagents_envs/timers.py
 """

 import math
-from time import perf_counter
+import sys
+import time
+
+TIMER_FORMAT_VERSION = "0.1.0"


 class TimerNode:
    sure that pushes and pops are already matched.
    """

-    __slots__ = ["root", "stack", "start_time", "gauges"]
+    __slots__ = ["root", "stack", "start_time", "gauges", "metadata"]
-        self.start_time = perf_counter()
+        self.start_time = time.perf_counter()
+        self.metadata: Dict[str, str] = {}
+        self._add_default_metadata()
-        self.start_time = perf_counter()
+        self.start_time = time.perf_counter()
+        self.metadata: Dict[str, str] = {}
+        self._add_default_metadata()

    def push(self, name: str) -> TimerNode:
        """
        Update the total time and count of the root name, and return it.
        """
        root = self.root
-        root.total = perf_counter() - self.start_time
+        root.total = time.perf_counter() - self.start_time
        root.count = 1
        return root

            if self.gauges:
                res["gauges"] = self._get_gauges()

+            if self.metadata:
+                self.metadata["end_time_seconds"] = str(int(time.time()))
+                res["metadata"] = self.metadata
+
        res["total"] = node.total
        res["count"] = node.count

        else:
            self.gauges[name] = GaugeNode(value)

+    def add_metadata(self, key: str, value: str) -> None:
+        """
+        Record a key/value pair in this stack's metadata, replacing any
+        value previously stored under the same key.
+        :param key: Name of the metadata entry.
+        :param value: Value to store for that entry.
+        """
+        self.metadata.update({key: value})
+
+
+    def _add_default_metadata(self):
+        """
+        Seed the metadata with the timer format version, the wall-clock start
+        time (whole seconds, stored as a string), the Python version string and
+        the space-joined command line arguments.
+        """
+        defaults = {
+            "timer_format_version": TIMER_FORMAT_VERSION,
+            "start_time_seconds": str(int(time.time())),
+            "python_version": sys.version,
+            "command_line_arguments": " ".join(sys.argv),
+        }
+        self.metadata.update(defaults)


 # Global instance of a TimerStack. This is generally all that we need for profiling, but you can potentially
    """
    timer_stack = timer_stack or _global_timer_stack
    timer_node = timer_stack.push(name)
-    start_time = perf_counter()
+    start_time = time.perf_counter()

    try:
        # The wrapped code block will run here.
        # We'll accumulate the time, and the exception (if any) gets raised automatically.
-        elapsed = perf_counter() - start_time
+        elapsed = time.perf_counter() - start_time
        timer_node.add_time(elapsed)
        timer_stack.pop()

    """
    timer_stack = timer_stack or _global_timer_stack
    timer_stack.set_gauge(name, value)
+
+
+def add_metadata(key: str, value: str, timer_stack: TimerStack = None) -> None:
+    """
+    Attach a metadata key/value pair to a TimerStack.
+    :param key: Name of the metadata entry.
+    :param value: Value to store for that entry.
+    :param timer_stack: Stack to update; the global stack is used when this is
+        None (or otherwise falsy).
+    """
+    target_stack = timer_stack if timer_stack else _global_timer_stack
+    target_stack.add_metadata(key, value)


 def get_timer_tree(timer_stack: TimerStack = None) -> Dict[str, Any]:
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py
            self._process_step(
                terminal_step, global_id, terminal_steps.agent_id_to_index[local_id]
            )
-
        # Iterate over all the decision steps
        for ongoing_step in decision_steps.values():
            local_id = ongoing_step.agent_id
--- a/ml-agents/mlagents/trainers/demo_loader.py
+++ b/ml-agents/mlagents/trainers/demo_loader.py
 from google.protobuf.internal.encoder import _EncodeVarint  # type: ignore


+INITIAL_POS = 33
+SUPPORTED_DEMONSTRATION_VERSIONS = frozenset([0, 1])
+
+
@timed
 def make_demo_buffer(
    pair_infos: List[AgentInfoActionPairProto],
        )


-INITIAL_POS = 33
-
-
@timed
 def load_demonstration(
    file_path: str
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
+                    if (
+                        meta_data_proto.api_version
+                        not in SUPPORTED_DEMONSTRATION_VERSIONS
+                    ):
+                        raise RuntimeError(
+                            f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
+                        )
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
--- a/ml-agents/mlagents/trainers/env_manager.py
+++ b/ml-agents/mlagents/trainers/env_manager.py
 AllStepResult = Dict[BehaviorName, Tuple[DecisionSteps, TerminalSteps]]
 AllGroupSpec = Dict[BehaviorName, BehaviorSpec]

-
 logger = get_logger(__name__)


--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 from mlagents_envs.side_channel.side_channel import SideChannel
 from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
 from mlagents_envs.exception import UnityEnvironmentException
-from mlagents_envs.timers import hierarchical_timer, get_timer_tree
+from mlagents_envs.timers import (
+    hierarchical_timer,
+    get_timer_tree,
+    add_metadata as add_timer_metadata,
+)
 from mlagents_envs import logging_util

 logger = logging_util.get_logger(__name__)
    run_seed = options.seed
    if options.cpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+    # Add some timer metadata
+    add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
+    add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
+    add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
+    add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)

    if options.seed == -1:
        run_seed = np.random.randint(0, 10000)
--- a/ml-agents/mlagents/trainers/tests/test_demo_loader.py
+++ b/ml-agents/mlagents/trainers/tests/test_demo_loader.py
+import io
+from unittest import mock
+from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
+    DemonstrationMetaProto,
+)
+
+    write_delimited,
 )


        assert get_demo_files(valid_fname) == [valid_fname]
        # valid directory
        assert get_demo_files(tmpdirname) == [valid_fname]
+
+
+@mock.patch("mlagents.trainers.demo_loader.get_demo_files", return_value=["foo.demo"])
+def test_unsupported_version_raises_error(mock_get_demo_files):
+    """
+    Loading a demonstration whose metadata carries an unsupported api_version
+    must raise a RuntimeError.
+    """
+    # Metadata proto with a version that is not supported
+    unsupported_meta = DemonstrationMetaProto()
+    unsupported_meta.api_version = 1337
+
+    # Serialize it in memory; the patched open() returns these bytes
+    serialized = io.BytesIO()
+    write_delimited(serialized, unsupported_meta)
+    fake_open = mock.mock_open(read_data=serialized.getvalue())
+
+    # The loader has to reject this data with a RuntimeError.
+    with mock.patch("builtins.open", fake_open), pytest.raises(RuntimeError):
+        load_demonstration("foo")
--- a/ml-agents/tests/yamato/scripts/run_llapi.py
+++ b/ml-agents/tests/yamato/scripts/run_llapi.py

 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument("--env", default="Project/testPlayer")
+    parser.add_argument("--env", default="artifacts/testPlayer")
    args = parser.parse_args()
    main(args.env)
--- a/ml-agents/tests/yamato/standalone_build_tests.py
+++ b/ml-agents/tests/yamato/standalone_build_tests.py
        executable_name = "testPlayer-" + executable_name

    returncode = run_standalone_build(
-        base_path, verbose=True, output_path=executable_name, scene_path=scene_path
+        base_path, output_path=executable_name, scene_path=scene_path
    )

    if returncode == 0:
--- a/ml-agents/tests/yamato/training_int_tests.py
+++ b/ml-agents/tests/yamato/training_int_tests.py

 from .yamato_utils import (
    get_base_path,
+    get_base_output_path,
    run_standalone_build,
    init_venv,
    override_config_file,
    if csharp_version is not None:
        # We can't rely on the old C# code recognizing the commandline argument to set the output
        # So rename testPlayer (containing the most recent build) to something else temporarily
-        full_player_path = os.path.join("Project", "testPlayer.app")
-        temp_player_path = os.path.join("Project", "temp_testPlayer.app")
-        final_player_path = os.path.join("Project", f"testPlayer_{csharp_version}.app")
+        artifact_path = get_base_output_path()
+        full_player_path = os.path.join(artifact_path, "testPlayer.app")
+        temp_player_path = os.path.join(artifact_path, "temp_testPlayer.app")
+        final_player_path = os.path.join(
+            artifact_path, f"testPlayer_{csharp_version}.app"
+        )

        os.rename(full_player_path, temp_player_path)

    )

    mla_learn_cmd = (
-        f"mlagents-learn override.yaml --train --env=Project/{standalone_player_path} "
+        f"mlagents-learn override.yaml --train --env="
+        f"{os.path.join(get_base_output_path(), standalone_player_path)} "
        f"--run-id={run_id} --no-graphics --env-args -logFile -"
    )  # noqa
    res = subprocess.run(
--- a/ml-agents/tests/yamato/yamato_utils.py
+++ b/ml-agents/tests/yamato/yamato_utils.py
 import os
+import shutil
 import subprocess
 import yaml
 from typing import List, Optional
    return os.getcwd()


+def get_base_output_path():
+    """
+    Returns the artifact folder to use for yamato jobs: the "artifacts"
+    directory directly under the path returned by get_base_path().
+    """
+    return os.path.join(get_base_path(), "artifacts")
+
+
+    log_output_path: str = f"{get_base_output_path()}/standalone_build.txt",
-    Run BuildStandalonePlayerOSX test to produce a player. The location defaults to Project/testPlayer.
+    Run BuildStandalonePlayerOSX test to produce a player. The location defaults to
+    artifacts/testPlayer.
    """
    unity_exe = get_unity_executable_path()
    print(f"Running BuildStandalonePlayerOSX via {unity_exe}")
        "-executeMethod",
        "MLAgents.StandaloneBuildTest.BuildStandalonePlayerOSX",
    ]
-    if verbose:
-        test_args += ["-logfile", "-"]
+
+    os.makedirs(os.path.dirname(log_output_path), exist_ok=True)
+    subprocess.run(["touch", log_output_path])
+    test_args += ["-logfile", log_output_path]
+
+        output_path = os.path.join(get_base_output_path(), output_path)
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
    if scene_path is not None:
        test_args += ["--mlagents-build-scene-path", scene_path]
    print(f"{' '.join(test_args)} ...")
+
+    # Copy the default build name into the artifacts folder.
+    if output_path is None and res.returncode == 0:
+        shutil.move(
+            os.path.join(base_path, "Project", "testPlayer.app"),
+            os.path.join(get_base_output_path(), "testPlayer.app"),
+        )
+
+    # Print if we fail or want verbosity.
+    if verbose or res.returncode != 0:
+        subprocess.run(["cat", log_output_path])
+
    return res.returncode


    """
    if csharp_version is None:
        return
+
+        subprocess.check_call(f"rm -rf {csharp_dir}", shell=True)
        subprocess.check_call(
            f"git checkout {csharp_version} -- {csharp_dir}", shell=True
        )
    """
    subprocess.check_call("git reset HEAD .", shell=True)
    subprocess.check_call("git checkout -- .", shell=True)
+    # Ensure the cache isn't polluted with old compiled assemblies.
+    subprocess.check_call(f"rm -rf Project/Library", shell=True)


 def override_config_file(src_path, dest_path, **kwargs):
--- a/utils/make_readme_table.py
+++ b/utils/make_readme_table.py
    ["0.14.0", "February 13, 2020"],
    ["0.14.1", "February 26, 2020"],
    ["0.15.0", "March 18, 2020"],
+    ["0.15.1", "March 30, 2020"],
 ]

 MAX_DAYS = 150  # do not print releases older than this many days
--- a/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
+++ b/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
+#if UNITY_INCLUDE_TESTS
+using System.Collections;
+using System.Collections.Generic;
+using MLAgents;
+using MLAgents.Policies;
+using MLAgents.Sensors;
+using NUnit.Framework;
+using UnityEngine;
+using UnityEngine.TestTools;
+
+namespace Tests
+{
+
+    /// <summary>
+    /// Agent subclass that counts how many times Heuristic() is invoked
+    /// and otherwise defers to the base implementation.
+    /// </summary>
+    public class PublicApiAgent : Agent
+    {
+        public int numHeuristicCalls;
+
+        public override float[] Heuristic()
+        {
+            // Count the call first, then hand back whatever the base class produces.
+            numHeuristicCalls += 1;
+            var baseActions = base.Heuristic();
+            return baseActions;
+        }
+    }
+
+    // Simple SensorComponent that sets up a StackingSensor
+    /// <summary>
+    /// SensorComponent that wraps another SensorComponent and exposes its
+    /// sensor through a StackingSensor with numStacks stacks.
+    /// </summary>
+    public class StackingComponent : SensorComponent
+    {
+        public SensorComponent wrappedComponent;
+        public int numStacks;
+
+        /// <summary>
+        /// Creates the wrapped component's sensor and wraps it in a StackingSensor.
+        /// </summary>
+        public override ISensor CreateSensor()
+        {
+            return new StackingSensor(wrappedComponent.CreateSensor(), numStacks);
+        }
+
+        /// <summary>
+        /// Returns a new array holding the wrapped component's observation shape
+        /// with every dimension multiplied by numStacks.
+        /// </summary>
+        public override int[] GetObservationShape()
+        {
+            var wrappedShape = wrappedComponent.GetObservationShape();
+            var stackedShape = new int[wrappedShape.Length];
+            for (var dim = 0; dim < wrappedShape.Length; dim++)
+            {
+                stackedShape[dim] = wrappedShape[dim] * numStacks;
+            }
+
+            return stackedShape;
+        }
+    }
+
+    /// <summary>
+    /// Exercises the public runtime API by building an agent GameObject entirely
+    /// from code (BehaviorParameters, sensor components, Agent, DecisionRequester)
+    /// and stepping the Academy manually.
+    /// </summary>
+    public class RuntimeApiTest
+    {
+        // Turn off automatic stepping; the test calls Academy.Instance.EnvironmentStep() itself.
+        [SetUp]
+        public static void Setup()
+        {
+            Academy.Instance.AutomaticSteppingEnabled = false;
+        }
+
+        // Builds the agent, then checks sensor initialization, the returned actions and
+        // the number of Heuristic() calls across three manual environment steps.
+        [UnityTest]
+        public IEnumerator RuntimeApiTestWithEnumeratorPasses()
+        {
+            var gameObject = new GameObject();
+
+            var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
+            behaviorParams.brainParameters.vectorObservationSize = 3;
+            behaviorParams.brainParameters.numStackedVectorObservations = 2;
+            behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
+            behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
+            behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
+            behaviorParams.behaviorName = "TestBehavior";
+            behaviorParams.TeamId = 42;
+            behaviorParams.useChildSensors = true;
+
+
+            // Can't actually create an Agent with InferenceOnly and no model, so change back
+            behaviorParams.behaviorType = BehaviorType.Default;
+
+            var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
+            sensorComponent.sensorName = "ray3d";
+            sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
+            sensorComponent.raysPerDirection = 3;
+
+            // Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
+            // This isn't necessarily practical, just to ensure that it can be done
+            var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
+            wrappingSensorComponent.wrappedComponent = sensorComponent;
+            wrappingSensorComponent.numStacks = 3;
+
+            // ISensor isn't set up yet.
+            Assert.IsNull(sensorComponent.raySensor);
+
+
+            // Make sure we can set the behavior type correctly after the agent is initialized
+            // (this creates a new policy).
+            // NOTE(review): the Agent component is only added below, so at this point the
+            // behavior type is being set before the agent exists - confirm the intended order.
+            behaviorParams.behaviorType = BehaviorType.HeuristicOnly;
+
+            // Agent needs to be added after everything else is setup.
+            var agent = gameObject.AddComponent<PublicApiAgent>();
+
+            // DecisionRequester has to be added after Agent.
+            var decisionRequester = gameObject.AddComponent<DecisionRequester>();
+            decisionRequester.DecisionPeriod = 2;
+            decisionRequester.TakeActionsBetweenDecisions = true;
+
+
+            // Initialization should set up the sensors
+            Assert.IsNotNull(sensorComponent.raySensor);
+
+            // Let's change the inference device
+            var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
+            agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);
+
+            agent.AddReward(1.0f);
+
+            // skip a frame.
+            yield return null;
+
+            Academy.Instance.EnvironmentStep();
+
+            var actions = agent.GetAction();
+            // default Heuristic implementation should return zero actions.
+            Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
+            Assert.AreEqual(1, agent.numHeuristicCalls);
+
+            // Second step: the call count is expected to stay at 1
+            // (presumably because DecisionPeriod is 2 - verify against DecisionRequester).
+            Academy.Instance.EnvironmentStep();
+            Assert.AreEqual(1, agent.numHeuristicCalls);
+
+            // Third step: a second Heuristic() call is expected.
+            Academy.Instance.EnvironmentStep();
+            Assert.AreEqual(2, agent.numHeuristicCalls);
+        }
+    }
+}
+#endif
--- a/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs.meta
+++ b/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs.meta
+fileFormatVersion: 2
+guid: 17878576e4ed14b09875e37394e5ad90
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/com.unity.ml-agents/Tests/Runtime/Unity.ML-Agents.Runtime.Tests.asmdef
+++ b/com.unity.ml-agents/Tests/Runtime/Unity.ML-Agents.Runtime.Tests.asmdef
+{
+    "name": "Tests",
+    "references": [
+        "Unity.ML-Agents",
+        "Barracuda",
+        "Unity.ML-Agents.CommunicatorObjects",
+        "Unity.ML-Agents.Editor"
+    ],
+    "optionalUnityReferences": [
+        "TestAssemblies"
+    ],
+    "includePlatforms": [],
+    "excludePlatforms": [],
+    "allowUnsafeCode": false,
+    "overrideReferences": true,
+    "precompiledReferences": [
+        "System.IO.Abstractions.dll",
+        "System.IO.Abstractions.TestingHelpers.dll",
+        "Google.Protobuf.dll"
+    ],
+    "autoReferenced": false,
+    "defineConstraints": [
+        "UNITY_INCLUDE_TESTS"
+    ]
+}
--- a/com.unity.ml-agents/Tests/Runtime/Unity.ML-Agents.Runtime.Tests.asmdef.meta
+++ b/com.unity.ml-agents/Tests/Runtime/Unity.ML-Agents.Runtime.Tests.asmdef.meta
+fileFormatVersion: 2
+guid: d29014db7ebcd4cf4a14f537fbf02110
+AssemblyDefinitionImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/com.unity.ml-agents/Tests/Runtime/SerializeTestScene.unity
+++ b/com.unity.ml-agents/Tests/Runtime/SerializeTestScene.unity
-%YAML 1.1
-%TAG !u! tag:unity3d.com,2011:
--- !u!29 &1
-OcclusionCullingSettings:
-  m_ObjectHideFlags: 0
-  serializedVersion: 2
-  m_OcclusionBakeSettings:
-    smallestOccluder: 5
-    smallestHole: 0.25
-    backfaceThreshold: 100
-  m_SceneGUID: 00000000000000000000000000000000
-  m_OcclusionCullingData: {fileID: 0}
--- !u!104 &2
-RenderSettings:
-  m_ObjectHideFlags: 0
-  serializedVersion: 9
-  m_Fog: 0
-  m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
-  m_FogMode: 3
-  m_FogDensity: 0.01
-  m_LinearFogStart: 0
-  m_LinearFogEnd: 300
-  m_AmbientSkyColor: {r: 0.212, g: 0.227, b: 0.259, a: 1}
-  m_AmbientEquatorColor: {r: 0.114, g: 0.125, b: 0.133, a: 1}
-  m_AmbientGroundColor: {r: 0.047, g: 0.043, b: 0.035, a: 1}
-  m_AmbientIntensity: 1
-  m_AmbientMode: 0
-  m_SubtractiveShadowColor: {r: 0.42, g: 0.478, b: 0.627, a: 1}
-  m_SkyboxMaterial: {fileID: 10304, guid: 0000000000000000f000000000000000, type: 0}
-  m_HaloStrength: 0.5
-  m_FlareStrength: 1
-  m_FlareFadeSpeed: 3
-  m_HaloTexture: {fileID: 0}
-  m_SpotCookie: {fileID: 10001, guid: 0000000000000000e000000000000000, type: 0}
-  m_DefaultReflectionMode: 0
-  m_DefaultReflectionResolution: 128
-  m_ReflectionBounces: 1
-  m_ReflectionIntensity: 1
-  m_CustomReflection: {fileID: 0}
-  m_Sun: {fileID: 0}
-  m_IndirectSpecularColor: {r: 0.44657898, g: 0.49641287, b: 0.5748173, a: 1}
-  m_UseRadianceAmbientProbe: 0
--- !u!157 &3
-LightmapSettings:
-  m_ObjectHideFlags: 0
-  serializedVersion: 11
-  m_GIWorkflowMode: 0
-  m_GISettings:
-    serializedVersion: 2
-    m_BounceScale: 1
-    m_IndirectOutputScale: 1
-    m_AlbedoBoost: 1
-    m_EnvironmentLightingMode: 0
-    m_EnableBakedLightmaps: 1
-    m_EnableRealtimeLightmaps: 1
-  m_LightmapEditorSettings:
-    serializedVersion: 10
-    m_Resolution: 2
-    m_BakeResolution: 40
-    m_AtlasSize: 1024
-    m_AO: 0
-    m_AOMaxDistance: 1
-    m_CompAOExponent: 1
-    m_CompAOExponentDirect: 0
-    m_Padding: 2
-    m_LightmapParameters: {fileID: 0}
-    m_LightmapsBakeMode: 1
-    m_TextureCompression: 1
-    m_FinalGather: 0
-    m_FinalGatherFiltering: 1
-    m_FinalGatherRayCount: 256
-    m_ReflectionCompression: 2
-    m_MixedBakeMode: 2
-    m_BakeBackend: 1
-    m_PVRSampling: 1
-    m_PVRDirectSampleCount: 32
-    m_PVRSampleCount: 500
-    m_PVRBounces: 2
-    m_PVRFilterTypeDirect: 0
-    m_PVRFilterTypeIndirect: 0
-    m_PVRFilterTypeAO: 0
-    m_PVRFilteringMode: 1
-    m_PVRCulling: 1
-    m_PVRFilteringGaussRadiusDirect: 1
-    m_PVRFilteringGaussRadiusIndirect: 5
-    m_PVRFilteringGaussRadiusAO: 2
-    m_PVRFilteringAtrousPositionSigmaDirect: 0.5
-    m_PVRFilteringAtrousPositionSigmaIndirect: 2
-    m_PVRFilteringAtrousPositionSigmaAO: 1
-    m_ShowResolutionOverlay: 1
-  m_LightingDataAsset: {fileID: 0}
-  m_UseShadowmask: 1
--- !u!196 &4
-NavMeshSettings:
-  serializedVersion: 2
-  m_ObjectHideFlags: 0
-  m_BuildSettings:
-    serializedVersion: 2
-    agentTypeID: 0
-    agentRadius: 0.5
-    agentHeight: 2
-    agentSlope: 45
-    agentClimb: 0.4
-    ledgeDropHeight: 0
-    maxJumpAcrossDistance: 0
-    minRegionArea: 2
-    manualCellSize: 0
-    cellSize: 0.16666667
-    manualTileSize: 0
-    tileSize: 256
-    accuratePlacement: 0
-    debug:
-      m_Flags: 0
-  m_NavMeshData: {fileID: 0}
--- !u!1 &106586301
-GameObject:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  serializedVersion: 6
-  m_Component:
-  - component: {fileID: 106586304}
-  - component: {fileID: 106586303}
-  - component: {fileID: 106586302}
-  m_Layer: 0
-  m_Name: Agent
-  m_TagString: Untagged
-  m_Icon: {fileID: 0}
-  m_NavMeshLayer: 0
-  m_StaticEditorFlags: 0
-  m_IsActive: 1
--- !u!114 &106586302
-MonoBehaviour:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 106586301}
-  m_Enabled: 1
-  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: c3d607733e457478885f15ee89725709, type: 3}
-  m_Name: 
-  m_EditorClassIdentifier: 
-  agentParameters:
-    maxStep: 5000
-  hasUpgradedFromAgentParameters: 1
-  maxStep: 5000
--- !u!114 &106586303
-MonoBehaviour:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 106586301}
-  m_Enabled: 1
-  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 5d1c4e0b1822b495aa52bc52839ecb30, type: 3}
-  m_Name: 
-  m_EditorClassIdentifier: 
-  m_BrainParameters:
-    vectorObservationSize: 1
-    numStackedVectorObservations: 1
-    vectorActionSize: 01000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
-  m_Model: {fileID: 0}
-  m_InferenceDevice: 0
-  m_BehaviorType: 0
-  m_BehaviorName: My Behavior
-  m_TeamID: 0
-  m_UseChildSensors: 1
--- !u!4 &106586304
-Transform:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 106586301}
-  m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
-  m_LocalPosition: {x: 0, y: 0, z: 0}
-  m_LocalScale: {x: 1, y: 1, z: 1}
-  m_Children:
-  - {fileID: 1471486645}
-  m_Father: {fileID: 0}
-  m_RootOrder: 2
-  m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &185701317
-GameObject:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  serializedVersion: 6
-  m_Component:
-  - component: {fileID: 185701319}
-  - component: {fileID: 185701318}
-  m_Layer: 0
-  m_Name: Directional Light
-  m_TagString: Untagged
-  m_Icon: {fileID: 0}
-  m_NavMeshLayer: 0
-  m_StaticEditorFlags: 0
-  m_IsActive: 1
--- !u!108 &185701318
-Light:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 185701317}
-  m_Enabled: 1
-  serializedVersion: 8
-  m_Type: 1
-  m_Color: {r: 1, g: 0.95686275, b: 0.8392157, a: 1}
-  m_Intensity: 1
-  m_Range: 10
-  m_SpotAngle: 30
-  m_CookieSize: 10
-  m_Shadows:
-    m_Type: 2
-    m_Resolution: -1
-    m_CustomResolution: -1
-    m_Strength: 1
-    m_Bias: 0.05
-    m_NormalBias: 0.4
-    m_NearPlane: 0.2
-  m_Cookie: {fileID: 0}
-  m_DrawHalo: 0
-  m_Flare: {fileID: 0}
-  m_RenderMode: 0
-  m_CullingMask:
-    serializedVersion: 2
-    m_Bits: 4294967295
-  m_Lightmapping: 4
-  m_LightShadowCasterMode: 0
-  m_AreaSize: {x: 1, y: 1}
-  m_BounceIntensity: 1
-  m_ColorTemperature: 6570
-  m_UseColorTemperature: 0
-  m_ShadowRadius: 0
-  m_ShadowAngle: 0
--- !u!4 &185701319
-Transform:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 185701317}
-  m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261}
-  m_LocalPosition: {x: 0, y: 3, z: 0}
-  m_LocalScale: {x: 1, y: 1, z: 1}
-  m_Children: []
-  m_Father: {fileID: 0}
-  m_RootOrder: 1
-  m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0}
--- !u!1 &804630118
-GameObject:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  serializedVersion: 6
-  m_Component:
-  - component: {fileID: 804630121}
-  - component: {fileID: 804630120}
-  - component: {fileID: 804630119}
-  m_Layer: 0
-  m_Name: Main Camera
-  m_TagString: MainCamera
-  m_Icon: {fileID: 0}
-  m_NavMeshLayer: 0
-  m_StaticEditorFlags: 0
-  m_IsActive: 1
--- !u!81 &804630119
-AudioListener:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 804630118}
-  m_Enabled: 1
--- !u!20 &804630120
-Camera:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 804630118}
-  m_Enabled: 1
-  serializedVersion: 2
-  m_ClearFlags: 1
-  m_BackGroundColor: {r: 0.19215687, g: 0.3019608, b: 0.4745098, a: 0}
-  m_projectionMatrixMode: 1
-  m_SensorSize: {x: 36, y: 24}
-  m_LensShift: {x: 0, y: 0}
-  m_GateFitMode: 2
-  m_FocalLength: 50
-  m_NormalizedViewPortRect:
-    serializedVersion: 2
-    x: 0
-    y: 0
-    width: 1
-    height: 1
-  near clip plane: 0.3
-  far clip plane: 1000
-  field of view: 60
-  orthographic: 0
-  orthographic size: 5
-  m_Depth: -1
-  m_CullingMask:
-    serializedVersion: 2
-    m_Bits: 4294967295
-  m_RenderingPath: -1
-  m_TargetTexture: {fileID: 0}
-  m_TargetDisplay: 0
-  m_TargetEye: 3
-  m_HDR: 1
-  m_AllowMSAA: 1
-  m_AllowDynamicResolution: 0
-  m_ForceIntoRT: 0
-  m_OcclusionCulling: 1
-  m_StereoConvergence: 10
-  m_StereoSeparation: 0.022
--- !u!4 &804630121
-Transform:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 804630118}
-  m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
-  m_LocalPosition: {x: 0, y: 1, z: -10}
-  m_LocalScale: {x: 1, y: 1, z: 1}
-  m_Children: []
-  m_Father: {fileID: 0}
-  m_RootOrder: 0
-  m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &1471486644
-GameObject:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  serializedVersion: 6
-  m_Component:
-  - component: {fileID: 1471486645}
-  - component: {fileID: 1471486648}
-  - component: {fileID: 1471486647}
-  - component: {fileID: 1471486646}
-  m_Layer: 0
-  m_Name: Cube
-  m_TagString: Untagged
-  m_Icon: {fileID: 0}
-  m_NavMeshLayer: 0
-  m_StaticEditorFlags: 0
-  m_IsActive: 1
--- !u!4 &1471486645
-Transform:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 1471486644}
-  m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
-  m_LocalPosition: {x: 0, y: 0, z: 0}
-  m_LocalScale: {x: 1, y: 1, z: 1}
-  m_Children: []
-  m_Father: {fileID: 106586304}
-  m_RootOrder: 0
-  m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!65 &1471486646
-BoxCollider:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 1471486644}
-  m_Material: {fileID: 0}
-  m_IsTrigger: 0
-  m_Enabled: 1
-  serializedVersion: 2
-  m_Size: {x: 1, y: 1, z: 1}
-  m_Center: {x: 0, y: 0, z: 0}
--- !u!23 &1471486647
-MeshRenderer:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 1471486644}
-  m_Enabled: 1
-  m_CastShadows: 1
-  m_ReceiveShadows: 1
-  m_DynamicOccludee: 1
-  m_MotionVectors: 1
-  m_LightProbeUsage: 1
-  m_ReflectionProbeUsage: 1
-  m_RenderingLayerMask: 1
-  m_RendererPriority: 0
-  m_Materials:
-  - {fileID: 10303, guid: 0000000000000000f000000000000000, type: 0}
-  m_StaticBatchInfo:
-    firstSubMesh: 0
-    subMeshCount: 0
-  m_StaticBatchRoot: {fileID: 0}
-  m_ProbeAnchor: {fileID: 0}
-  m_LightProbeVolumeOverride: {fileID: 0}
-  m_ScaleInLightmap: 1
-  m_PreserveUVs: 0
-  m_IgnoreNormalsForChartDetection: 0
-  m_ImportantGI: 0
-  m_StitchLightmapSeams: 0
-  m_SelectedEditorRenderState: 3
-  m_MinimumChartSize: 4
-  m_AutoUVMaxDistance: 0.5
-  m_AutoUVMaxAngle: 89
-  m_LightmapParameters: {fileID: 0}
-  m_SortingLayerID: 0
-  m_SortingLayer: 0
-  m_SortingOrder: 0
--- !u!33 &1471486648
-MeshFilter:
-  m_ObjectHideFlags: 0
-  m_CorrespondingSourceObject: {fileID: 0}
-  m_PrefabInstance: {fileID: 0}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 1471486644}
-  m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- a/com.unity.ml-agents/Tests/Runtime/SerializeTestScene.unity.meta
+++ b/com.unity.ml-agents/Tests/Runtime/SerializeTestScene.unity.meta
-fileFormatVersion: 2
-guid: 60783bd849bd242eeb66243542762b23
-DefaultImporter:
-  externalObjects: {}
-  userData: 
-  assetBundleName: 
-  assetBundleVariant: 
--- a/docs/images/banana.png
+++ b/docs/images/banana.png
--- a/docs/images/running-a-pretrained-model.gif
+++ b/docs/images/running-a-pretrained-model.gif
--- a/docs/images/3dballhard.png
+++ b/docs/images/3dballhard.png
--- a/docs/images/bananaimitation.png
+++ b/docs/images/bananaimitation.png