浏览代码

Comment improvements & refactoring to Academy.cs

Added several class and method-level comments that are compatible with Doxygen for auto-generation of documentation, in addition to some stylistic and minor code changes (summarized below).

Stylistic changes:
- Modified comments to /// style instead of /** */
- Removed unnecessary imports
- Removed unnecessary “private” declarations
- Limited code to 80 characters per line
- Re-organized variables to group those that are visible in Inspector (they are now at the top)

Code changes:
- Renamed ScreenConfiguration to EnvironmentConfiguration (variable only used within Academy.cs, thus no other files needed modification)
- Renamed ConfigureEngine to ConfigureEnvironment and created a ConfigureEnvironmentHelper method
- Renamed _isCurrentlyInference to modeSwitched to signify when the engine config needs to be changed
- Added isCommunicatorOn flag to be explicit about the existence of a communicator
- Made isInference private which requ...
/develop-generalizationTraining-TrainerController
Marwan Mattar 7 年前
当前提交
fa638000
共有 7 个文件被更改,包括 523 次插入335 次删除
  1. 217
      unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs
  2. 78
      unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs
  3. 2
      unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  4. 514
      unity-environment/Assets/ML-Agents/Scripts/Academy.cs
  5. 9
      unity-environment/Assets/ML-Agents/Scripts/Agent.cs
  6. 15
      unity-environment/Assets/ML-Agents/Scripts/Brain.cs
  7. 23
      unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs

217
unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs


using UnityEngine;
using UnityEditor;
using UnityEngine.TestTools;
using System.Collections;
using System.Reflection;
namespace MLAgentsTests

collectObservationsCalls += 1;
}
public override void AgentAction(float[] vetorAction, string textAction)
public override void AgentAction(float[] vectorAction, string textAction)
{
agentActionCalls += 1;
AddReward(0.1f);

TestAcademy aca = acaGO.GetComponent<TestAcademy>();
Assert.AreNotEqual(null, aca);
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());
}
[Test]

acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(0, aca.academyResetCalls);
Assert.AreEqual(0, aca.AcademyStepCalls);

MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("EnvironmentStep",
for (int i = 0; i < 10; i++){
for (int i = 0; i < 10; i++)
{
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(i, aca.stepsSinceReset);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(i, aca.GetStepCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);

{
{
numberReset += 1;
}
AcademyStepMethod.Invoke((object)aca, new object[] { });

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

AgentEnableMethod.Invoke(agent1, new object[] { aca });
AcademyInitializeMethod.Invoke(aca, new object[] { });
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
int numberAgent1Reset = 0;
int numberAgent2Initialization = 0;
int requestDecision =0;
int requestAction=0;
int numberAgent1Reset = 0;
int numberAgent2Initialization = 0;
int requestDecision = 0;
int requestAction = 0;
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual((i+1)/2, agent1.collectObservationsCalls);
Assert.AreEqual((i + 1) / 2, agent1.collectObservationsCalls);
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
{
AgentEnableMethod.Invoke(agent2, new object[] { aca });
numberAgent2Initialization += 1;

if ((i % 3 == 0) && (i > 2))
{
//Every 3 steps after agent 2 is initialized, request decision
requestDecision +=1;
requestAction+=1;
requestDecision += 1;
requestAction += 1;
agent2.RequestDecision();
}
else if ((i % 5 == 0) && (i > 2))

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
int numberReset = 0;
int stepsSinceReset = 0;

Assert.AreEqual(stepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);

stepsSinceReset += 1;
// Regularly set the academy to done to check behavior
if (i % 5 == 3)
if (i % 5 == 3)
{
aca.Done();
numberReset += 1;

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

AgentEnableMethod.Invoke(agent2, new object[] { aca });
AcademyInitializeMethod.Invoke(aca, new object[] { });
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int agent1StepSinceReset =0;
int agent2StepSinceReset=0;
int agent1StepSinceReset = 0;
int agent2StepSinceReset = 0;
Assert.AreEqual(acaStepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberAcaReset, aca.episodeCount);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberAcaReset, aca.academyResetCalls);

Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);
// Agent 2 and academy reset at the first step
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
if (i % 100 == 3)
if (i % 100 == 3)
{
aca.Done();
numberAcaReset += 1;

if (i % 11 == 5)
if (i % 11 == 5)
if (i % 13 == 3)
if (i % 13 == 3)
if (!(agent2.IsDone()||aca.IsDone()))
if (!(agent2.IsDone() || aca.IsDone()))
{
// If the agent was already reset before the request decision
// We should not reset again

}
}
// Request a decision for agent 2 regularly
if (i % 3 == 2)
if (i % 3 == 2)
else if (i % 5 == 1)
else if (i % 5 == 1)
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions==0)) || aca.IsDone())
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)) || aca.IsDone())
{
numberAgent1Reset += 1;
agent1StepSinceReset = 0;

agent1StepSinceReset += 1;
agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
if (i < 2)
{
agent1StepSinceReset = 0;
}

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField("maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField(
"maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
maxStep.SetValue((object)aca, 20);
int numberReset = 0;

Assert.AreEqual(stepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
if (i % 20 == 0)
if (i % 20 == 0)
{
numberReset += 1;
stepsSinceReset = 1;

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField("maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField(
"maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
maxStep.SetValue((object)aca, 100);
agent1.agentParameters = new AgentParameters();

AcademyInitializeMethod.Invoke(aca, new object[] { });
int numberAgent1Reset = 0;
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int numberAcaReset = 0;
int acaStepsSinceReset = 0;

for (int i = 0; i < 500; i++)
{
Assert.AreEqual(acaStepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);

Assert.AreEqual(numberAcaReset, aca.episodeCount);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
agent2.RequestDecision();
agent2.RequestDecision();
if (i % 100 == 0)
if (i % 100 == 0)
{
acaStepsSinceReset = 0;
agent1StepSinceReset = 0;

agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
if (i < 2)
{
agent1StepSinceReset = 0;
}

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
brain.brainParameters = new BrainParameters();

agent1.agentParameters.maxStep = 20;
//Here we specify that the agent does not reset when done
agent1.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
brain.brainParameters.vectorObservationSize = 0;
brain.brainParameters.cameraResolutions = new resolution[0];
agent1.GiveBrain(brain);

Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);
// we request a decision at each step
agent2.RequestDecision();
agent2.RequestDecision();
if (agent1ResetOnDone ==0)
if (agent1ResetOnDone == 0)
if ((i > 2) && (i % 21 == 0)){
if ((i > 2) && (i % 21 == 0))
{
agent1ResetOnDone = 1;
}

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

{
agent2.RequestAction();
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f- agent2.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
if ((i % 21 == 0) && (i>0))
if ((i % 21 == 0) && (i > 0))
{
j = 0;
}

78
unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


using System.Collections;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

private BasicAcademy academy;
public float timeBetweenDecisionsAtInference;
private float timeSinceDecision;
public int position;
public int smallGoalPosition;
public int largeGoalPosition;
public GameObject largeGoal;
public GameObject smallGoal;
public int minPosition;
public int maxPosition;
public int position;
public int smallGoalPosition;
public int largeGoalPosition;
public GameObject largeGoal;
public GameObject smallGoal;
public int minPosition;
public int maxPosition;
public override void InitializeAgent()
{

/// <summary>
/// Collects the agent's observations by adding its current (integer)
/// position as a single vector observation.
/// </summary>
public override void CollectObservations()
{
AddVectorObs(position);
}
public override void CollectObservations()
{
AddVectorObs(position);
}
public override void AgentAction(float[] vectorAction, string textAction)
{

if (movement == 1) { direction = 1; }
position += direction;
if (position < minPosition) { position = minPosition; }
if (position > maxPosition) { position = maxPosition; }
position += direction;
if (position < minPosition) { position = minPosition; }
if (position > maxPosition) { position = maxPosition; }
gameObject.transform.position = new Vector3(position, 0f, 0f);
gameObject.transform.position = new Vector3(position, 0f, 0f);
AddReward( - 0.01f);
AddReward(-0.01f);
if (position == smallGoalPosition)
{
if (position == smallGoalPosition)
{
AddReward( 0.1f);
}
AddReward(0.1f);
}
if (position == largeGoalPosition)
{
if (position == largeGoalPosition)
{
}
}
}
}
/// <summary>
/// Resets the agent to its starting state: the agent is placed at
/// position 0, the movement bounds are restored to [-10, 10], and the
/// small/large goals are repositioned at -3 and 7 respectively. The
/// goal GameObjects are moved to match the new goal positions.
/// </summary>
public override void AgentReset()
{
position = 0;
minPosition = -10;
maxPosition = 10;
smallGoalPosition = -3;
largeGoalPosition = 7;
smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
}
public override void AgentReset()
{
position = 0;
minPosition = -10;
maxPosition = 10;
smallGoalPosition = -3;
largeGoalPosition = 7;
smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
}
public override void AgentOnDone()
{
public override void AgentOnDone()
{
}
}
public void FixedUpdate()
{

private void WaitTimeInference()
{
if (!academy.isInference)
if (!academy.GetIsInference())
{
RequestDecision();
}

2
unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


private void WaitTimeInference()
{
if (!academy.isInference)
if (!academy.GetIsInference())
{
RequestDecision();
}

514
unity-environment/Assets/ML-Agents/Scripts/Academy.cs


using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
* Welcome to Unity Machine Learning Agents documentation.
* Welcome to Unity Machine Learning Agents (ML-Agents).
*
* ML-Agents contains five entities: Academy, Brain, Agent, Communicator and
* Python API. The academy, and all its brains and connected agents live within
* a learning environment (herein called Environment), while the communicator
* manages the communication between the learning environment and the Python
* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/blob/master/docs/
/// <summary>
/// Wraps the environment-level parameters that are provided within the
/// Editor. These parameters can be provided for training and inference
/// modes separately and represent screen resolution, rendering quality and
/// frame rate.
/// </summary>
public class ScreenConfiguration
public class EnvironmentConfiguration
[Tooltip("Height of the environment window in pixels")]
[Tooltip("Height of the environment window in pixels.")]
[Tooltip("Rendering quality of environment. (Higher is better quality)")]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Tooltip("Speed at which environment is run. (Higher is faster)")]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Tooltip("FPS engine attempts to maintain.")]
[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
public ScreenConfiguration(int w, int h, int q, float ts, int tf)
/// Initializes a new instance of the
/// <see cref="EnvironmentConfiguration"/> class.
/// <param name="width">Width of environment window (pixels).</param>
/// <param name="height">Height of environment window (pixels).</param>
/// <param name="qualityLevel">
/// Rendering quality of environment. Ranges from 0 to 5, with higher.
/// </param>
/// <param name="timeScale">
/// Speed at which environment is run. Ranges from 1 to 100, with higher
/// values representing faster speed.
/// </param>
/// <param name="targetFrameRate">
/// Target frame rate (per second) that the engine tries to maintain.
/// </param>
public EnvironmentConfiguration(
int width, int height, int qualityLevel,
float timeScale, int targetFrameRate)
width = w;
height = h;
qualityLevel = q;
timeScale = ts;
targetFrameRate = tf;
this.width = width;
this.height = height;
this.qualityLevel = qualityLevel;
this.timeScale = timeScale;
this.targetFrameRate = targetFrameRate;
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#academy")]
/** Create a child class to implement InitializeAcademy(), AcademyStep()
* and AcademyReset(). The child class script must be attached to an empty game
* object in your scene, and there can only be one such object within the scene.
*/
/// <summary>
/// An Academy is where Agent objects go to train their behaviors. More
/// specifically, an academy is a collection of Brain objects and each agent
/// in a scene is attached to one brain (a single brain may be attached to
/// multiple agents). Currently, this class is expected to be extended to
/// implement the desired academy behavior.
/// </summary>
/// <remarks>
/// When an academy is run, it can either be in inference or training mode.
/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the brain
/// attached to it (which may be internal, heuristic or player).
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Academy.md")]
// Fields provided in the Inspector
[Tooltip("Total number of steps per episode. \n" +
"0 corresponds to episodes without a maximum number of steps. \n" +
"Once the step counter reaches maximum, " +
"the environment will reset.")]
private int maxSteps;
[Tooltip("Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset.")]
int maxSteps;
[HideInInspector]
public bool isInference = true;
/**< \brief Do not modify : If true, the Academy will use inference
* settings. */
private bool _isCurrentlyInference;
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering quality" +
" and engine speed during Training.")]
private ScreenConfiguration trainingConfiguration =
new ScreenConfiguration(80, 80, 1, 100.0f, -1);
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration trainingConfiguration =
new EnvironmentConfiguration(80, 80, 1, 100.0f, -1);
[Tooltip("The engine-level settings which correspond to rendering quality" +
" and engine speed during Inference.")]
private ScreenConfiguration inferenceConfiguration =
new ScreenConfiguration(1280, 720, 5, 1.0f, 60);
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Inference.")]
EnvironmentConfiguration inferenceConfiguration =
new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60);
/**< \brief Contains a mapping from parameter names to float values. */
/**< You can specify the Default Reset Parameters in the Inspector of the
* Academy. You can modify these parameters when training with an External
* brain by passing a config dictionary at reset. Reference resetParameters
* in your AcademyReset() or AcademyStep() to modify elements in your
* environment at reset time. */
/// <summary>
/// Contains a mapping from parameter names to float values. They are
/// used in <see cref="AcademyReset"/> and <see cref="AcademyStep"/>
/// to modify elements in the environment at reset time.
/// <summary/>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training with an external Brain by passing a config
/// dictionary at reset.
/// </remarks>
"environment on reset.")]
"environment when it resets.")]
// Fields not provided in the Inspector.
/// Boolean flag indicating whether a communicator is accessible by the
/// environment. This also specifies whether the environment is in
/// Training or Inference mode.
bool isCommunicatorOn;
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence
/// or absence of a communicator. Furthermore, it can be modified by an
/// external Brain during reset via <see cref="SetIsInference"/>.
bool isInference = true;
/// The done flag of the academy. When set to true, the academy will
/// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
/// at step time. If true, all agents done flags will be set to true.
bool done;
/// Whether the academy has reached the maximum number of steps for the
/// current episode.
bool maxStepReached;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int episodeCount;
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="AcademyReset"/>.
int stepCount;
/// Flag that indicates whether the inference/training mode of the
/// environment was switched by the external Brain. This impacts the
/// engine settings at the next environment step.
bool modeSwitched;
/// Pointer to the communicator currently in use by the Academy.
Communicator communicator;
//
bool firstAcademyReset;
// The Academy uses a series of events to communicate with agents and
// brains to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).
// Signals to all the Brains at each environment step so they can decide
// actions for their agents.
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
// decision).
// Signals to all the agents at each environment step so they can send
// their state to their Brain if they have requested a decision.
// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
public event System.Action AgentForceReset;
/**< \brief The done flag of the Academy. */
/**< When set to true, the Academy will call AcademyReset() instead of
* AcademyStep() at step time.
* If true, all agents done flags will be set to true.*/
private bool done;
// Signals to all the agents each time the Academy force resets.
public event System.Action AgentForceReset;
/// The max step reached.
/// Monobehavior function called at the very beginning of environment
/// creation. Academy uses this time to initialize internal data
/// structures, initialize the environment and check for the existence
/// of a communicator.
private bool maxStepReached;
/**< \brief Increments each time the environment is reset. */
[HideInInspector]
public int episodeCount;
[HideInInspector]
public int stepsSinceReset;
/**< \brief Do not modify : pointer to the communicator currently in
* use by the Academy. */
public Communicator communicator;
private bool firstAcademyReset;
_InitializeAcademy();
InitializeEnvironment();
void _InitializeAcademy()
{
List<Brain> brains = GetBrains(gameObject);
InitializeAcademy();
/// <summary>
/// Initializes the environment, configures it and initialized the Academy.
/// </summary>
void InitializeEnvironment()
{
// Initialize communicator (if possible)
if (!communicator.CommunicatorHandShake())
if (communicator.CommunicatorHandShake())
{
isCommunicatorOn = true;
communicator.InitializeCommunicator();
communicator.UpdateCommand();
}
else
// Initialize Academy and Brains.
InitializeAcademy();
List<Brain> brains = GetBrains(gameObject);
if (communicator != null)
{
communicator.InitializeCommunicator();
communicator.UpdateCommand();
}
isInference = (communicator == null);
_isCurrentlyInference = !isInference;
// If a communicator is enabled/provided, then we assume we are in
// training mode. In the absence of a communicator, we assume we are
// in inference mode.
isInference = !isCommunicatorOn;
BrainDecideAction += () => { };
AgentSetStatus += (m, d, i) => { };

AgentForceReset += () => { };
// Configure the environment using the configurations provided by
// the developer in the Editor.
ConfigureEnvironment();
}
/// <summary>
/// Configures the environment settings depending on the training/inference
/// mode and the corresponding parameters passed in the Editor.
/// </summary>
void ConfigureEnvironment()
{
if (isInference)
{
ConfigureEnvironmentHelper(inferenceConfiguration);
Monitor.SetActive(true);
}
else
{
ConfigureEnvironmentHelper(trainingConfiguration);
Monitor.SetActive(false);
}
/// <summary>
/// Helper method for initializing the environment based on the provided
/// configuration.
/// </summary>
/// <param name="config">
/// Environment configuration (specified in the Editor).
/// </param>
static void ConfigureEnvironmentHelper(EnvironmentConfiguration config)
{
// Apply windowed (non-fullscreen) resolution from the configuration.
Screen.SetResolution(config.width, config.height, false);
// Second argument applies expensive quality changes immediately.
QualitySettings.SetQualityLevel(config.qualityLevel, true);
Time.timeScale = config.timeScale;
// NOTE(review): capture framerate is hard-coded to 60 regardless of the
// configuration, while the target frame rate comes from config — confirm
// this asymmetry is intentional.
Time.captureFramerate = 60;
Application.targetFrameRate = config.targetFrameRate;
}
/// Environment specific initialization.
/**
* Implemented in environment-specific child class.
* This method is called once when the environment is loaded.
*/
/// <summary>
/// Initializes the academy and environment. Called during the waking-up
/// phase of the environment before any of the scene objects/agents have
/// been initialized.
/// </summary>
/// <summary>
/// Specifies the academy behavior at every step of the environment.
/// </summary>
public virtual void AcademyStep()
{
private void ConfigureEngine()
}
/// <summary>
/// Specifies the academy behavior when being reset (i.e. at the completion
/// of a global episode).
/// </summary>
public virtual void AcademyReset()
if ((!isInference))
{
Screen.SetResolution(
trainingConfiguration.width,
trainingConfiguration.height,
false);
QualitySettings.SetQualityLevel(
trainingConfiguration.qualityLevel, true);
Time.timeScale = trainingConfiguration.timeScale;
Application.targetFrameRate =
trainingConfiguration.targetFrameRate;
QualitySettings.vSyncCount = 0;
Time.captureFramerate = 60;
Monitor.SetActive(false);
}
else
}
/// <summary>
/// Reports whether the Academy is currently running in inference mode.
/// </summary>
/// <returns>
/// <c>true</c> when in inference mode, <c>false</c> when training.
/// </returns>
public bool GetIsInference() => isInference;
/// <summary>
/// Sets the <see cref="isInference"/> flag to the provided value. If
/// the new flag differs from the current flag value, this signals that
/// the environment configuration needs to be updated.
/// </summary>
/// <param name="isInference">
/// Environment mode, if true then inference, otherwise training.
/// </param>
public void SetIsInference(bool isInference)
{
if (this.isInference != isInference)
Screen.SetResolution(
inferenceConfiguration.width,
inferenceConfiguration.height,
false);
QualitySettings.SetQualityLevel(
inferenceConfiguration.qualityLevel, true);
Time.timeScale = inferenceConfiguration.timeScale;
Application.targetFrameRate =
inferenceConfiguration.targetFrameRate;
Time.captureFramerate = 60;
Monitor.SetActive(true);
this.isInference = isInference;
// This signals to the academy that at the next environment step
// the engine configurations need updating to the respective mode
// (i.e. training vs inference) configuration.
modeSwitched = true;
/// Environment specific step logic.
/**
* Implemented in environment-specific child class.
* This method is called at every step.
*/
public virtual void AcademyStep()
/// <summary>
/// Returns the current episode counter.
/// </summary>
/// <returns>
/// Current episode number.
/// </returns>
public int GetEpisodeCount()
return episodeCount;
/// Environment specific reset logic.
/**
* Implemented in environment-specific child class.
 * This method is called every time the Academy resets (when the global done
* flag is set to true).
*/
public virtual void AcademyReset()
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Current step count.
/// </returns>
public int GetStepCount()
return stepCount;
/// <summary>
/// Sets the done flag to true.
/// </summary>
/// <summary>
/// Returns whether or not the academy is done.
/// </summary>
/// <returns>
/// <c>true</c>, if academy is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone()
{
return done;

/// Forces the full reset. The done flags are not affected. Is either
/// Indicates whether a communicator is attached and active.
/// </summary>
/// <returns>
/// <c>true</c> if a communicator is in use, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn() => isCommunicatorOn;
/// <summary>
/// Provides access to the Communicator the Academy is currently using.
/// </summary>
/// <returns>The communicator currently in use (may be null).</returns>
public Communicator GetCommunicator() => communicator;
/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
private void ForcedFullReset()
void ForcedFullReset()
_AcademyReset();
EnvironmentReset();
internal void _AcademyStep()
/// <summary>
/// Performs a single environment update to the Academy, Brain and Agent
/// objects within the environment.
/// </summary>
void EnvironmentStep()
if (isInference != _isCurrentlyInference)
if (modeSwitched)
ConfigureEngine();
_isCurrentlyInference = isInference;
ConfigureEnvironment();
modeSwitched = false;
if (communicator != null)
if (isCommunicatorOn)
// Update reset parameters.
Dictionary<string, float> NewResetParameters =
communicator.GetResetParameters();
foreach (KeyValuePair<string, float> kv in NewResetParameters)

ForcedFullReset();
communicator.SetCommand(ExternalCommand.STEP);
}

ForcedFullReset();
}
if ((stepsSinceReset >= maxSteps) && maxSteps > 0)
if ((stepCount >= maxSteps) && maxSteps > 0)
AgentSetStatus(maxStepReached, done, stepsSinceReset);
AgentSetStatus(maxStepReached, done, stepCount);
_AcademyReset();
{
EnvironmentReset();
}
AgentResetIfDone();

AgentAct();
stepsSinceReset += 1;
stepCount += 1;
internal void _AcademyReset()
/// <summary>
/// Resets the environment, including the Academy.
/// </summary>
void EnvironmentReset()
stepsSinceReset = 0;
stepCount = 0;
/// <summary>
/// Monobehavior function that dictates each environment step.
/// </summary>
_AcademyStep();
EnvironmentStep();
private static List<Brain> GetBrains(GameObject gameObject)
/// <summary>
/// Helper method that retrieves the Brain objects that are currently
/// specified as children of the Academy within the Editor.
/// </summary>
/// <param name="academy">Academy.</param>
/// <returns>
/// List of brains currently attached to academy.
/// </returns>
static List<Brain> GetBrains(GameObject academy)
var transform = gameObject.transform;
var transform = academy.transform;
for (var i = 0; i < transform.childCount; i++)
{

if (brain != null && child.gameObject.activeSelf)
{
}
}
}

9
unity-environment/Assets/ML-Agents/Scripts/Agent.cs


}
internal void AddVectorObs(float[] observation)
{
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
}

/// Note: If your state is discrete, you need to convert your
/// state into a list of float with length 1.
/// </summary>
/// <param name="action">The action the agent receives
/// from the brain.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{

RenderTexture.active = prevActiveRT;
RenderTexture.ReleaseTemporary(tempRT);
return tex;
}

15
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


CoreBrains = new ScriptableObject[numCoreBrains];
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
}

break;
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
}

if (CoreBrains.Length < System.Enum.GetValues(typeof(BrainType)).Length)
{
int numCoreBrains = System.Enum.GetValues(typeof(BrainType)).Length;
ScriptableObject[] new_CoreBrains =
ScriptableObject[] new_CoreBrains =
new ScriptableObject[numCoreBrains];
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{

}
else
{
new_CoreBrains[(int)bt] =
new_CoreBrains[(int)bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
}

{
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
ScriptableObject.Instantiate(CoreBrains[(int)bt]);
}
}

public void SendState(Agent agent, AgentInfo info)
{
agentInfos.Add(agent, info);
}
void DecideAction()

}

23
unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs


sMessage.agents = new List<int>(defaultNumAgents);
sMessage.vectorObservations = new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.rewards = new List<float>(defaultNumAgents);
sMessage.memories= new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.memories = new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.maxes= new List<bool>(defaultNumAgents);
sMessage.maxes = new List<bool>(defaultNumAgents);
foreach(string k in accParamerters.brainNames){
foreach (string k in accParamerters.brainNames)
{
current_agents[k] = new List<Agent>(defaultNumAgents);
hasSentState[k] = false;
triedSendState[k] = false;

sender.Send(Encoding.ASCII.GetBytes("CONFIG_REQUEST"));
Receive();
var resetParams = JsonConvert.DeserializeObject<ResetParametersMessage>(rMessageString.ToString());
academy.isInference = !resetParams.train_model;
academy.SetIsInference(!resetParams.train_model);
return resetParams.parameters;
}

inputSeed = args[i + 1];
}
}
comPort = int.Parse(inputPort);
randomSeed = int.Parse(inputSeed);
Random.InitState(randomSeed);

sMessage.vectorObservations.AddRange(agentInfo[agent].stackedVectorObservation);
sMessage.rewards.Add(agentInfo[agent].reward);
sMessage.memories.AddRange(agentInfo[agent].memories);
for (int j = 0; j < memorySize - agentInfo[agent].memories.Count; j++ )
for (int j = 0; j < memorySize - agentInfo[agent].memories.Count; j++)
{
}
sMessage.dones.Add(agentInfo[agent].done);
sMessage.previousVectorActions.AddRange(agentInfo[agent].StoredVectorActions.ToList());
sMessage.previousTextActions.Add(agentInfo[agent].StoredTextActions);

}
public Dictionary<string, bool> GetHasTried(){
public Dictionary<string, bool> GetHasTried()
{
public Dictionary<string, bool> GetSent()
{
public Dictionary<string, bool> GetSent()
{
}
}
/// Listens for actions, memories, and values and sends them
/// to the corresponding brains.

正在加载...
取消
保存