ml-agents/unity-environment/Assets/ML-Agents/Scripts/Academy.cs


								using System.Collections.Generic;

								using UnityEngine;


								/**

								 * Welcome to Unity Machine Learning Agents (ML-Agents).

								 *

								 * ML-Agents contains five entities: Academy, Brain, Agent, Communicator and

								 * Python API. The academy, and all its brains and connected agents live within

								 * a learning environment (herein called Environment), while the communicator

								 * manages the communication between the learning environment and the Python

								 * API. For more information on each of these entities, in addition to how to

								 * set-up a learning environment and train the behavior of characters in a

								 * Unity scene, please browse our documentation pages on GitHub:

								 * https://github.com/Unity-Technologies/ml-agents/blob/master/docs/

								 */


								/// <summary>
								/// Serializable bundle of engine-level settings (window size, rendering
								/// quality, time scale and target frame rate) that is edited in the
								/// Inspector. One instance describes a single mode, so training and
								/// inference can each be given their own values.
								/// </summary>
								[System.Serializable]
								public class EnvironmentConfiguration
								{
								    /// <summary>Width of the environment window, in pixels.</summary>
								    [Tooltip("Width of the environment window in pixels.")]
								    public int width;

								    /// <summary>Height of the environment window, in pixels.</summary>
								    [Tooltip("Height of the environment window in pixels.")]
								    public int height;

								    /// <summary>
								    /// Rendering quality of the environment; higher means better quality.
								    /// The Inspector slider is limited to the range 0 to 5.
								    /// </summary>
								    [Tooltip("Rendering quality of environment. (Higher is better quality.)"),
								     Range(0, 5)]
								    public int qualityLevel;

								    /// <summary>
								    /// Speed at which the environment is run; higher means faster. The
								    /// Inspector slider is limited to the range 1 to 100.
								    /// </summary>
								    [Tooltip("Speed at which environment is run. (Higher is faster.)"),
								     Range(1f, 100f)]
								    public float timeScale;

								    /// <summary>
								    /// Frames per second (FPS) that the engine attempts to maintain.
								    /// </summary>
								    [Tooltip("Frames per second (FPS) engine attempts to maintain.")]
								    public int targetFrameRate;

								    /// <summary>
								    /// Initializes a new instance of the
								    /// <see cref="EnvironmentConfiguration"/> class. Every argument is
								    /// stored as given; no range checking is performed here.
								    /// </summary>
								    /// <param name="width">Width of environment window (pixels).</param>
								    /// <param name="height">Height of environment window (pixels).</param>
								    /// <param name="qualityLevel">
								    /// Rendering quality of environment. Ranges from 0 to 5, with higher
								    /// values representing better quality.
								    /// </param>
								    /// <param name="timeScale">
								    /// Speed at which environment is run. Ranges from 1 to 100, with higher
								    /// values representing faster speed.
								    /// </param>
								    /// <param name="targetFrameRate">
								    /// Target frame rate (per second) that the engine tries to maintain.
								    /// </param>
								    public EnvironmentConfiguration(
								        int width,
								        int height,
								        int qualityLevel,
								        float timeScale,
								        int targetFrameRate)
								    {
								        // Window dimensions.
								        this.width = width;
								        this.height = height;

								        // Engine quality and pacing.
								        this.qualityLevel = qualityLevel;
								        this.timeScale = timeScale;
								        this.targetFrameRate = targetFrameRate;
								    }
								}


								/// <summary>
								/// Abstract base class for the academy of a learning environment. The
								/// academy collects the Brain components found on its active direct
								/// children, drives one environment step per <c>FixedUpdate</c>, and
								/// raises a fixed sequence of events that brains and agents subscribe to.
								/// Subclasses customize behavior by overriding
								/// <see cref="InitializeAcademy"/>, <see cref="AcademyStep"/> and
								/// <see cref="AcademyReset"/>.
								/// </summary>
								/// <remarks>
								/// The academy runs in either training or inference mode. At start-up it
								/// attempts a handshake with an external communicator: if the handshake
								/// succeeds the academy starts in training mode, otherwise it starts in
								/// inference mode. The mode can later be changed through
								/// <see cref="SetIsInference"/>; the matching engine configuration is
								/// applied at the beginning of the next environment step.
								/// </remarks>
								[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
								         "docs/Learning-Environment-Design-Academy.md")]
								public abstract class Academy : MonoBehaviour
								{
								    // ---------------------------------------------------------------
								    // Fields exposed in the Inspector.
								    // ---------------------------------------------------------------

								    /// Maximum number of steps in a global episode. Values that are not
								    /// strictly positive disable the limit.
								    [SerializeField,
								     Tooltip("Total number of steps per global episode.\nNon-positive " +
								             "values correspond to episodes without a maximum number of \n" +
								             "steps. Once the step counter reaches this maximum value, the " +
								             "environment will reset.")]
								    private int maxSteps;

								    /// Engine settings applied while the academy is in training mode.
								    [SerializeField,
								     Tooltip("The engine-level settings which correspond to rendering " +
								             "quality and engine speed during Training.")]
								    private EnvironmentConfiguration trainingConfiguration =
								        new EnvironmentConfiguration(80, 80, 1, 100.0f, -1);

								    /// Engine settings applied while the academy is in inference mode.
								    [SerializeField,
								     Tooltip("The engine-level settings which correspond to rendering " +
								             "quality and engine speed during Inference.")]
								    private EnvironmentConfiguration inferenceConfiguration =
								        new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60);

								    /// <summary>
								    /// Custom environment parameters, addressed by name and holding float
								    /// values, that subclasses can read when the environment resets.
								    /// </summary>
								    /// <remarks>
								    /// Default values are specified in the Editor. When a communicator is
								    /// on and issues a reset command, every entry it supplies is written
								    /// into this collection before the reset is performed.
								    /// </remarks>
								    [SerializeField,
								     Tooltip("List of custom parameters that can be changed in the " +
								             "environment when it resets.")]
								    public ResetParameters resetParameters;

								    // ---------------------------------------------------------------
								    // Internal state (not exposed in the Inspector).
								    // ---------------------------------------------------------------

								    /// True once a communicator has completed its handshake during
								    /// initialization. Also decides the initial mode: communicator on
								    /// means training, communicator off means inference.
								    private bool isCommunicatorOn;

								    /// True when the inference configuration should be used. Set during
								    /// initialization to the negation of <see cref="isCommunicatorOn"/>
								    /// and changeable afterwards via <see cref="SetIsInference"/>.
								    private bool isInference = true;

								    /// Done flag of the academy. When it is true during a step, the
								    /// environment is reset (which calls <see cref="AcademyReset"/>)
								    /// before the remainder of the step runs, and the flag is cleared at
								    /// the end of that step. Its value is forwarded to the agents through
								    /// <see cref="AgentSetStatus"/>.
								    private bool done;

								    /// True when the step counter has reached <see cref="maxSteps"/> in
								    /// the current episode. Cleared together with <see cref="done"/>.
								    private bool maxStepReached;

								    /// Number of environment resets performed so far; incremented on
								    /// every reset.
								    private int episodeCount;

								    /// Number of steps taken in the current episode. Incremented at the
								    /// end of every environment step and set back to 0 on reset.
								    private int stepCount;

								    /// Set when <see cref="SetIsInference"/> actually changes the mode, so
								    /// that the engine settings are re-applied at the next step.
								    private bool modeSwitched;

								    /// Communicator in use by the academy, or null when the handshake
								    /// failed.
								    private Communicator communicator;

								    /// Becomes true after the first forced full reset. Without a
								    /// communicator this guarantees the forced reset happens only once.
								    private bool firstAcademyReset;

								    // ---------------------------------------------------------------
								    // Synchronization events.
								    //
								    // The academy raises these events once per environment step, always
								    // in the same order, so that every agent goes through each phase
								    // before any agent moves on to the next one (i.e. no agent acts on a
								    // decision before the other agents had a chance to request one).
								    // ---------------------------------------------------------------

								    /// <summary>
								    /// Raised each step so that brains can decide the actions of their
								    /// agents.
								    /// </summary>
								    public event System.Action BrainDecideAction;

								    /// <summary>
								    /// Raised each step with the academy's max-step-reached flag, done
								    /// flag and step count (in that order), so that agents can update
								    /// their own status and align on the global step count.
								    /// </summary>
								    public event System.Action<bool, bool, int> AgentSetStatus;

								    /// <summary>
								    /// Raised each step so that agents whose done flag is set can reset
								    /// (assuming the agent has requested a decision).
								    /// </summary>
								    public event System.Action AgentResetIfDone;

								    /// <summary>
								    /// Raised each step so that agents which requested a decision can
								    /// send their state to their brain.
								    /// </summary>
								    public event System.Action AgentSendState;

								    /// <summary>
								    /// Raised each step so that agents which requested a decision can
								    /// act.
								    /// </summary>
								    public event System.Action AgentAct;

								    /// <summary>
								    /// Raised every time the academy performs a forced full reset.
								    /// </summary>
								    public event System.Action AgentForceReset;

								    // ---------------------------------------------------------------
								    // Initialization.
								    // ---------------------------------------------------------------

								    /// <summary>
								    /// MonoBehaviour callback invoked when the object is created. The
								    /// whole start-up sequence is delegated to
								    /// <see cref="InitializeEnvironment"/>.
								    /// </summary>
								    private void Awake()
								    {
								        InitializeEnvironment();
								    }

								    /// <summary>
								    /// Runs the start-up sequence: collects the brains, lets the subclass
								    /// initialize itself, probes for a communicator, initializes the
								    /// brains, determines the initial mode and applies the matching
								    /// engine configuration.
								    /// </summary>
								    private void InitializeEnvironment()
								    {
								        // Brains are gathered before the subclass hook is invoked.
								        List<Brain> attachedBrains = GetBrains(gameObject);
								        InitializeAcademy();

								        // Probe for a communicator. The field is assigned before the
								        // handshake and dropped again if the handshake fails.
								        communicator = new ExternalCommunicator(this);
								        bool handshakeSucceeded = communicator.CommunicatorHandShake();
								        if (!handshakeSucceeded)
								        {
								            communicator = null;
								        }

								        // Every brain receives the academy and the communicator, which is
								        // null when no communicator is available.
								        foreach (Brain attachedBrain in attachedBrains)
								        {
								            attachedBrain.InitializeBrain(this, communicator);
								        }

								        // The communicator itself is initialized only after all brains.
								        if (communicator != null)
								        {
								            isCommunicatorOn = true;
								            communicator.InitializeCommunicator();
								            communicator.UpdateCommand();
								        }

								        // Communicator present: training mode. Communicator absent:
								        // inference mode.
								        isInference = !isCommunicatorOn;

								        // Attach a do-nothing handler to every event so that each of them
								        // has at least one subscriber and can be raised without a null
								        // check.
								        BrainDecideAction += delegate { };
								        AgentSetStatus += delegate { };
								        AgentResetIfDone += delegate { };
								        AgentSendState += delegate { };
								        AgentAct += delegate { };
								        AgentForceReset += delegate { };

								        // Apply the engine settings provided by the developer in the
								        // Editor for the mode that was just selected.
								        ConfigureEnvironment();
								    }

								    /// <summary>
								    /// Applies the engine configuration that matches the current mode and
								    /// activates the monitor in inference mode only.
								    /// </summary>
								    private void ConfigureEnvironment()
								    {
								        EnvironmentConfiguration activeConfiguration =
								            isInference ? inferenceConfiguration : trainingConfiguration;

								        ConfigureEnvironmentHelper(activeConfiguration);
								        Monitor.SetActive(isInference);
								    }

								    /// <summary>
								    /// Pushes the values of a configuration into the engine: screen
								    /// resolution (windowed), quality level, time scale and target frame
								    /// rate. The capture frame rate is always set to 60, independently of
								    /// the configuration.
								    /// </summary>
								    /// <param name="config">
								    /// Environment configuration (specified in the Editor).
								    /// </param>
								    private static void ConfigureEnvironmentHelper(
								        EnvironmentConfiguration config)
								    {
								        Screen.SetResolution(config.width, config.height, false);
								        QualitySettings.SetQualityLevel(config.qualityLevel, true);
								        Time.timeScale = config.timeScale;
								        Time.captureFramerate = 60;
								        Application.targetFrameRate = config.targetFrameRate;
								    }

								    // ---------------------------------------------------------------
								    // Hooks for subclasses.
								    // ---------------------------------------------------------------

								    /// <summary>
								    /// Hook for academy-specific initialization. It is invoked once during
								    /// start-up, after the brains have been collected and before the
								    /// communicator handshake. The default implementation does nothing.
								    /// </summary>
								    public virtual void InitializeAcademy()
								    {
								    }

								    /// <summary>
								    /// Hook invoked on every environment step, after the brains have
								    /// decided and before the agents act. The default implementation does
								    /// nothing.
								    /// </summary>
								    public virtual void AcademyStep()
								    {
								    }

								    /// <summary>
								    /// Hook invoked every time the environment is reset (i.e. at the
								    /// completion of a global episode or on a forced reset). The default
								    /// implementation does nothing.
								    /// </summary>
								    public virtual void AcademyReset()
								    {
								    }

								    // ---------------------------------------------------------------
								    // Accessors.
								    // ---------------------------------------------------------------

								    /// <summary>
								    /// Returns the <see cref="isInference"/> flag.
								    /// </summary>
								    /// <returns>
								    /// <c>true</c> if the current mode is inference, <c>false</c> if it is
								    /// training.
								    /// </returns>
								    public bool GetIsInference()
								    {
								        return isInference;
								    }

								    /// <summary>
								    /// Sets the <see cref="isInference"/> flag. Nothing happens when the
								    /// requested value equals the current one; otherwise the flag is
								    /// updated and the engine configuration is scheduled to be re-applied
								    /// at the next environment step.
								    /// </summary>
								    /// <param name="isInference">
								    /// Environment mode, if true then inference, otherwise training.
								    /// </param>
								    public void SetIsInference(bool isInference)
								    {
								        if (this.isInference == isInference)
								        {
								            return;
								        }

								        this.isInference = isInference;

								        // Tells the next environment step to switch the engine settings to
								        // the configuration of the new mode.
								        modeSwitched = true;
								    }

								    /// <summary>
								    /// Returns the current episode counter.
								    /// </summary>
								    /// <returns>
								    /// Number of environment resets performed so far.
								    /// </returns>
								    public int GetEpisodeCount()
								    {
								        return episodeCount;
								    }

								    /// <summary>
								    /// Returns the current step counter (within the current episode).
								    /// </summary>
								    /// <returns>
								    /// Number of steps taken since the last reset.
								    /// </returns>
								    public int GetStepCount()
								    {
								        return stepCount;
								    }

								    /// <summary>
								    /// Sets the done flag to true.
								    /// </summary>
								    public void Done()
								    {
								        done = true;
								    }

								    /// <summary>
								    /// Returns whether or not the academy is done.
								    /// </summary>
								    /// <returns>
								    /// <c>true</c> if the academy is done, <c>false</c> otherwise.
								    /// </returns>
								    public bool IsDone()
								    {
								        return done;
								    }

								    /// <summary>
								    /// Returns whether or not the communicator is on.
								    /// </summary>
								    /// <returns>
								    /// <c>true</c> if the communicator is on, <c>false</c> otherwise.
								    /// </returns>
								    public bool IsCommunicatorOn()
								    {
								        return isCommunicatorOn;
								    }

								    /// <summary>
								    /// Returns the communicator currently used by the academy.
								    /// </summary>
								    /// <returns>The communicator currently in use (may be null).</returns>
								    public Communicator GetCommunicator()
								    {
								        return communicator;
								    }

								    // ---------------------------------------------------------------
								    // Stepping and resetting.
								    // ---------------------------------------------------------------

								    /// <summary>
								    /// Performs a full reset regardless of the done flags, which are left
								    /// untouched: the environment is reset, every agent is told to force
								    /// reset, and the first reset is recorded as having happened. Used for
								    /// the first step when no communicator is on, and for every reset
								    /// command when a communicator is on.
								    /// </summary>
								    private void ForcedFullReset()
								    {
								        EnvironmentReset();
								        AgentForceReset();
								        firstAcademyReset = true;
								    }

								    /// <summary>
								    /// Performs a single environment update of the academy, brains and
								    /// agents. In order: re-apply the engine configuration if the mode was
								    /// switched; handle the communicator command (or the one-off initial
								    /// reset when there is no communicator); enforce the step limit;
								    /// broadcast the status; reset if done; then raise the agent and brain
								    /// events and call <see cref="AcademyStep"/>; finally clear the done
								    /// flags and advance the step counter.
								    /// </summary>
								    private void EnvironmentStep()
								    {
								        if (modeSwitched)
								        {
								            ConfigureEnvironment();
								            modeSwitched = false;
								        }

								        if (!isCommunicatorOn)
								        {
								            // No communicator: force a reset once, on the first step only.
								            if (!firstAcademyReset)
								            {
								                ForcedFullReset();
								            }
								        }
								        else
								        {
								            if (communicator.GetCommand() == ExternalCommand.RESET)
								            {
								                // Copy the reset parameters received from the communicator
								                // into the academy before resetting.
								                Dictionary<string, float> receivedParameters =
								                    communicator.GetResetParameters();
								                foreach (KeyValuePair<string, float> parameter
								                         in receivedParameters)
								                {
								                    resetParameters[parameter.Key] = parameter.Value;
								                }

								                ForcedFullReset();
								                communicator.SetCommand(ExternalCommand.STEP);
								            }

								            // The command is read again on purpose: it was overwritten
								            // above if a reset has just been handled.
								            if (communicator.GetCommand() == ExternalCommand.QUIT)
								            {
								                Application.Quit();
								                return;
								            }
								        }

								        // A step limit applies only when maxSteps is strictly positive.
								        if (maxSteps > 0 && stepCount >= maxSteps)
								        {
								            maxStepReached = true;
								            Done();
								        }

								        AgentSetStatus(maxStepReached, done, stepCount);

								        if (done)
								        {
								            EnvironmentReset();
								        }

								        // The order of the following calls is what keeps agents and brains
								        // synchronized; it must not be changed.
								        AgentResetIfDone();
								        AgentSendState();
								        BrainDecideAction();
								        AcademyStep();
								        AgentAct();

								        if (done)
								        {
								            done = false;
								            maxStepReached = false;
								        }

								        stepCount++;
								    }

								    /// <summary>
								    /// Resets the environment, including the academy: the step counter
								    /// goes back to 0, the episode counter is incremented and
								    /// <see cref="AcademyReset"/> is called.
								    /// </summary>
								    private void EnvironmentReset()
								    {
								        stepCount = 0;
								        episodeCount++;
								        AcademyReset();
								    }

								    /// <summary>
								    /// MonoBehaviour callback that runs one environment step per
								    /// fixed update.
								    /// </summary>
								    private void FixedUpdate()
								    {
								        EnvironmentStep();
								    }

								    /// <summary>
								    /// Collects the Brain components found on the direct children of the
								    /// given game object, skipping children that have no Brain or whose
								    /// game object is not active.
								    /// </summary>
								    /// <param name="academy">Game object holding the academy.</param>
								    /// <returns>
								    /// List of brains currently attached to the academy, in child order.
								    /// </returns>
								    private static List<Brain> GetBrains(GameObject academy)
								    {
								        var activeBrains = new List<Brain>();

								        // Enumerating a Transform yields its direct children in index
								        // order.
								        foreach (Transform childTransform in academy.transform)
								        {
								            Brain candidate = childTransform.GetComponent<Brain>();

								            if (candidate == null || !childTransform.gameObject.activeSelf)
								            {
								                continue;
								            }

								            activeBrains.Add(candidate);
								        }

								        return activeBrains;
								    }
								}