ml-agents/com.unity.ml-agents/Runtime/Agent.cs


								using System;

								using System.Collections.Generic;

								using UnityEngine;

								using Barracuda;

								using MLAgents.Sensors;

								using MLAgents.Demonstrations;

								using MLAgents.Policies;


								namespace MLAgents

								{

								    /// <summary>
								    /// Struct that carries an Agent's per-step status: its last action, its
								    /// discrete action masks, its reward and its episode state. It is passed,
								    /// together with the Agent's sensors, to the policy and to any
								    /// DemonstrationWriters.
								    /// </summary>
								    internal struct AgentInfo
								    {
								        /// <summary>
								        /// Keeps track of the last vector action taken by the Brain.
								        /// The Agent zeroes this array once it has been reported as done.
								        /// </summary>
								        public float[] storedVectorActions;

								        /// <summary>
								        /// For discrete control, specifies the actions that the agent cannot take. Is true if
								        /// the action is masked.
								        /// </summary>
								        public bool[] discreteActionMasks;

								        /// <summary>
								        /// Current agent reward.
								        /// </summary>
								        public float reward;

								        /// <summary>
								        /// Whether the agent is done or not.
								        /// </summary>
								        public bool done;

								        /// <summary>
								        /// Whether the agent has reached its max step count for this episode.
								        /// </summary>
								        public bool maxStepReached;

								        /// <summary>
								        /// Episode identifier each agent receives at every reset. It is used
								        /// to separate between different agents in the environment.
								        /// </summary>
								        public int episodeId;
								    }


								    /// <summary>
								    /// Struct that contains the action information sent from the Brain to the
								    /// Agent.
								    /// </summary>
								    internal struct AgentAction
								    {
								        /// <summary>
								        /// The vector action values. The Agent allocates this array with
								        /// <c>brainParameters.numActions</c> entries when it is first set up.
								        /// </summary>
								        public float[] vectorActions;
								    }


								    /// <summary>

								    /// Agent MonoBehaviour class that is attached to a Unity GameObject, making it

								    /// an Agent. An agent produces observations and takes actions in the

								    /// environment. Observations are determined by the cameras attached

								    /// to the agent in addition to the vector observations implemented by the

								    /// user in <see cref="Agent.CollectObservations(VectorSensor)"/>.

								    /// On the other hand, actions are determined by decisions produced by a Policy.

								    /// Currently, this class is expected to be extended to implement the desired agent behavior.

								    /// </summary>

								    /// <remarks>

								    /// Simply speaking, an agent roams through an environment and at each step

								    /// of the environment extracts its current observation, sends them to its

								    /// policy and in return receives an action. In practice,

								    /// however, an agent need not send its observation at every step since very

								    /// little may have changed between successive steps.

								    ///

								    /// At any step, an agent may be considered done due to a variety of reasons:

								    ///     - The agent reached an end state within its environment.

								    ///     - The agent reached the maximum # of steps (i.e. timed out).

								    ///     - The academy reached the maximum # of steps (forced agent to be done).

								    ///

								    /// Here, an agent reaches an end state if it completes its task successfully

								    /// or somehow fails along the way. In the case where an agent is done before

								    /// the academy, it either resets and restarts, or just lingers until the

								    /// academy is done.

								    ///

								    /// An important note regarding steps and episodes is due. Here, an agent step

								    /// corresponds to an academy step, which also corresponds to Unity

								    /// environment step (i.e. each FixedUpdate call). This is not the case for

								    /// episodes. The academy controls the global episode count and each agent

								    /// controls its own local episode count and can reset and start a new local

								    /// episode independently (based on its own experience). Thus an academy

								    /// (global) episode can be viewed as the upper-bound on an agent's episode

								    /// length and that within a single global episode, an agent may have completed

								    /// multiple local episodes. Consequently, if an agent max step is

								    /// set to a value larger than the academy max steps value, then the academy

								    /// value takes precedence (since the agent max step will never be reached).

								    ///

								    /// Lastly, note that at any step the policy to the agent is allowed to

								    /// change model with <see cref="SetModel"/>.

								    ///

								    /// Implementation-wise, it is required that this class is extended and the

								    /// virtual methods overridden. For sample implementations of agent behavior,

								    /// see the Examples/ directory within this Unity project.

								    /// </remarks>

								    [HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +

								        "docs/Learning-Environment-Design-Agents.md")]

								    [Serializable]

								    [RequireComponent(typeof(BehaviorParameters))]

								    public class Agent : MonoBehaviour, ISerializationCallbackReceiver

								    {

								        IPolicy m_Brain;

								        BehaviorParameters m_PolicyFactory;


								        /// <summary>
								        /// Legacy container for <c>maxStep</c>. It is kept only to make the upgrade
								        /// path easier for users who were using maxStep: the serialization callbacks
								        /// (<see cref="OnBeforeSerialize"/> and <see cref="OnAfterDeserialize"/>)
								        /// copy agentParameters.maxStep into this.maxStep so the two stay in sync.
								        /// </summary>
								        [Serializable]
								        internal struct AgentParameters
								        {
								            /// <summary>
								            /// The legacy max step value, migrated to <c>Agent.maxStep</c>.
								            /// </summary>
								            public int maxStep;
								        }


								        [SerializeField][HideInInspector]

								        internal AgentParameters agentParameters;

								        [SerializeField][HideInInspector]

								        internal bool hasUpgradedFromAgentParameters;


								        /// <summary>

								        /// The maximum number of steps the agent takes before being done.

								        /// </summary>

								        /// <remarks>

								        /// If set to 0, the agent can only be set to done programmatically (or

								        /// when the Academy is done).

								        /// If set to any positive integer, the agent will be set to done after

								        /// that many steps. Note that setting the max step to a value greater

								        /// than the academy max step value renders it useless.

								        /// </remarks>

								        [HideInInspector] public int maxStep;


								        /// Current Agent information (message sent to Brain).

								        AgentInfo m_Info;


								        /// Current Agent action (message sent from Brain).

								        AgentAction m_Action;


								        /// Represents the reward the agent accumulated during the current step.

								        /// It is reset to 0 at the beginning of every step.

								        /// Should be set to a positive value when the agent performs a "good"

								        /// action that we wish to reinforce/reward, and set to a negative value

								        /// when the agent performs a "bad" action that we wish to punish/deter.

								        /// Additionally, the magnitude of the reward should not exceed 1.0

								        float m_Reward;


								        /// Keeps track of the cumulative reward in this episode.

								        float m_CumulativeReward;


								        /// Whether or not the agent requests an action.

								        bool m_RequestAction;


								        /// Whether or not the agent requests a decision.

								        bool m_RequestDecision;


								        /// Keeps track of the number of steps taken by the agent in this episode.

								        /// Note that this value is different for each agent, and may not overlap

								        /// with the step counter in the Academy, since agents reset based on

								        /// their own experience.

								        int m_StepCount;


								        /// Number of times the Agent has completed an episode.

								        int m_CompletedEpisodes;


								        /// Episode identifier each agent receives. It is used

								        /// to separate between different agents in the environment.

								        /// This Id will be changed every time the Agent resets.

								        int m_EpisodeId;


								        /// Whether or not the Agent has been initialized already

								        bool m_Initialized;


								        /// Keeps track of the actions that are masked at each step.

								        DiscreteActionMasker m_ActionMasker;


								        /// <summary>

								        /// Set of DemonstrationWriters that the Agent will write its step information to.

								        /// If you use a DemonstrationRecorder component, this will automatically register its DemonstrationWriter.

								        /// You can also add your own DemonstrationWriter by calling

								        /// DemonstrationRecorder.AddDemonstrationWriterToAgent()

								        /// </summary>

								        internal ISet<DemonstrationWriter> DemonstrationWriters = new HashSet<DemonstrationWriter>();


								        /// <summary>

								        /// List of sensors used to generate observations.

								        /// Currently generated from attached SensorComponents, and a legacy VectorSensor

								        /// </summary>

								        internal List<ISensor> sensors;


								        /// <summary>

								        /// VectorSensor that is passed to CollectObservations(VectorSensor) for the user to write vector observations into.

								        /// </summary>

								        internal VectorSensor collectObservationsSensor;


								        /// <summary>
								        /// Called when the attached <see cref="GameObject"/> becomes enabled and active.
								        /// Makes sure the Agent is initialized; a subclass that overrides this method
								        /// must call <c>base.OnEnable()</c>.
								        /// </summary>
								        protected virtual void OnEnable() => LazyInitialize();


								        /// <summary>
								        /// Implementation of <see cref="ISerializationCallbackReceiver.OnBeforeSerialize"/>.
								        /// Copies a legacy <c>agentParameters.maxStep</c> value into <see cref="maxStep"/>
								        /// if that has not happened yet, then marks the upgrade as done.
								        /// </summary>
								        public void OnBeforeSerialize()
								        {
								            // Serialization upgrade from v0.13 to v0.14: maxStep moved from AgentParameters
								            // (since removed) to Agent. The legacy value is only adopted while maxStep is
								            // still 0, the legacy value is non-zero, and no upgrade has been recorded.
								            var legacyMaxStep = agentParameters.maxStep;
								            var shouldMigrate = maxStep == 0 && legacyMaxStep != 0 && !hasUpgradedFromAgentParameters;
								            if (shouldMigrate)
								            {
								                maxStep = legacyMaxStep;
								            }
								            hasUpgradedFromAgentParameters = true;
								        }


								        /// <summary>
								        /// Implementation of <see cref="ISerializationCallbackReceiver.OnAfterDeserialize"/>.
								        /// Copies a legacy <c>agentParameters.maxStep</c> value into <see cref="maxStep"/>
								        /// if that has not happened yet, then marks the upgrade as done.
								        /// </summary>
								        public void OnAfterDeserialize()
								        {
								            // Serialization upgrade from v0.13 to v0.14: maxStep moved from AgentParameters
								            // (since removed) to Agent. The legacy value is only adopted while maxStep is
								            // still 0, the legacy value is non-zero, and no upgrade has been recorded.
								            var legacyMaxStep = agentParameters.maxStep;
								            var shouldMigrate = maxStep == 0 && legacyMaxStep != 0 && !hasUpgradedFromAgentParameters;
								            if (shouldMigrate)
								            {
								                maxStep = legacyMaxStep;
								            }
								            hasUpgradedFromAgentParameters = true;
								        }


								        /// <summary>
								        /// Initializes the agent. Can be safely called multiple times: once the
								        /// initialized flag is set, further calls return immediately until
								        /// <see cref="OnDisable"/> clears the flag again.
								        /// </summary>
								        public void LazyInitialize()
								        {
								            if (m_Initialized)
								            {
								                return;
								            }
								            m_Initialized = true;

								            // Grab the "static" properties for the Agent.
								            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
								            m_PolicyFactory = GetComponent<BehaviorParameters>();

								            m_Info = new AgentInfo();
								            m_Action = new AgentAction();
								            sensors = new List<ISensor>();

								            // Register this Agent's handlers with the Academy events.
								            // OnDisable removes the same five handlers.
								            Academy.Instance.AgentIncrementStep += AgentIncrementStep;
								            Academy.Instance.AgentSendState += SendInfo;
								            Academy.Instance.DecideAction += DecideAction;
								            Academy.Instance.AgentAct += AgentStep;
								            Academy.Instance.AgentForceReset += _AgentReset;
								            // Create the policy from the BehaviorParameters, passing the Heuristic method.
								            m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
								            // ResetData allocates the action buffers, then the user-overridable
								            // Initialize() hook runs, and only afterwards is the sensor list populated.
								            ResetData();
								            Initialize();
								            InitializeSensors();

								            // The first time the Academy resets, all Agents in the scene will be
								            // forced to reset through the <see cref="AgentForceReset"/> event.
								            // To avoid the Agent resetting twice, the Agents will not begin their
								            // episode when initializing until after the Academy had its first reset.
								            if (Academy.Instance.TotalStepCount != 0)
								            {
								                OnEpisodeBegin();
								            }
								        }


								        /// <summary>
								        /// Reason that the Agent is being considered "done"
								        /// </summary>
								        enum DoneReason
								        {
								            /// <summary>
								            /// The <see cref="EndEpisode"/> method was called.
								            /// </summary>
								            DoneCalled,

								            /// <summary>
								            /// The max steps for the Agent were reached.
								            /// </summary>
								            MaxStepReached,

								            /// <summary>
								            /// The Agent was disabled. <see cref="SetModel"/> also reports this
								            /// reason before it swaps the policy.
								            /// </summary>
								            Disabled,
								        }


								        /// <summary>
								        /// Called when the attached <see cref="GameObject"/> becomes disabled and inactive.
								        /// Unregisters the Agent from the Academy events, reports the Agent as done with
								        /// <see cref="DoneReason.Disabled"/>, disposes the policy and clears the
								        /// initialized flag so that <see cref="LazyInitialize"/> can run again.
								        /// </summary>
								        protected virtual void OnDisable()
								        {
								            // The writers are dropped before NotifyAgentDone runs, so the record loop in
								            // NotifyAgentDone iterates an empty set during this call.
								            DemonstrationWriters.Clear();

								            // If Academy.Dispose has already been called, we don't need to unregister with it.
								            // We don't want to even try, because this will lazily create a new Academy!
								            if (Academy.IsInitialized)
								            {
								                Academy.Instance.AgentIncrementStep -= AgentIncrementStep;
								                Academy.Instance.AgentSendState -= SendInfo;
								                Academy.Instance.DecideAction -= DecideAction;
								                Academy.Instance.AgentAct -= AgentStep;
								                Academy.Instance.AgentForceReset -= _AgentReset;
								            }
								            // NOTE(review): NotifyAgentDone reads `sensors` and `m_Info.storedVectorActions`,
								            // which are only assigned during LazyInitialize/ResetData - confirm OnDisable
								            // cannot run on an Agent that was never initialized.
								            NotifyAgentDone(DoneReason.Disabled);
								            m_Brain?.Dispose();
								            m_Initialized = false;
								        }


								        /// <summary>
								        /// Marks the Agent as done and reports the terminal step to the policy and to
								        /// every registered DemonstrationWriter, then clears the per-step state
								        /// (rewards, pending requests and the stored action).
								        /// Does nothing if the Agent is already marked as done.
								        /// </summary>
								        /// <param name="doneReason">
								        /// Why the Agent is done. <see cref="DoneReason.MaxStepReached"/> sets the
								        /// maxStepReached flag on the info; <see cref="DoneReason.Disabled"/> skips the
								        /// completed-episode counter and the reward statistics.
								        /// </param>
								        void NotifyAgentDone(DoneReason doneReason)
								        {
								            if (m_Info.done)
								            {
								                // The Agent was already marked as Done and should not be notified again
								                return;
								            }
								            m_Info.episodeId = m_EpisodeId;
								            m_Info.reward = m_Reward;
								            m_Info.done = true;
								            m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
								            if (collectObservationsSensor != null)
								            {
								                // Make sure the latest observations are being passed to training.
								                collectObservationsSensor.Reset();
								                CollectObservations(collectObservationsSensor);
								            }
								            // Request the last decision with no callbacks
								            // We request a decision so Python knows the Agent is done immediately
								            m_Brain?.RequestDecision(m_Info, sensors);

								            // We also have to write to any DemonstrationWriters so that they get the "done" flag.
								            // This must happen before the sensors are reset, so that the writers are handed
								            // the same sensor state the policy just received (the same order that
								            // SendInfoToBrain uses: RequestDecision first, then Record).
								            foreach (var demoWriter in DemonstrationWriters)
								            {
								                demoWriter.Record(m_Info, sensors);
								            }

								            // Only now that every consumer has seen the terminal step are the sensors reset.
								            ResetSensors();

								            if (doneReason != DoneReason.Disabled)
								            {
								                // We don't want to update the reward stats when the Agent is disabled, because this will make
								                // the rewards look lower than they actually are during shutdown.
								                m_CompletedEpisodes++;
								                UpdateRewardStats();
								            }

								            m_Reward = 0f;
								            m_CumulativeReward = 0f;
								            m_RequestAction = false;
								            m_RequestDecision = false;
								            Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
								        }


								        /// <summary>
								        /// Replaces the model used by the agent. When all three arguments already
								        /// match the agent's current behavior parameters, nothing happens. Otherwise
								        /// the agent is reported as done (with <see cref="DoneReason.Disabled"/>),
								        /// the behavior parameters are overwritten and the policy is reloaded.
								        /// </summary>
								        /// <param name="behaviorName"> The identifier of the behavior. This
								        /// will categorize the agent when training.
								        /// </param>
								        /// <param name="model"> The model to use for inference.</param>
								        /// <param name="inferenceDevice"> Define on what device the model
								        /// will be run.</param>
								        public void SetModel(
								            string behaviorName,
								            NNModel model,
								            InferenceDevice inferenceDevice = InferenceDevice.CPU)
								        {
								            var isUnchanged =
								                behaviorName == m_PolicyFactory.behaviorName &&
								                model == m_PolicyFactory.model &&
								                inferenceDevice == m_PolicyFactory.inferenceDevice;

								            // Only touch the policy when at least one setting actually differs.
								            if (!isUnchanged)
								            {
								                NotifyAgentDone(DoneReason.Disabled);
								                m_PolicyFactory.model = model;
								                m_PolicyFactory.inferenceDevice = inferenceDevice;
								                m_PolicyFactory.behaviorName = behaviorName;
								                ReloadPolicy();
								            }
								        }


								        /// <summary>
								        /// Disposes the current policy and generates a new one from the behavior
								        /// parameters. Has no effect while the Agent is not initialized.
								        /// </summary>
								        internal void ReloadPolicy()
								        {
								            // Before initialization there is nothing to replace; LazyInitialize will
								            // generate the policy later.
								            if (m_Initialized)
								            {
								                m_Brain?.Dispose();
								                m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
								            }
								        }


								        /// <summary>
								        /// Returns the current step counter (within the current episode).
								        /// </summary>
								        /// <value>
								        /// Current step count.
								        /// </value>
								        public int StepCount => m_StepCount;


								        /// <summary>
								        /// Returns the number of episodes that the Agent has completed (either <see cref="Agent.EndEpisode()"/>
								        /// was called, or maxSteps was reached).
								        /// </summary>
								        /// <value>
								        /// Current episode count.
								        /// </value>
								        public int CompletedEpisodes => m_CompletedEpisodes;


								        /// <summary>
								        /// Overrides the current step reward of the agent and updates the episode
								        /// reward accordingly.
								        /// </summary>
								        /// <remarks>
								        /// In DEBUG builds the value is first passed to
								        /// <c>Utilities.DebugCheckNanAndInfinity</c>.
								        /// </remarks>
								        /// <param name="reward">The new value of the reward.</param>
								        public void SetReward(float reward)
								        {
								#if DEBUG
								            Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetReward));
								#endif
								            // Swap the old step reward for the new one inside the episode total:
								            // the difference between the two is what gets added.
								            m_CumulativeReward += (reward - m_Reward);
								            m_Reward = reward;
								        }


								        /// <summary>
								        /// Adds the provided value to both the step reward and the episode reward.
								        /// </summary>
								        /// <remarks>
								        /// In DEBUG builds the value is first passed to
								        /// <c>Utilities.DebugCheckNanAndInfinity</c>.
								        /// </remarks>
								        /// <param name="increment">Incremental reward value.</param>
								        public void AddReward(float increment)
								        {
								#if DEBUG
								            Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddReward));
								#endif
								            // The episode total and the step reward grow by the same amount.
								            m_CumulativeReward += increment;
								            m_Reward += increment;
								        }


								        /// <summary>
								        /// Retrieves the reward accumulated by the Agent over the current episode.
								        /// </summary>
								        /// <returns>The episode reward.</returns>
								        public float GetCumulativeReward() => m_CumulativeReward;


								        /// <summary>
								        /// Publishes the episode's cumulative reward as a TimerStack gauge named
								        /// "&lt;behaviorName&gt;.CumulativeReward".
								        /// </summary>
								        void UpdateRewardStats()
								        {
								            var rewardGauge = m_PolicyFactory.behaviorName + ".CumulativeReward";
								            TimerStack.Instance.SetGauge(rewardGauge, m_CumulativeReward);
								        }


								        /// <summary>
								        /// Ends the current episode: reports the Agent as done with
								        /// <see cref="DoneReason.DoneCalled"/> and then resets it by calling
								        /// <c>_AgentReset()</c>.
								        /// </summary>
								        public void EndEpisode()
								        {
								            NotifyAgentDone(DoneReason.DoneCalled);
								            _AgentReset();
								        }


								        /// <summary>
								        /// Is called when the agent must request the brain for a new decision.
								        /// Requesting a decision also requests an action (see <see cref="RequestAction"/>).
								        /// </summary>
								        public void RequestDecision()
								        {
								            m_RequestDecision = true;
								            RequestAction();
								        }


								        /// <summary>
								        /// Is called when the agent must perform a new action. Sets the flag that
								        /// records the pending action request.
								        /// </summary>
								        public void RequestAction() => m_RequestAction = true;


								        /// <summary>
								        /// Helper that resets the data structures associated with the agent: it
								        /// always creates a fresh action masker, and allocates the action buffers
								        /// the first time it runs. Typically used when the agent is being
								        /// initialized or reset at the end of an episode.
								        /// </summary>
								        void ResetData()
								        {
								            var brainParams = m_PolicyFactory.brainParameters;
								            m_ActionMasker = new DiscreteActionMasker(brainParams);

								            // Once vectorActions exists it is left alone: in subsequent episodes it
								            // should stay the previous action before the Done(), so that it is
								            // properly recorded.
								            if (m_Action.vectorActions != null)
								            {
								                return;
								            }

								            // Only reached during the creation of the Agent: start from all-zero actions.
								            m_Action.vectorActions = new float[brainParams.numActions];
								            m_Info.storedVectorActions = new float[brainParams.numActions];
								        }


								        /// <summary>
								        /// Initializes the agent, called once when the agent is enabled. Can be
								        /// left empty if there is no special, unique set-up behavior for the
								        /// agent. The base implementation does nothing.
								        /// </summary>
								        /// <remarks>
								        /// One sample use is to store local references to other objects in the
								        /// scene which would facilitate computing this agent's observation.
								        /// This hook is invoked from <see cref="LazyInitialize"/>, after the policy
								        /// has been generated and before the sensors are initialized.
								        /// </remarks>
								        public virtual void Initialize(){}


								        /// <summary>
								        /// When the Agent uses Heuristics, it will call this method every time it
								        /// needs an action. This can be used for debugging or controlling the agent
								        /// with keyboard. This can also be useful to record demonstrations for imitation learning.
								        /// </summary>
								        /// <remarks>
								        /// The base implementation logs a warning and fills <paramref name="actionsOut"/>
								        /// with zeros; override it to supply real actions.
								        /// </remarks>
								        /// <param name="actionsOut">An array corresponding to the next action of the Agent</param>
								        public virtual void Heuristic(float[] actionsOut)
								        {
								            Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
								            // Placeholder action: every entry set to zero.
								            Array.Clear(actionsOut, 0, actionsOut.Length);
								        }


								        /// <summary>
								        /// Set up the list of ISensors on the Agent. A sensor is created from every
								        /// attached SensorComponent (including those on child objects when
								        /// <c>useChildSensors</c> is set), followed by the legacy vector sensor when
								        /// the vector observation size is greater than zero. The list is then sorted
								        /// by sensor name.
								        /// </summary>
								        internal void InitializeSensors()
								        {
								            // Get all attached sensor components
								            SensorComponent[] attachedSensorComponents;
								            if (m_PolicyFactory.useChildSensors)
								            {
								                attachedSensorComponents = GetComponentsInChildren<SensorComponent>();
								            }
								            else
								            {
								                attachedSensorComponents = GetComponents<SensorComponent>();
								            }

								            sensors.Capacity += attachedSensorComponents.Length;
								            foreach (var component in attachedSensorComponents)
								            {
								                sensors.Add(component.CreateSensor());
								            }

								            // Support legacy CollectObservations
								            var param = m_PolicyFactory.brainParameters;
								            if (param.vectorObservationSize > 0)
								            {
								                collectObservationsSensor = new VectorSensor(param.vectorObservationSize);
								                if (param.numStackedVectorObservations > 1)
								                {
								                    // With stacking enabled, the list holds the wrapping StackingSensor
								                    // rather than the raw vector sensor.
								                    var stackingSensor = new StackingSensor(
								                        collectObservationsSensor, param.numStackedVectorObservations);
								                    sensors.Add(stackingSensor);
								                }
								                else
								                {
								                    sensors.Add(collectObservationsSensor);
								                }
								            }

								            // Sort the Sensors by name to ensure determinism. The comparison is
								            // culture-invariant so that the resulting order does not depend on the
								            // current culture of the machine the code runs on.
								            sensors.Sort((x, y) =>
								                string.Compare(x.GetName(), y.GetName(), StringComparison.InvariantCulture));

								#if DEBUG
								            // Make sure the names are actually unique. The list is sorted, so
								            // duplicates would be adjacent.
								            for (var i = 0; i < sensors.Count - 1; i++)
								            {
								                Debug.Assert(
								                    !sensors[i].GetName().Equals(sensors[i + 1].GetName()),
								                    "Sensor names must be unique.");
								            }
								#endif
								        }


								        /// <summary>
								        /// Sends the Agent info to the linked Brain: stores the last action, refreshes
								        /// the sensors, collects the vector observations and discrete action masks,
								        /// fills in the AgentInfo and requests a decision. The same info and sensors
								        /// are then recorded by every registered DemonstrationWriter.
								        /// Returns without doing anything when there is no Brain.
								        /// </summary>
								        /// <exception cref="UnityAgentsException">
								        /// Thrown when the Agent has not been initialized.
								        /// </exception>
								        void SendInfoToBrain()
								        {
								            if (!m_Initialized)
								            {
								                throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized. " +
								                    "Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
								            }

								            if (m_Brain == null)
								            {
								                return;
								            }

								            // m_Info.done is still the value left by the previous report: after a "done"
								            // the stored action is zeroed, otherwise it becomes the most recent action.
								            if (m_Info.done)
								            {
								                Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
								            }
								            else
								            {
								                Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
								            }
								            m_ActionMasker.ResetMask();
								            UpdateSensors();
								            using (TimerStack.Instance.Scoped("CollectObservations"))
								            {
								                // NOTE(review): collectObservationsSensor is only created when the vector
								                // observation size is greater than zero, and unlike NotifyAgentDone this
								                // call is not guarded by a null check - confirm overrides tolerate null.
								                CollectObservations(collectObservationsSensor);
								            }
								            using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
								            {
								                if (m_PolicyFactory.brainParameters.vectorActionSpaceType == SpaceType.Discrete)
								                {
								                    CollectDiscreteActionMasks(m_ActionMasker);
								                }
								            }
								            m_Info.discreteActionMasks = m_ActionMasker.GetMask();

								            m_Info.reward = m_Reward;
								            m_Info.done = false;
								            m_Info.maxStepReached = false;
								            m_Info.episodeId = m_EpisodeId;

								            m_Brain.RequestDecision(m_Info, sensors);

								            // If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
								            foreach (var demoWriter in DemonstrationWriters)
								            {
								                demoWriter.Record(m_Info, sensors);
								            }
								        }


								        /// <summary>
								        /// Calls <c>Update()</c> on every sensor in the Agent's sensor list, in list order.
								        /// </summary>
								        void UpdateSensors()
								        {
								            for (var i = 0; i < sensors.Count; i++)
								            {
								                sensors[i].Update();
								            }
								        }


								        /// <summary>
								        /// Calls <c>Reset()</c> on every sensor in the Agent's sensor list, in list order.
								        /// </summary>
								        void ResetSensors()
								        {
								            for (var i = 0; i < sensors.Count; i++)
								            {
								                sensors[i].Reset();
								            }
								        }


								        /// <summary>
								        /// Collects the vector observations of the agent.
								        /// The agent observation describes the current environment from the
								        /// perspective of the agent. The base implementation adds nothing.
								        /// </summary>
								        /// <param name="sensor">
								        /// The vector observations for the agent.
								        /// </param>
								        /// <remarks>
								        /// An agent's observation is any environment information that helps
								        /// the Agent achieve its goal. For example, for a fighting Agent, its
								        /// observation could include distances to friends or enemies, or the
								        /// current level of ammunition at its disposal.
								        /// Recall that an Agent may attach vector or visual observations.
								        /// Vector observations are added by calling the provided helper methods
								        /// on the VectorSensor input:
								        ///     - <see cref="VectorSensor.AddObservation(int)"/>
								        ///     - <see cref="VectorSensor.AddObservation(float)"/>
								        ///     - <see cref="VectorSensor.AddObservation(Vector3)"/>
								        ///     - <see cref="VectorSensor.AddObservation(Vector2)"/>
								        ///     - <see cref="VectorSensor.AddObservation(Quaternion)"/>
								        ///     - <see cref="VectorSensor.AddObservation(bool)"/>
								        ///     - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
								        ///     - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
								        /// Depending on your environment, any combination of these helpers can
								        /// be used. They just need to be used in the exact same order each time
								        /// this method is called and the resulting size of the vector observation
								        /// needs to match the vectorObservationSize attribute of the linked Brain.
								        /// Visual observations are implicitly added from the cameras attached to
								        /// the Agent.
								        ///
								        /// The sensor handed to this method is the Agent's own vector sensor, which
								        /// is only created when the vector observation size is greater than zero.
								        /// </remarks>
								        public virtual void CollectObservations(VectorSensor sensor)
								        {
								        }


								        /// <summary>

								        /// Collects the masks for discrete actions.

								        /// When using discrete actions, the agent will not perform the masked action.

								        /// </summary>

								        /// <param name="actionMasker">

								        /// The action masker for the agent.

								        /// </param>

								        /// <remarks>

								        /// When using Discrete Control, you can prevent the Agent from using a certain

								        /// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.

								        /// The base implementation is empty and therefore sets no masks; override

								        /// this method to mask actions.

								        /// </remarks>

								        public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)

								        {

								        }


								        /// <summary>
								        /// Defines what the agent does at each step in response to the action
								        /// it is given. Override this method to implement the agent's behavior.
								        /// </summary>
								        /// <param name="vectorAction">
								        /// The action vector. Note that for discrete actions, the provided array
								        /// will be of length 1.
								        /// </param>
								        /// <remarks>
								        /// The base implementation is intentionally empty.
								        /// </remarks>
								        public virtual void OnActionReceived(float[] vectorAction)
								        {
								        }


								        /// <summary>
								        /// Defines what the agent does when it is reset. A reset can be caused by
								        /// the agent or the Academy being done (i.e. completion of a local or
								        /// global episode). Override this method to set up a new episode.
								        /// </summary>
								        /// <remarks>
								        /// The base implementation is intentionally empty.
								        /// </remarks>
								        public virtual void OnEpisodeBegin()
								        {
								        }


								        /// <summary>
								        /// Gets the most recent action decided on by the Agent.
								        /// </summary>
								        /// <returns>
								        /// The last action that was decided by the Agent (or null if no decision has been made).
								        /// The array stored in the agent's action data is returned directly, not a copy.
								        /// </returns>
								        public float[] GetAction() => m_Action.vectorActions;


								        /// <summary>

								        /// An internal reset method that updates internal data structures in

								        /// addition to calling <see cref="OnEpisodeBegin"/>.

								        /// </summary>

								        /// <remarks>

								        /// Calls <c>ResetData()</c> and zeroes the step counter before the

								        /// <see cref="OnEpisodeBegin"/> callback runs.

								        /// </remarks>

								        void _AgentReset()

								        {

								            ResetData();

								            m_StepCount = 0;

								            OnEpisodeBegin();

								        }


								        /// <summary>
								        /// Linearly rescales a continuous action from [-1, 1] to the range [min, max].
								        /// </summary>
								        /// <param name="rawAction">
								        /// The action value to rescale, expected to lie in [-1, 1]. The value is
								        /// not clamped, so inputs outside that interval map outside [min, max].
								        /// </param>
								        /// <param name="min">The value that a raw action of -1 maps to.</param>
								        /// <param name="max">The value that a raw action of 1 maps to.</param>
								        /// <returns>The rescaled action value.</returns>
								        protected static float ScaleAction(float rawAction, float min, float max)
								        {
								            // Half the width of the target interval, and its midpoint.
								            var halfSpan = (max - min) / 2;
								            var center = (min + max) / 2;
								            return rawAction * halfSpan + center;
								        }


								        /// <summary>
								        /// Signals the agent that it must send its info to the brain.
								        /// Nothing happens unless a decision has been requested.
								        /// </summary>
								        void SendInfo()
								        {
								            if (!m_RequestDecision)
								            {
								                return;
								            }

								            SendInfoToBrain();

								            // Once the info has been sent, clear the reward and the pending request.
								            m_Reward = 0f;
								            m_RequestDecision = false;
								        }


								        /// <summary>
								        /// Advances the agent's step counter by one.
								        /// </summary>
								        void AgentIncrementStep()
								        {
								            m_StepCount++;
								        }


								        /// <summary>
								        /// Used by the brain to make the agent perform a step.
								        /// </summary>
								        /// <remarks>
								        /// If an action was requested and a brain is set, the pending request is
								        /// cleared and <see cref="OnActionReceived"/> is called with the stored action.
								        /// Afterwards, if a positive <c>maxStep</c> has been reached, the agent is
								        /// reported as done and reset.
								        /// </remarks>
								        void AgentStep()
								        {
								            var shouldAct = m_RequestAction && m_Brain != null;
								            if (shouldAct)
								            {
								                m_RequestAction = false;
								                OnActionReceived(m_Action.vectorActions);
								            }

								            // Evaluated only after the action callback has run. A maxStep of zero
								            // or less disables the limit.
								            var stepLimitReached = m_StepCount >= maxStep && maxStep > 0;
								            if (stepLimitReached)
								            {
								                NotifyAgentDone(DoneReason.MaxStepReached);
								                _AgentReset();
								            }
								        }


								        /// <summary>
								        /// Asks the brain for an action and stores it in the agent's action buffer.
								        /// </summary>
								        /// <remarks>
								        /// When there is no brain, or the brain returns no action, the stored
								        /// action is zeroed instead.
								        /// </remarks>
								        void DecideAction()
								        {
								            // No action buffer yet. ResetData() presumably allocates it
								            // (confirm against its definition).
								            if (m_Action.vectorActions == null)
								            {
								                ResetData();
								            }

								            var decided = m_Brain?.DecideAction();
								            if (decided != null)
								            {
								                Array.Copy(decided, m_Action.vectorActions, decided.Length);
								                return;
								            }

								            // Nothing was decided: zero the stored action.
								            Array.Clear(m_Action.vectorActions, 0, m_Action.vectorActions.Length);
								        }

								    }

								}