using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Agent info. The agent will send an instance of this class to the brain.
/// </summary>
public struct AgentInfo
{
    public List<float> vectorObservation;
    public List<float> stackedVectorObservation;
    public List<Texture2D> visualObservations;
    public List<float> memories;
    public string textObservation;
    public float[] StoredVectorActions;
    public string StoredTextActions;
    public float reward;
    public bool done;
    public bool maxStepReached;
    public int id;
}

/// <summary>
/// Agent action. The brain will send an instance of this class to the agent
/// when making a decision.
/// </summary>
public struct AgentAction
{
    public float[] vectorActions;
    public string textActions;
    public List<float> memories;
}

/// <summary>
/// Agent parameters. Reflect the user's settings for the agents in the inspector.
/// </summary>
[System.Serializable]
public class AgentParameters
{
    public List<Camera> agentCameras = new List<Camera>();
    public int maxStep;
    public bool resetOnDone = true;
    public bool onDemandDecision;
    public int numberOfActionsBetweenDecisions;
}

[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#agent")]
/** Generic functions for parent Agent class.
 * Contains all logic for Brain-Agent communication and Agent-Environment
 * interaction.
 */
[System.Serializable]
public abstract class Agent : MonoBehaviour
{
    /// <summary>
    /// The brain that will control this agent.
    /// Use the inspector to drag the desired brain gameObject into
    /// the Brain field.
    /// </summary>
    [HideInInspector] public Brain brain;

    /// <summary>
    /// The info. This is the placeholder for the information the agent will send
    /// to the brain.
    /// </summary>
    private AgentInfo _info;

    /// <summary>
    /// The action. This is the placeholder for the actions the agent will receive.
    /// </summary>
    private AgentAction _action;

    /// <summary>
    /// The reward. Describes the reward for the given step of the agent.
    /// It is reset to 0 at the beginning of every step.
    /// Modify in AgentAction().
    /// Should be set to a positive value to reinforce desired behavior, and
    /// to a negative value to punish undesirable behavior.
    /// Additionally, the magnitude of the reward should not exceed 1.0.
    /// </summary>
    private float reward;

    /// Whether or not the agent requests an action.
    private bool requestAction;

    /// Whether or not the agent requests a decision.
    private bool requestDecision;

    /// <summary>
    /// Whether or not the agent is done.
    /// Set to true when the agent has acted in some way which ends the
    /// episode for the given agent.
    /// </summary>
    private bool done;

    /// Whether or not the max step is reached.
    private bool maxStepReached;

    /// Do not modify: This keeps track of the cumulative reward.
    private float cumulativeReward;

    /// This keeps track of the number of steps taken by the agent each episode.
    [HideInInspector] public int stepCounter;

    private bool hasAlreadyReset;
    private bool terminate;

    [HideInInspector] public AgentParameters agentParameters;

    /// <summary>
    /// This is the unique Identifier each agent receives at initialization.
    /// It is used by the brain to identify the agent.
    /// </summary>
    private int id;

    /// <summary>
    /// Unity method called when the agent is instantiated or set to active.
    /// </summary>
    private void OnEnable()
    {
        id = gameObject.GetInstanceID();
        Academy aca = Object.FindObjectOfType<Academy>();
        _InitializeAgent(aca);
    }
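    // ------------------------------------------------------------------
    // Illustrative sketch (not part of this class): agentParameters can
    // also be configured from code instead of through the inspector.
    // "MyAgent" is a hypothetical subclass used only for illustration.
    //
    //     var agent = GetComponent<MyAgent>();
    //     agent.agentParameters.maxStep = 1000;      // end episode after 1000 steps
    //     agent.agentParameters.resetOnDone = true;  // auto-reset when done
    //     agent.agentParameters.onDemandDecision = false;
    //     agent.agentParameters.numberOfActionsBetweenDecisions = 5;
    // ------------------------------------------------------------------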
    /// <summary>
    /// Is called when the agent is initialized.
    /// </summary>
    void _InitializeAgent(Academy aca)
    {
        _info = new AgentInfo();
        _action = new AgentAction();

        if (aca == null)
            throw new UnityAgentsException("No Academy Component could be " +
                                           "found in the scene.");
        aca.AgentSetStatus += SetStatus;
        aca.AgentResetIfDone += ResetIfDone;
        aca.AgentSendState += SendState;
        aca.AgentAct += _AgentStep;
        aca.AgentForceReset += _AgentReset;
        if (brain != null)
        {
            ResetState();
        }
        else
        {
            Debug.Log(
                string.Format("The Agent component attached to the " +
                              "GameObject {0} was initialized without a brain.",
                              gameObject.name));
        }
        InitializeAgent();
    }

    /// <summary>
    /// Is called when the agent is disabled.
    /// </summary>
    void _DisableAgent(Academy aca)
    {
        if (aca != null)
        {
            aca.AgentSetStatus -= SetStatus;
            aca.AgentResetIfDone -= ResetIfDone;
            aca.AgentSendState -= SendState;
            aca.AgentAct -= _AgentStep;
            aca.AgentForceReset -= _AgentReset;
        }
    }

    /// <summary>
    /// Gets called when the agent is destroyed or is set inactive.
    /// </summary>
    void OnDisable()
    {
        Academy aca = Object.FindObjectOfType<Academy>();
        _DisableAgent(aca);
    }

    /// <summary>
    /// When GiveBrain is called, the agent unsubscribes from its
    /// previous brain and subscribes to the one passed as argument.
    /// Use this method to provide a brain to the agent via script.
    /// </summary>
    /// <param name="b">The Brain the agent will subscribe to.</param>
    public void GiveBrain(Brain b)
    {
        brain = b;
        ResetState();
    }

    /// <summary>
    /// Resets the reward of the agent.
    /// </summary>
    public void ResetReward()
    {
        reward = 0f;
        if (done)
        {
            cumulativeReward = 0f;
        }
    }

    /// <summary>
    /// Use this method to overwrite the current reward of the agent.
    /// </summary>
    /// <param name="newValue">The new value of the reward.</param>
    public void SetReward(float newValue)
    {
        cumulativeReward += newValue - reward;
        reward = newValue;
    }

    /// <summary>
    /// Use this method to increment the current reward of the agent.
    /// </summary>
    /// <param name="increment">The value by which the reward will
    /// be incremented.</param>
    public void AddReward(float increment)
    {
        reward += increment;
        cumulativeReward += increment;
    }

    /// <summary>
    /// Gets the reward of the agent.
    /// </summary>
    /// <returns>The reward.</returns>
    public float GetReward()
    {
        return reward;
    }

    /// <summary>
    /// Gets the cumulative reward.
    /// </summary>
    public float GetCumulativeReward()
    {
        return cumulativeReward;
    }

    /// <summary>
    /// Is called when the agent is done. Either game-over, victory or timeout.
    /// </summary>
    public void Done()
    {
        done = true;
    }

    /// <summary>
    /// Is called when the agent must request a new decision from the brain.
    /// </summary>
    public void RequestDecision()
    {
        requestDecision = true;
        RequestAction();
    }

    /// <summary>
    /// Is called when the agent must perform a new action.
    /// </summary>
    public void RequestAction()
    {
        requestAction = true;
    }

    /// <summary>
    /// Indicates if the agent has reached its maximum number of steps.
    /// </summary>
    /// <returns><c>true</c>, if the max step was reached,
    /// <c>false</c> otherwise.</returns>
    public bool IsMaxStepReached()
    {
        return maxStepReached;
    }

    /// <summary>
    /// Indicates if the agent is done.
    /// </summary>
    /// <returns><c>true</c>, if the agent is done,
    /// <c>false</c> otherwise.</returns>
    public bool IsDone()
    {
        return done;
    }
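    // ------------------------------------------------------------------
    // Illustrative sketch (hypothetical subclass code, not part of this
    // class): a typical reward pattern. SetReward overwrites the current
    // step reward, AddReward accumulates onto it; per-step magnitudes are
    // kept within [-1, 1] as recommended above. "reachedGoal" is a
    // hypothetical flag.
    //
    //     AddReward(-0.01f);       // small time penalty every step
    //     if (reachedGoal)
    //     {
    //         SetReward(1.0f);     // overwrite with the terminal reward
    //         Done();              // end this agent's episode
    //     }
    // ------------------------------------------------------------------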
    /// <summary>
    /// Resets the info and action fields of the agent. Is called when the agent
    /// resets or changes brain.
    /// </summary>
    private void ResetState()
    {
        if (brain == null)
            return;

        BrainParameters param = brain.brainParameters;
        if (param.vectorActionSpaceType == SpaceType.continuous)
        {
            _action.vectorActions = new float[param.vectorActionSize];
            _info.StoredVectorActions = new float[param.vectorActionSize];
        }
        else
        {
            _action.vectorActions = new float[1];
            _info.StoredVectorActions = new float[1];
        }
        _action.textActions = "";
        _info.memories = new List<float>();
        _action.memories = new List<float>();
        if (param.vectorObservationSpaceType == SpaceType.continuous)
        {
            _info.vectorObservation =
                new List<float>(param.vectorObservationSize);
            _info.stackedVectorObservation =
                new List<float>(param.vectorObservationSize
                                * brain.brainParameters.numStackedVectorObservations);
            _info.stackedVectorObservation.AddRange(
                new float[param.vectorObservationSize
                          * param.numStackedVectorObservations]);
        }
        else
        {
            _info.vectorObservation = new List<float>(1);
            _info.stackedVectorObservation =
                new List<float>(param.numStackedVectorObservations);
            _info.stackedVectorObservation.AddRange(
                new float[param.numStackedVectorObservations]);
        }
        _info.visualObservations = new List<Texture2D>();
    }

    /// <summary>
    /// Initialize the agent with this method.
    /// Must be implemented in the agent-specific child class.
    /// This method is called only once, when the agent is enabled.
    /// </summary>
    public virtual void InitializeAgent()
    {
    }

    /// <summary>
    /// Sends the state to the brain.
    /// </summary>
    public void SendStateToBrain()
    {
        if (brain == null)
            return;

        _info.memories = _action.memories;
        _info.StoredVectorActions = _action.vectorActions;
        _info.StoredTextActions = _action.textActions;
        _info.vectorObservation.Clear();
        CollectObservations();

        BrainParameters param = brain.brainParameters;
        if (param.vectorObservationSpaceType == SpaceType.continuous)
        {
            if (_info.vectorObservation.Count != param.vectorObservationSize)
            {
                throw new UnityAgentsException(string.Format(
                    "Vector Observation size mismatch between continuous " +
                    "agent {0} and brain {1}. " +
                    "Was expecting {2} but received {3}.",
                    gameObject.name, brain.gameObject.name,
                    brain.brainParameters.vectorObservationSize,
                    _info.vectorObservation.Count));
            }
            _info.stackedVectorObservation.RemoveRange(
                0, param.vectorObservationSize);
            _info.stackedVectorObservation.AddRange(_info.vectorObservation);
        }
        else
        {
            if (_info.vectorObservation.Count != 1)
            {
                throw new UnityAgentsException(string.Format(
                    "Vector Observation size mismatch between discrete agent" +
                    " {0} and brain {1}. Was expecting {2} but received {3}.",
                    gameObject.name, brain.gameObject.name,
                    1, _info.vectorObservation.Count));
            }
            _info.stackedVectorObservation.RemoveRange(0, 1);
            _info.stackedVectorObservation.AddRange(_info.vectorObservation);
        }

        _info.visualObservations.Clear();
        if (param.cameraResolutions.Length > agentParameters.agentCameras.Count)
        {
            throw new UnityAgentsException(string.Format(
                "Not enough cameras for agent {0}: Brain {1} was expecting at " +
                "least {2} cameras but only {3} were present.",
                gameObject.name, brain.gameObject.name,
                brain.brainParameters.cameraResolutions.Length,
                agentParameters.agentCameras.Count));
        }
        for (int i = 0; i < brain.brainParameters.cameraResolutions.Length; i++)
        {
            _info.visualObservations.Add(ObservationToTexture(
                agentParameters.agentCameras[i],
                param.cameraResolutions[i].width,
                param.cameraResolutions[i].height));
        }

        _info.reward = reward;
        _info.done = done;
        _info.maxStepReached = maxStepReached;
        _info.id = id;

        brain.SendState(this, _info);
        _info.textObservation = "";
    }
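    // ------------------------------------------------------------------
    // Worked example (illustrative): with vectorObservationSize = 3 and
    // numStackedVectorObservations = 2, ResetState fills
    // stackedVectorObservation with 6 zeros. Each SendStateToBrain call
    // then removes the oldest 3 entries and appends the 3 newest, so the
    // brain always sees the current observation concatenated with the
    // previous one:
    //
    //     step 1: [0, 0, 0, a1, a2, a3]
    //     step 2: [a1, a2, a3, b1, b2, b3]
    // ------------------------------------------------------------------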
    /// <summary>
    /// Collects the observations. Must be implemented by the developer.
    /// </summary>
    public virtual void CollectObservations()
    {
    }

    /// <summary>
    /// Adds a vector observation.
    /// Note that the number of vector observations added
    /// must be the same at each CollectObservations call.
    /// </summary>
    /// <param name="observation">The float value to add to
    /// the vector observation.</param>
    internal void AddVectorObs(float observation)
    {
        _info.vectorObservation.Add(observation);
    }

    internal void SetTextObs(object s)
    {
        _info.textObservation = s.ToString();
    }

    /// <summary>
    /// Defines agent-specific behavior at every step, depending on the action.
    /// Must be implemented in the agent-specific child class.
    /// Note: If your state is discrete, you need to convert your
    /// state into a list of floats with length 1.
    /// </summary>
    /// <param name="action">The action the agent receives
    /// from the brain.</param>
    public virtual void AgentAction(float[] action)
    {
    }

    /// <summary>
    /// Defines agent-specific behaviour when done.
    /// Must be implemented in the agent-specific child class.
    /// Is called when the Agent is done if resetOnDone is false.
    /// The agent will remain done.
    /// You can use this method to remove the agent from the scene.
    /// </summary>
    public virtual void AgentOnDone()
    {
    }

    /// <summary>
    /// Defines agent-specific reset logic.
    /// Must be implemented in the agent-specific child class.
    /// Is called when the academy is done.
    /// Is called when the Agent is done if resetOnDone is true.
    /// </summary>
    public virtual void AgentReset()
    {
    }

    /// <summary>
    /// Is called when the agent resets.
    /// </summary>
    public void _AgentReset()
    {
        ResetState();
        stepCounter = 0;
        AgentReset();
    }

    /// Is used by the brain to give a new action to the agent.
    public void UpdateAction(AgentAction action)
    {
        _action = action;
    }

    public void UpdateVectorAction(float[] v)
    {
        _action.vectorActions = v;
    }

    public void UpdateMemoriesAction(List<float> v)
    {
        _action.memories = v;
    }

    public void UpdateTextAction(string t)
    {
        _action.textActions = t;
    }

    /// <summary>
    /// Sets the status of the agent.
    /// </summary>
    /// <param name="acaMaxStep">If set to <c>true</c>,
    /// the agent must set maxStepReached.</param>
    /// <param name="acaDone">If set to <c>true</c>,
    /// the agent must set done.</param>
    private void SetStatus(bool acaMaxStep, bool acaDone, int acaStepCounter)
    {
        if (acaDone)
            acaStepCounter = 0;
        MakeRequests(acaStepCounter);
        if (acaMaxStep)
            maxStepReached = true;
        if (acaDone)
        {
            Done();
            hasAlreadyReset = false;
            // If the Academy needs to reset, the agent should reset
            // even if it reset recently.
        }
    }

    /// <summary>
    /// Signals the agent that it must reset if its done flag is set to true.
    /// </summary>
    private void ResetIfDone()
    {
        // If an agent is done, then it will also
        // request a decision and an action.
        if (IsDone())
        {
            if (agentParameters.resetOnDone)
            {
                if (agentParameters.onDemandDecision)
                {
                    if (!hasAlreadyReset)
                    {
                        // If event-based, the agent can reset as soon
                        // as it is done.
                        _AgentReset();
                        hasAlreadyReset = true;
                    }
                }
                else if (requestDecision)
                {
                    // If not event-based, the agent must wait to request a
                    // decision before resetting to keep multiple agents in sync.
                    _AgentReset();
                }
            }
            else
            {
                terminate = true;
                RequestDecision();
            }
        }
    }

    /// <summary>
    /// Signals the agent that it must send its decision to the brain.
    /// </summary>
    private void SendState()
    {
        if (requestDecision)
        {
            SendStateToBrain();
            ResetReward();
            done = false;
            maxStepReached = false;
            requestDecision = false;
            hasAlreadyReset = false;
        }
    }
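    // ------------------------------------------------------------------
    // Illustrative sketch (hypothetical subclass code): with
    // agentParameters.onDemandDecision enabled, the agent asks for a
    // decision only when an event occurs, rather than on a fixed cadence:
    //
    //     void OnCollisionEnter(Collision collision)
    //     {
    //         RequestDecision();   // also flags requestAction internally
    //     }
    // ------------------------------------------------------------------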
    /// Is used by the brain to make the agent perform a step.
    private void _AgentStep()
    {
        if (terminate)
        {
            terminate = false;
            ResetReward();
            done = false;
            maxStepReached = false;
            requestDecision = false;
            requestAction = false;
            hasAlreadyReset = false;
            OnDisable();
            AgentOnDone();
        }

        if ((requestAction) && (brain != null))
        {
            requestAction = false;
            AgentAction(_action.vectorActions);
        }

        if ((stepCounter >= agentParameters.maxStep)
            && (agentParameters.maxStep > 0))
        {
            maxStepReached = true;
            Done();
        }

        stepCounter += 1;
    }

    /// <summary>
    /// Is called after every step; contains the logic to decide if the agent
    /// will request a decision at the next step.
    /// </summary>
    private void MakeRequests(int acaStepCounter)
    {
        agentParameters.numberOfActionsBetweenDecisions =
            Mathf.Max(agentParameters.numberOfActionsBetweenDecisions, 1);
        if (!agentParameters.onDemandDecision)
        {
            RequestAction();
            if (acaStepCounter %
                agentParameters.numberOfActionsBetweenDecisions == 0)
            {
                RequestDecision();
            }
        }
    }

    /** Contains logic for converting a camera component into a Texture2D. */
    public Texture2D ObservationToTexture(Camera cam, int width, int height)
    {
        Rect oldRec = cam.rect;
        cam.rect = new Rect(0f, 0f, 1f, 1f);
        var depth = 24;
        var format = RenderTextureFormat.Default;
        var readWrite = RenderTextureReadWrite.Default;

        var tempRT =
            RenderTexture.GetTemporary(width, height, depth, format, readWrite);
        var tex = new Texture2D(width, height, TextureFormat.RGB24, false);

        var prevActiveRT = RenderTexture.active;
        var prevCameraRT = cam.targetTexture;

        // Render to offscreen texture (readonly from CPU side).
        RenderTexture.active = tempRT;
        cam.targetTexture = tempRT;

        cam.Render();

        tex.ReadPixels(new Rect(0, 0, tex.width, tex.height), 0, 0);
        tex.Apply();
        cam.targetTexture = prevCameraRT;
        cam.rect = oldRec;
        RenderTexture.active = prevActiveRT;
        RenderTexture.ReleaseTemporary(tempRT);
        return tex;
    }
}
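// ----------------------------------------------------------------------
// Minimal illustrative subclass (a sketch, not shipped with this file):
// shows the three overrides most agents implement. All names below
// (ExampleRollerAgent, target, moveSpeed) are hypothetical, and the
// observation count (4) must match the brain's vectorObservationSize.
// ----------------------------------------------------------------------
public class ExampleRollerAgent : Agent
{
    public Transform target;      // assigned in the inspector (hypothetical)
    public float moveSpeed = 10f;

    // Feed the brain a fixed-size vector observation each decision.
    public override void CollectObservations()
    {
        AddVectorObs(transform.position.x);
        AddVectorObs(transform.position.z);
        AddVectorObs(target.position.x);
        AddVectorObs(target.position.z);
    }

    // Apply the brain's continuous actions and shape the reward.
    public override void AgentAction(float[] action)
    {
        transform.Translate(new Vector3(action[0], 0f, action[1])
                            * moveSpeed * Time.deltaTime);

        AddReward(-0.01f); // small per-step penalty to encourage speed
        if (Vector3.Distance(transform.position, target.position) < 1.5f)
        {
            SetReward(1f); // terminal reward on reaching the target
            Done();
        }
    }

    // Reset agent-specific state at the start of each episode.
    public override void AgentReset()
    {
        transform.position = Vector3.zero;
    }
}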