using System.Collections.Generic;
using UnityEngine;
namespace MLAgents
{
/// <summary>
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status, that is sent to the Brain.
/// </summary>
public struct AgentInfo
{
    /// <summary>
    /// Most recent agent vector (i.e. numeric) observation.
    /// </summary>
    public List<float> vectorObservation;

    /// <summary>
    /// The previous agent vector observations, stacked. The length of the
    /// history (i.e. number of vector observations to stack) is specified
    /// in the Brain parameters.
    /// </summary>
    public List<float> stackedVectorObservation;

    /// <summary>
    /// Most recent agent camera (i.e. texture) observation.
    /// </summary>
    public List<Texture2D> visualObservations;

    /// <summary>
    /// Most recent text observation.
    /// </summary>
    public string textObservation;

    /// <summary>
    /// Keeps track of the last vector action taken by the Brain.
    /// </summary>
    public float[] storedVectorActions;

    /// <summary>
    /// Keeps track of the last text action taken by the Brain.
    /// </summary>
    public string storedTextActions;

    /// <summary>
    /// Used by the Trainer to store information about the agent. This data
    /// structure is not consumed or modified by the agent directly, they are
    /// just the owners of their trainer's memory. Currently, however, the
    /// size of the memory is in the Brain properties.
    /// </summary>
    public List<float> memories;

    /// <summary>
    /// Current agent reward.
    /// </summary>
    public float reward;

    /// <summary>
    /// Whether the agent is done or not.
    /// </summary>
    public bool done;

    /// <summary>
    /// Whether the agent has reached its max step count for this episode.
    /// </summary>
    public bool maxStepReached;

    /// <summary>
    /// Unique identifier each agent receives at initialization. It is used
    /// to separate between different agents in the environment.
    /// </summary>
    public int id;
}
/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
public struct AgentAction
{
    /// <summary>
    /// Vector action chosen by the Brain.
    /// </summary>
    public float[] vectorActions;

    /// <summary>
    /// Text action chosen by the Brain.
    /// </summary>
    public string textActions;

    /// <summary>
    /// Memories returned with the action; they are copied back into the
    /// next AgentInfo sent to the Brain.
    /// </summary>
    public List<float> memories;

    /// <summary>
    /// Value attached to the action, exposed to the agent through
    /// GetValueEstimate.
    /// </summary>
    public float value;
}
/// <summary>
/// Class that contains all the Agent-specific parameters provided in the
/// Editor. This excludes the Brain linked to the Agent since it can be
/// modified programmatically.
/// </summary>
[System.Serializable]
public class AgentParameters
{
    /// <summary>
    /// The list of the Camera GameObjects the agent uses for visual
    /// observations.
    /// </summary>
    public List<Camera> agentCameras = new List<Camera>();

    /// <summary>
    /// The maximum number of steps the agent takes before being done.
    /// </summary>
    /// <remarks>
    /// If set to 0, the agent can only be set to done programmatically (or
    /// when the Academy is done).
    /// If set to any positive integer, the agent will be set to done after
    /// that many steps. Note that setting the max step to a value greater
    /// than the academy max step value renders it useless.
    /// </remarks>
    public int maxStep;

    /// <summary>
    /// Determines the behaviour of the agent when done.
    /// </summary>
    /// <remarks>
    /// If true, the agent will reset when done and start a new episode.
    /// Otherwise, the agent will remain done and its behavior will be
    /// dictated by the AgentOnDone method.
    /// </remarks>
    public bool resetOnDone = true;

    /// <summary>
    /// Whether to enable On Demand Decisions or make a decision at
    /// every step.
    /// </summary>
    public bool onDemandDecision;

    /// <summary>
    /// Number of actions between decisions (used when On Demand Decisions
    /// is turned off).
    /// </summary>
    public int numberOfActionsBetweenDecisions;
}
///
/// Agent Monobehavior class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
/// to the agent in addition to the vector observations implemented by the
/// user in <see cref="CollectObservations"/>. On the other hand, actions
/// are determined by decisions produced by a linked Brain. Currently, this
/// class is expected to be extended to implement the desired agent behavior.
///
///
/// Simply speaking, an agent roams through an environment and at each step
/// of the environment extracts its current observation, sends them to its
/// linked brain and in return receives an action from its brain. In practice,
/// however, an agent need not send its observation at every step since very
/// little may have changed between successive steps. Currently, how often an
/// agent updates its brain with a fresh observation is determined by the
/// Academy.
///
/// At any step, an agent may be considered done (see <see cref="IsDone"/>).
/// This could occur due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).
///
/// Here, an agent reaches an end state if it completes its task successfully
/// or somehow fails along the way. In the case where an agent is done before
/// the academy, it either resets and restarts, or just lingers until the
/// academy is done.
///
/// An important note regarding steps and episodes is due. Here, an agent step
/// corresponds to an academy step, which also corresponds to Unity
/// environment step (i.e. each FixedUpdate call). This is not the case for
/// episodes. The academy controls the global episode count and each agent
/// controls its own local episode count and can reset and start a new local
/// episode independently (based on its own experience). Thus an academy
/// (global) episode can be viewed as the upper-bound on an agents episode
/// length and that within a single global episode, an agent may have completed
/// multiple local episodes. Consequently, if an agent max step is
/// set to a value larger than the academy max steps value, then the academy
/// value takes precedence (since the agent max step will never be reached).
///
/// Lastly, note that at any step the brain linked to the agent is allowed to
/// change programmatically with <see cref="GiveBrain"/>.
///
/// Implementation-wise, it is required that this class is extended and the
/// virtual methods overridden. For sample implementations of agent behavior,
/// see the Examples/ directory within this Unity project.
///
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Agents.md")]
[System.Serializable]
public abstract class Agent : MonoBehaviour
{
/// <summary>
/// The Brain attached to this agent. A brain can be attached either
/// directly from the Editor through AgentEditor or
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
/// to not have a brain, as long as no decision is requested.
/// </summary>
[HideInInspector] public Brain brain;
/// <summary>
/// Agent parameters specified within the Editor via AgentEditor.
/// </summary>
[HideInInspector] public AgentParameters agentParameters;
/// <summary>Current Agent information (message sent to Brain).</summary>
AgentInfo info;
/// <summary>Current Agent action (message sent from Brain).</summary>
AgentAction action;
/// <summary>
/// Represents the reward the agent accumulated during the current step.
/// It is set back to 0 by ResetReward, which runs right after the agent
/// info has been sent to the Brain.
/// Should be set to a positive value when the agent performs a "good"
/// action that we wish to reinforce/reward, and set to a negative value
/// when the agent performs a "bad" action that we wish to punish/deter.
/// Additionally, the magnitude of the reward should not exceed 1.0
/// </summary>
float reward;
/// <summary>Keeps track of the cumulative reward in this episode.</summary>
float cumulativeReward;
/// <summary>Whether or not the agent requests an action.</summary>
bool requestAction;
/// <summary>Whether or not the agent requests a decision.</summary>
bool requestDecision;
/// <summary>
/// Whether or not the agent has completed the episode. This may be due
/// to either reaching a success or fail state, or reaching the maximum
/// number of steps (i.e. timing out).
/// </summary>
bool done;
/// <summary>Whether or not the agent reached the maximum number of steps.</summary>
bool maxStepReached;
/// <summary>
/// Keeps track of the number of steps taken by the agent in this episode.
/// Note that this value is different for each agent, and may not overlap
/// with the step counter in the Academy, since agents reset based on
/// their own experience.
/// </summary>
int stepCount;
// Flag to signify that an agent has been reset but the fact that it is
// done has not been communicated (required for On Demand Decisions).
bool hasAlreadyReset;
// Flag to signify that an agent is done and should not reset until
// the fact that it is done has been communicated.
bool terminate;
/// <summary>
/// Unique identifier each agent receives at initialization (taken from
/// the GameObject instance ID in OnEnable). It is used to separate
/// between different agents in the environment.
/// </summary>
int id;
/// <summary>
/// MonoBehaviour callback invoked when the attached GameObject becomes
/// enabled or active. Records the agent id, looks up the Academy in the
/// scene and delegates the rest of the set-up to OnEnableHelper.
/// </summary>
void OnEnable()
{
    id = gameObject.GetInstanceID();
    // The generic overload already returns an Academy (or null), so no cast
    // is needed.
    Academy academy = Object.FindObjectOfType<Academy>();
    OnEnableHelper(academy);
}
/// <summary>
/// Performs the actual work of <see cref="OnEnable"/>; kept separate so
/// that an Academy can be supplied directly, which facilitates testing.
/// </summary>
/// <param name="academy">Academy whose events this agent subscribes to.</param>
void OnEnableHelper(Academy academy)
{
    info = new AgentInfo();
    action = new AgentAction();

    if (academy == null)
    {
        throw new UnityAgentsException(
            "No Academy Component could be found in the scene.");
    }

    // Register this agent's handlers on every Academy event; OnDisable
    // removes the same handlers.
    academy.AgentSetStatus += SetStatus;
    academy.AgentResetIfDone += ResetIfDone;
    academy.AgentSendState += SendInfo;
    academy.AgentAct += AgentStep;
    academy.AgentForceReset += _AgentReset;

    if (brain == null)
    {
        string notice = string.Format(
            "The Agent component attached to the GameObject {0} was initialized without a brain.",
            gameObject.name);
        Debug.Log(notice);
    }
    else
    {
        ResetData();
    }

    InitializeAgent();
}
/// <summary>
/// MonoBehaviour callback invoked when the attached GameObject becomes
/// disabled or inactive. Removes this agent's handlers from the Academy
/// events, if an Academy can still be found in the scene.
/// </summary>
void OnDisable()
{
    Academy academy = Object.FindObjectOfType<Academy>();
    if (academy == null)
    {
        // Nothing to unsubscribe from.
        return;
    }

    academy.AgentSetStatus -= SetStatus;
    academy.AgentResetIfDone -= ResetIfDone;
    academy.AgentSendState -= SendInfo;
    academy.AgentAct -= AgentStep;
    academy.AgentForceReset -= _AgentReset;
}
/// <summary>
/// Updates the Brain for the agent. Any brain currently assigned to the
/// agent will be replaced with the provided one.
/// </summary>
/// <remarks>
/// The provided brain is stored and the agent's data structures are then
/// re-created through ResetData. This enables contextual brains, that
/// is, updating the behaviour (hence brain) of the agent depending on
/// the context of the game. For example, we may utilize one (wandering)
/// brain when an agent is randomly exploring an open world, but switch
/// to another (fighting) brain when it comes into contact with an enemy.
/// </remarks>
/// <param name="brain">New brain to assign to this agent.</param>
public void GiveBrain(Brain brain)
{
this.brain = brain;
ResetData();
}
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Number of steps taken by the agent in the current episode.
/// </returns>
public int GetStepCount()
{
return stepCount;
}
/// <summary>
/// Clears the step reward; when the agent is done, the episode
/// (cumulative) reward is cleared as well.
/// </summary>
public void ResetReward()
{
    if (done)
    {
        cumulativeReward = 0f;
    }

    reward = 0f;
}
/// <summary>
/// Replaces the current step reward and adjusts the episode reward by the
/// difference between the new and the previous step reward.
/// </summary>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)
{
    float delta = reward - this.reward;
    cumulativeReward += delta;
    this.reward = reward;
}
/// <summary>
/// Adds the provided value to both the episode reward and the step reward.
/// </summary>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)
{
    cumulativeReward += increment;
    reward += increment;
}
/// <summary>
/// Retrieves the step reward for the Agent.
/// </summary>
/// <returns>The step reward.</returns>
public float GetReward()
{
return reward;
}
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>
/// <returns>The episode reward.</returns>
public float GetCumulativeReward()
{
return cumulativeReward;
}
/// <summary>
/// Sets the done flag to true.
/// </summary>
public void Done()
{
done = true;
}
/// <summary>
/// Called when the agent must ask the brain for a new decision. Requesting
/// a decision also requests an action.
/// </summary>
public void RequestDecision()
{
    RequestAction();
    requestDecision = true;
}
/// <summary>
/// Is called when the agent must perform a new action.
/// </summary>
public void RequestAction()
{
requestAction = true;
}
/// <summary>
/// Indicates if the agent has reached its maximum number of steps.
/// </summary>
/// <returns>
/// <c>true</c>, if max step was reached, <c>false</c> otherwise.
/// </returns>
public bool IsMaxStepReached()
{
return maxStepReached;
}
/// <summary>
/// Indicates if the agent is done.
/// </summary>
/// <returns>
/// <c>true</c>, if the agent is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone()
{
return done;
}
/// <summary>
/// Helper function that resets all the data structures associated with
/// the agent. Typically used when the agent is being initialized or reset
/// at the end of an episode. Does nothing when no brain is attached.
/// </summary>
void ResetData()
{
    if (brain == null)
    {
        return;
    }

    BrainParameters param = brain.brainParameters;

    // Continuous action spaces use one slot per action dimension; any other
    // space type uses a single-element array.
    int actionLength =
        param.vectorActionSpaceType == SpaceType.continuous
            ? param.vectorActionSize
            : 1;
    action.vectorActions = new float[actionLength];
    info.storedVectorActions = new float[actionLength];

    // A text observation that was set before the reset is preserved.
    if (info.textObservation == null)
    {
        info.textObservation = "";
    }
    action.textActions = "";

    info.memories = new List<float>();
    action.memories = new List<float>();

    info.vectorObservation = new List<float>(param.vectorObservationSize);

    // The stacked buffer is pre-filled with zeros so that
    // SendInfoToBrain can always drop the oldest observation from the front.
    int stackedLength =
        param.vectorObservationSize * param.numStackedVectorObservations;
    info.stackedVectorObservation = new List<float>(stackedLength);
    info.stackedVectorObservation.AddRange(new float[stackedLength]);

    info.visualObservations = new List<Texture2D>();
}
/// <summary>
/// Initializes the agent, called once when the agent is enabled. Can be
/// left empty if there is no special, unique set-up behavior for the
/// agent.
/// </summary>
/// <remarks>
/// One sample use is to store local references to other objects in the
/// scene which would facilitate computing this agent's observation.
/// </remarks>
public virtual void InitializeAgent()
{
}
/// <summary>
/// Sends the Agent info to the linked Brain. Does nothing when no brain
/// is attached.
/// </summary>
/// <remarks>
/// Throws UnityAgentsException when the number of collected vector
/// observations differs from the brain's vectorObservationSize, or when
/// the brain expects more cameras than the agent provides.
/// </remarks>
void SendInfoToBrain()
{
    if (brain == null)
    {
        return;
    }

    // Echo the last action received from the Brain back with the new info.
    info.memories = action.memories;
    info.storedVectorActions = action.vectorActions;
    info.storedTextActions = action.textActions;

    info.vectorObservation.Clear();
    CollectObservations();

    BrainParameters param = brain.brainParameters;
    if (info.vectorObservation.Count != param.vectorObservationSize)
    {
        throw new UnityAgentsException(string.Format(
            "Vector Observation size mismatch between continuous " +
            "agent {0} and brain {1}. " +
            "Was Expecting {2} but received {3}. ",
            gameObject.name, brain.gameObject.name,
            param.vectorObservationSize,
            info.vectorObservation.Count));
    }

    // Slide the stacking window: drop the oldest observation from the
    // front and append the newest one at the back.
    info.stackedVectorObservation.RemoveRange(
        0, param.vectorObservationSize);
    info.stackedVectorObservation.AddRange(info.vectorObservation);

    info.visualObservations.Clear();
    int expectedCameras = param.cameraResolutions.Length;
    if (expectedCameras > agentParameters.agentCameras.Count)
    {
        throw new UnityAgentsException(string.Format(
            "Not enough cameras for agent {0} : Brain {1} expecting at " +
            "least {2} cameras but only {3} were present.",
            gameObject.name, brain.gameObject.name,
            expectedCameras,
            agentParameters.agentCameras.Count));
    }

    // NOTE(review): the textures created here are not destroyed in this
    // method; confirm that the consumer of the info releases them.
    for (int i = 0; i < expectedCameras; i++)
    {
        info.visualObservations.Add(ObservationToTexture(
            agentParameters.agentCameras[i],
            param.cameraResolutions[i].width,
            param.cameraResolutions[i].height));
    }

    info.reward = reward;
    info.done = done;
    info.maxStepReached = maxStepReached;
    info.id = id;

    brain.SendState(this, info);

    // A text observation is only valid for the send it was set for.
    info.textObservation = "";
}
/// <summary>
/// Collects the (vector, visual, text) observations of the agent.
/// The agent observation describes the current environment from the
/// perspective of the agent.
/// </summary>
/// <remarks>
/// Simply, an agent's observation is any environment information that helps
/// the Agent achieve its goal. For example, for a fighting Agent, its
/// observation could include distances to friends or enemies, or the
/// current level of ammunition at its disposal.
/// Recall that an Agent may attach vector, visual or textual observations.
/// Vector observations are added by calling the provided helper methods:
///     - <see cref="AddVectorObs(int)"/>
///     - <see cref="AddVectorObs(float)"/>
///     - <see cref="AddVectorObs(Vector3)"/>
///     - <see cref="AddVectorObs(Vector2)"/>
///     - <see cref="AddVectorObs(float[])"/>
///     - <see cref="AddVectorObs(List{float})"/>
///     - <see cref="AddVectorObs(Quaternion)"/>
///     - <see cref="AddVectorObs(bool)"/>
///     - <see cref="AddVectorObs(int, int)"/>
/// Depending on your environment, any combination of these helpers can
/// be used. They just need to be used in the exact same order each time
/// this method is called and the resulting size of the vector observation
/// needs to match the vectorObservationSize attribute of the linked Brain.
/// Visual observations are implicitly added from the cameras attached to
/// the Agent.
/// Lastly, textual observations are added using
/// <see cref="SetTextObs(string)"/>.
/// </remarks>
public virtual void CollectObservations()
{
}
/// <summary>
/// Adds a float observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by 1.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(float observation)
{
    info.vectorObservation.Add(observation);
}

/// <summary>
/// Adds an integer observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by 1.
/// </summary>
/// <param name="observation">Observation (stored as a float).</param>
protected void AddVectorObs(int observation)
{
    info.vectorObservation.Add(observation);
}

/// <summary>
/// Adds a Vector3 observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by 3.
/// </summary>
/// <param name="observation">Observation; added in x, y, z order.</param>
protected void AddVectorObs(Vector3 observation)
{
    info.vectorObservation.Add(observation.x);
    info.vectorObservation.Add(observation.y);
    info.vectorObservation.Add(observation.z);
}

/// <summary>
/// Adds a Vector2 observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by 2.
/// </summary>
/// <param name="observation">Observation; added in x, y order.</param>
protected void AddVectorObs(Vector2 observation)
{
    info.vectorObservation.Add(observation.x);
    info.vectorObservation.Add(observation.y);
}

/// <summary>
/// Adds a float array observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by size of array.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(float[] observation)
{
    info.vectorObservation.AddRange(observation);
}

/// <summary>
/// Adds a float list observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by size of list.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(List<float> observation)
{
    info.vectorObservation.AddRange(observation);
}

/// <summary>
/// Adds a quaternion observation to the vector observations of the agent.
/// Increases the size of the agent's vector observation by 4.
/// </summary>
/// <param name="observation">Observation; added in x, y, z, w order.</param>
protected void AddVectorObs(Quaternion observation)
{
    info.vectorObservation.Add(observation.x);
    info.vectorObservation.Add(observation.y);
    info.vectorObservation.Add(observation.z);
    info.vectorObservation.Add(observation.w);
}

/// <summary>
/// Adds a boolean observation to the vector observation of the agent.
/// Increases the size of the agent's vector observation by 1.
/// </summary>
/// <param name="observation">Observation; true is stored as 1, false as 0.</param>
protected void AddVectorObs(bool observation)
{
    info.vectorObservation.Add(observation ? 1f : 0f);
}

/// <summary>
/// Adds a one-hot encoded observation to the vector observations of the
/// agent: <paramref name="range"/> values that are all 0 except for a 1 at
/// index <paramref name="observation"/>.
/// Increases the size of the agent's vector observation by
/// <paramref name="range"/>.
/// </summary>
/// <param name="observation">
/// Index to set to 1. Must lie in [0, range); otherwise the array access
/// below throws.
/// </param>
/// <param name="range">Length of the one-hot vector.</param>
protected void AddVectorObs(int observation, int range)
{
    float[] oneHotVector = new float[range];
    oneHotVector[observation] = 1;
    info.vectorObservation.AddRange(oneHotVector);
}
/// <summary>
/// Sets the text observation.
/// </summary>
/// <param name="textObservation">The text observation.</param>
public void SetTextObs(string textObservation)
{
info.textObservation = textObservation;
}
/// <summary>
/// Specifies the agent behavior at every step based on the provided
/// action.
/// </summary>
/// <param name="vectorAction">
/// Vector action. Note that for discrete actions, the provided array
/// will be of length 1.
/// </param>
/// <param name="textAction">Text action.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{
}
/// <summary>
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>
/// Specifies the agent behavior when being reset, which can be due to
/// the agent or Academy being done (i.e. completion of local or global
/// episode).
/// </summary>
public virtual void AgentReset()
{
}
/// <summary>
/// Internal reset: zeroes the step counter and re-creates the agent's
/// data structures before handing over to <see cref="AgentReset"/>.
/// </summary>
void _AgentReset()
{
    stepCount = 0;
    ResetData();

    // User hook runs last so it sees freshly reset state.
    AgentReset();
}
/// <summary>
/// Updates the vector action.
/// </summary>
/// <param name="vectorActions">Vector actions.</param>
public void UpdateVectorAction(float[] vectorActions)
{
action.vectorActions = vectorActions;
}
/// <summary>
/// Updates the memories action.
/// </summary>
/// <param name="memories">Memories.</param>
public void UpdateMemoriesAction(List<float> memories)
{
    action.memories = memories;
}
/// <summary>
/// Updates the text action.
/// </summary>
/// <param name="textActions">Text actions.</param>
public void UpdateTextAction(string textActions)
{
action.textActions = textActions;
}
/// <summary>
/// Updates the value of the agent.
/// </summary>
/// <param name="value">Value to store with the current action.</param>
public void UpdateValueAction(float value)
{
action.value = value;
}
/// <summary>
/// Returns the value stored with the current action (as last set through
/// UpdateValueAction).
/// </summary>
/// <returns>The value of the current action.</returns>
protected float GetValueEstimate()
{
return action.value;
}
/// <summary>
/// Linearly maps a continuous action from [-1, 1] onto [min, max]:
/// -1 maps to <paramref name="min"/>, 0 to the midpoint and 1 to
/// <paramref name="max"/>.
/// </summary>
/// <param name="rawAction">The action value to scale.</param>
/// <param name="min">Lower bound of the target range.</param>
/// <param name="max">Upper bound of the target range.</param>
/// <returns>The scaled action.</returns>
protected float ScaleAction(float rawAction, float min, float max)
{
    float halfSpan = (max - min) / 2;
    float center = (min + max) / 2;
    return rawAction * halfSpan + center;
}
/// <summary>
/// Sets the status of the agent from the Academy's status.
/// </summary>
/// <param name="academyMaxStep">
/// If set to <c>true</c>, the agent must set maxStepReached.
/// </param>
/// <param name="academyDone">
/// If set to <c>true</c>, the agent must set done.
/// </param>
/// <param name="academyStepCounter">Number of current steps in episode.</param>
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
{
    // When the Academy is done the step counter is treated as 0.
    int effectiveStep = academyDone ? 0 : academyStepCounter;
    MakeRequests(effectiveStep);

    maxStepReached = maxStepReached || academyMaxStep;

    if (!academyDone)
    {
        return;
    }

    // If the Academy needs to reset, the agent should reset
    // even if it was reset recently.
    Done();
    hasAlreadyReset = false;
}
/// <summary>
/// Signals the agent that it must reset if its done flag is set to true.
/// </summary>
void ResetIfDone()
{
    if (!IsDone())
    {
        return;
    }

    if (!agentParameters.resetOnDone)
    {
        // The agent will not reset: flag it for termination and request a
        // decision (which also requests an action).
        terminate = true;
        RequestDecision();
        return;
    }

    if (agentParameters.onDemandDecision)
    {
        // Event based: the agent can reset as soon as it is done, but
        // only once until that has been communicated.
        if (!hasAlreadyReset)
        {
            _AgentReset();
            hasAlreadyReset = true;
        }
        return;
    }

    // Not event based: the agent must wait until it requests a decision
    // before resetting, to keep multiple agents in sync.
    if (requestDecision)
    {
        _AgentReset();
    }
}
/// <summary>
/// Signals the agent that it must send its info to the brain. Only acts
/// when a decision has been requested.
/// </summary>
void SendInfo()
{
    if (!requestDecision)
    {
        return;
    }

    SendInfoToBrain();

    // ResetReward reads the done flag, so it must run before done is cleared.
    ResetReward();
    done = false;
    maxStepReached = false;
    requestDecision = false;
    hasAlreadyReset = false;
}
/// <summary>
/// Makes the agent perform a step: handles a pending termination, applies
/// the current action if one was requested, and advances the step counter.
/// </summary>
void AgentStep()
{
    if (terminate)
    {
        terminate = false;

        // ResetReward reads the done flag, so it must run before done is
        // cleared.
        ResetReward();
        done = false;
        maxStepReached = false;
        requestDecision = false;
        requestAction = false;
        hasAlreadyReset = false;

        // Unsubscribe from the Academy before handing over to the user hook.
        OnDisable();
        AgentOnDone();
    }

    if (requestAction && (brain != null))
    {
        requestAction = false;
        AgentAction(action.vectorActions, action.textActions);
    }

    // A max step of 0 (or less) disables the per-agent step limit.
    int stepLimit = agentParameters.maxStep;
    if ((stepLimit > 0) && (stepCount >= stepLimit))
    {
        maxStepReached = true;
        Done();
    }

    stepCount += 1;
}
/// <summary>
/// Called after every step; decides whether the agent will request an
/// action and a decision at the next step. Has no effect on requests when
/// On Demand Decisions is enabled.
/// </summary>
/// <param name="academyStepCounter">Current Academy step count.</param>
void MakeRequests(int academyStepCounter)
{
    // Clamp the interval to at least 1 and write it back to the parameters.
    int interval =
        Mathf.Max(agentParameters.numberOfActionsBetweenDecisions, 1);
    agentParameters.numberOfActionsBetweenDecisions = interval;

    if (agentParameters.onDemandDecision)
    {
        return;
    }

    RequestAction();
    if (academyStepCounter % interval == 0)
    {
        RequestDecision();
    }
}
/// <summary>
/// Converts a camera and corresponding resolution to a 2D texture.
/// </summary>
/// <remarks>
/// The camera's rect and target texture and the active render texture are
/// restored, and the temporary render texture is released, even if
/// rendering throws.
/// </remarks>
/// <returns>The 2D texture.</returns>
/// <param name="camera">Camera.</param>
/// <param name="width">Width of resulting 2D texture.</param>
/// <param name="height">Height of resulting 2D texture.</param>
public static Texture2D ObservationToTexture(Camera camera, int width, int height)
{
    // Remember everything this method changes so it can be put back.
    Rect oldRec = camera.rect;
    var prevActiveRT = RenderTexture.active;
    var prevCameraRT = camera.targetTexture;

    var depth = 24;
    var format = RenderTextureFormat.Default;
    var readWrite = RenderTextureReadWrite.Default;
    var tempRT =
        RenderTexture.GetTemporary(width, height, depth, format, readWrite);

    try
    {
        var tex = new Texture2D(width, height, TextureFormat.RGB24, false);

        // Render the full camera view, regardless of its on-screen rect.
        camera.rect = new Rect(0f, 0f, 1f, 1f);

        // render to offscreen texture (readonly from CPU side)
        RenderTexture.active = tempRT;
        camera.targetTexture = tempRT;
        camera.Render();

        // ReadPixels copies from the currently active render texture.
        tex.ReadPixels(new Rect(0, 0, tex.width, tex.height), 0, 0);
        tex.Apply();
        return tex;
    }
    finally
    {
        camera.targetTexture = prevCameraRT;
        camera.rect = oldRec;
        RenderTexture.active = prevActiveRT;
        RenderTexture.ReleaseTemporary(tempRT);
    }
}
}
}