using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Agent info. The agent will send an instance of this class to the brain.
/// </summary>
public struct AgentInfo
{
public List<float> vectorObservation;
public List<float> stackedVectorObservation;
public List<Texture2D> visualObservations;
public List<float> memories;
public string textObservation;
public float[] StoredVectorActions;
public string StoredTextActions;
public float reward;
public bool done;
public bool maxStepReached;
public int id;
}
/// <summary>
/// Agent action. The brain will send an instance of this class to the agent
/// when making a decision.
/// </summary>
public struct AgentAction
{
public float[] vectorActions;
public string textActions;
public List<float> memories;
}
/// <summary>
/// Agent parameters. Reflects the user's settings for the agent,
/// as set in the inspector.
/// </summary>
[System.Serializable]
public class AgentParameters
{
public List<Camera> agentCameras = new List<Camera>();
public int maxStep;
public bool resetOnDone = true;
public bool onDemandDecision;
public int numberOfActionsBetweenDecisions;
}
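// Illustrative sketch (an assumption, not prescribed by this class): the
// parameters above can also be set from script. "myAgent" is a hypothetical
// Agent reference obtained elsewhere.
//
//     myAgent.agentParameters.maxStep = 1000;
//     myAgent.agentParameters.resetOnDone = true;
//     myAgent.agentParameters.onDemandDecision = false;
//     myAgent.agentParameters.numberOfActionsBetweenDecisions = 5;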
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#agent")]
/** Generic functions for parent Agent class.
* Contains all logic for Brain-Agent communication and Agent-Environment
* interaction.
*/
[System.Serializable]
public abstract class Agent : MonoBehaviour
{
/// <summary>
/// The brain that will control this agent.
/// Use the inspector to drag the desired brain gameObject into
/// the Brain field.
/// </summary>
[HideInInspector]
public Brain brain;
/// <summary>
/// The info. This is the placeholder for the information the agent will send
/// to the brain.
/// </summary>
private AgentInfo _info;
/// <summary>
/// The action. This is the placeholder for the actions the agent will receive.
/// </summary>
private AgentAction _action;
/// <summary>
/// The reward. Describes the reward for the given step of the agent.
/// It is reset to 0 at the beginning of every step.
/// Modify it via SetReward() and AddReward().
/// Set it to a positive value to reinforce desired behavior, and
/// to a negative value to punish undesirable behavior.
/// Additionally, the magnitude of the reward should not exceed 1.0.
/// </summary>
private float reward;
/// Whether or not the agent requests an action
private bool requestAction;
/// Whether or not the agent requests a decision
private bool requestDecision;
/// <summary>
/// Whether or not the agent is done.
/// Set to true when the agent has acted in some way which ends the
/// episode for the given agent.
/// </summary>
private bool done;
/// Whether or not the max step is reached
private bool maxStepReached;
/// Do not modify: This keeps track of the cumulative reward.
private float cumulativeReward;
/// This keeps track of the number of steps taken by the agent each episode.
[HideInInspector]
public int stepCounter;
private bool hasAlreadyReset;
private bool terminate;
[HideInInspector]
public AgentParameters agentParameters;
/// <summary>
/// This is the unique identifier each agent
/// receives at initialization. It is used by the brain to identify
/// the agent.
/// </summary>
private int id;
/// <summary>
/// Unity method called when the agent is instantiated or set to active.
/// </summary>
private void OnEnable()
{
id = gameObject.GetInstanceID();
Academy aca = Object.FindObjectOfType<Academy>() as Academy;
_InitializeAgent(aca);
}
/// <summary>
/// Is called when the agent is initialized.
/// </summary>
void _InitializeAgent(Academy aca)
{
_info = new AgentInfo();
_action = new AgentAction();
if (aca == null)
throw new UnityAgentsException("No Academy Component could be " +
"found in the scene.");
aca.AgentSetStatus += SetStatus;
aca.AgentResetIfDone += ResetIfDone;
aca.AgentSendState += SendState;
aca.AgentAct += _AgentStep;
aca.AgentForceReset += _AgentReset;
if (brain != null)
{
ResetState();
}
else
{
Debug.Log(
string.Format("The Agent component attached to the " +
"GameObject {0} was initialized without a brain."
, gameObject.name));
}
InitializeAgent();
}
/// <summary>
/// Is called when the agent is disabled.
/// </summary>
void _DisableAgent(Academy aca)
{
if (aca != null)
{
aca.AgentSetStatus -= SetStatus;
aca.AgentResetIfDone -= ResetIfDone;
aca.AgentSendState -= SendState;
aca.AgentAct -= _AgentStep;
aca.AgentForceReset -= _AgentReset;
}
}
/// <summary>
/// Gets called when the agent is destroyed or is set inactive.
/// </summary>
void OnDisable()
{
Academy aca = Object.FindObjectOfType<Academy>() as Academy;
_DisableAgent(aca);
}
/// <summary>
/// When GiveBrain is called, the agent unsubscribes from its
/// previous brain and subscribes to the one passed in argument.
/// Use this method to provide a brain to the agent via script.
/// </summary>
/// <param name="b">The Brain the agent will subscribe to.</param>
public void GiveBrain(Brain b)
{
brain = b;
ResetState();
}
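// Illustrative sketch: swapping brains at runtime, e.g. to switch an agent
// from a player-controlled brain to a trained one. "myAgent" and
// "trainedBrain" are hypothetical references assigned elsewhere.
//
//     myAgent.GiveBrain(trainedBrain);   // unsubscribes from the old brain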
/// <summary>
/// Resets the reward of the agent.
/// </summary>
public void ResetReward()
{
reward = 0f;
if (done)
{
cumulativeReward = 0f;
}
}
/// <summary>
/// Use this method to overwrite the current reward of the agent.
/// </summary>
/// <param name="newValue">The new value of the reward.</param>
public void SetReward(float newValue)
{
cumulativeReward += newValue - reward;
reward = newValue;
}
/// <summary>
/// Use this method to increment the current reward of the agent.
/// </summary>
/// <param name="increment">The value by which the reward will
/// be incremented.</param>
public void AddReward(float increment)
{
reward += increment;
cumulativeReward += increment;
}
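// Illustrative sketch of typical reward shaping inside a subclass: a small
// per-step penalty plus a terminal reward. The values and the "reachedGoal"
// condition are assumptions, not prescribed by this class.
//
//     AddReward(-0.0005f);       // encourage finishing quickly
//     if (reachedGoal)           // hypothetical success condition
//     {
//         SetReward(1f);         // overwrite the step reward on success
//         Done();
//     }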
/// <summary>
/// Gets the reward of the agent.
/// </summary>
/// <returns>The reward.</returns>
public float GetReward()
{
return reward;
}
/// <summary>
/// Gets the cumulative reward.
/// </summary>
/// <returns>The cumulative reward.</returns>
public float GetCumulativeReward()
{
return cumulativeReward;
}
/// <summary>
/// Is called when the agent is done. Either game-over, victory, or timeout.
/// </summary>
public void Done()
{
done = true;
}
/// <summary>
/// Is called when the agent must request a new decision from the brain.
/// </summary>
public void RequestDecision()
{
requestDecision = true;
RequestAction();
}
/// <summary>
/// Is called when the agent must perform a new action.
/// </summary>
public void RequestAction()
{
requestAction = true;
}
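// Illustrative sketch: with agentParameters.onDemandDecision enabled, a
// subclass can request decisions only when something of interest happens
// rather than on a fixed schedule. The trigger below is an assumption.
//
//     void OnTriggerEnter(Collider other)
//     {
//         RequestDecision();   // also flags an action via RequestAction()
//     }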
/// <summary>
/// Indicates if the agent has reached its maximum number of steps.
/// </summary>
/// <returns>true, if the max step was reached,
/// false otherwise.</returns>
public bool IsMaxStepReached()
{
return maxStepReached;
}
/// <summary>
/// Indicates if the agent is done.
/// </summary>
/// <returns>true, if the agent is done,
/// false otherwise.</returns>
public bool IsDone()
{
return done;
}
/// <summary>
/// Resets the info and action fields of the agent. Is called when the agent
/// resets or changes brain.
/// </summary>
private void ResetState()
{
if (brain == null)
return;
BrainParameters param = brain.brainParameters;
if (param.vectorActionSpaceType == SpaceType.continuous)
{
_action.vectorActions = new float[param.vectorActionSize];
_info.StoredVectorActions = new float[param.vectorActionSize];
}
else
{
_action.vectorActions = new float[1];
_info.StoredVectorActions = new float[1];
}
_action.textActions = "";
_info.memories = new List<float>();
_action.memories = new List<float>();
if (param.vectorObservationSpaceType == SpaceType.continuous)
{
_info.vectorObservation =
new List<float>(param.vectorObservationSize);
_info.stackedVectorObservation =
new List<float>(param.vectorObservationSize
* brain.brainParameters.numStackedVectorObservations);
_info.stackedVectorObservation.AddRange(
new float[param.vectorObservationSize
* param.numStackedVectorObservations]);
}
else
{
_info.vectorObservation = new List<float>(1);
_info.stackedVectorObservation =
new List<float>(param.numStackedVectorObservations);
_info.stackedVectorObservation.AddRange(
new float[param.numStackedVectorObservations]);
}
_info.visualObservations = new List<Texture2D>();
}
/// <summary>
/// Initialize the agent with this method.
/// Must be implemented in agent-specific child class.
/// This method is called only once when the agent is enabled.
/// </summary>
public virtual void InitializeAgent()
{
}
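// Illustrative sketch of an override: cache component references once,
// since this runs a single time when the agent is enabled. "rb" is a
// hypothetical field of the subclass.
//
//     public override void InitializeAgent()
//     {
//         rb = GetComponent<Rigidbody>();
//     }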
/// <summary>
/// Sends the state to the brain.
/// </summary>
public void SendStateToBrain()
{
if (brain == null)
return;
_info.memories = _action.memories;
_info.StoredVectorActions = _action.vectorActions;
_info.StoredTextActions = _action.textActions;
_info.vectorObservation.Clear();
CollectObservations();
BrainParameters param = brain.brainParameters;
if (param.vectorObservationSpaceType == SpaceType.continuous)
{
if (_info.vectorObservation.Count != param.vectorObservationSize)
{
throw new UnityAgentsException(string.Format(
"Vector Observation size mismatch between continuous " +
"agent {0} and brain {1}. " +
"Was Expecting {2} but received {3}. ",
gameObject.name, brain.gameObject.name,
brain.brainParameters.vectorObservationSize,
_info.vectorObservation.Count));
}
_info.stackedVectorObservation.RemoveRange(
0, param.vectorObservationSize);
_info.stackedVectorObservation.AddRange(_info.vectorObservation);
}
else
{
if (_info.vectorObservation.Count != 1)
{
throw new UnityAgentsException(string.Format(
"Vector Observation size mismatch between discrete agent" +
" {0} and brain {1}. Was Expecting {2} but received {3}. ",
gameObject.name, brain.gameObject.name,
1, _info.vectorObservation.Count));
}
_info.stackedVectorObservation.RemoveRange(0, 1);
_info.stackedVectorObservation.AddRange(_info.vectorObservation);
}
_info.visualObservations.Clear();
if (param.cameraResolutions.Length > agentParameters.agentCameras.Count)
{
throw new UnityAgentsException(string.Format(
"Not enough cameras for agent {0}: Brain {1} expecting at " +
"least {2} cameras but only {3} were present.",
gameObject.name, brain.gameObject.name,
brain.brainParameters.cameraResolutions.Length,
agentParameters.agentCameras.Count));
}
for (int i = 0; i < brain.brainParameters.cameraResolutions.Length; i++)
{
_info.visualObservations.Add(ObservationToTexture(
agentParameters.agentCameras[i],
param.cameraResolutions[i].width,
param.cameraResolutions[i].height));
}
_info.reward = reward;
_info.done = done;
_info.maxStepReached = maxStepReached;
_info.id = id;
brain.SendState(this, _info);
_info.textObservation = "";
}
/// <summary>
/// Collects the observations. Must be implemented by the developer.
/// </summary>
public virtual void CollectObservations()
{
}
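// Illustrative sketch of an override feeding a four-float continuous
// observation; the brain's Vector Observation size would have to match.
// "rb" is the hypothetical Rigidbody cached in InitializeAgent above.
//
//     public override void CollectObservations()
//     {
//         AddVectorObs(transform.position.x);
//         AddVectorObs(transform.position.z);
//         AddVectorObs(rb.velocity.x);
//         AddVectorObs(rb.velocity.z);
//     }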
/// <summary>
/// Adds a vector observation.
/// Note that the number of vector observations added
/// must be the same at each CollectObservations call.
/// </summary>
/// <param name="observation">The float value to add to
/// the vector observation.</param>
internal void AddVectorObs(float observation)
{
_info.vectorObservation.Add(observation);
}
internal void SetTextObs(object s)
{
_info.textObservation = s.ToString();
}
/// <summary>
/// Defines agent-specific behavior at every step depending on the action.
/// Must be implemented in agent-specific child class.
/// Note: If your state is discrete, you need to convert your
/// state into a list of floats with length 1.
/// </summary>
/// <param name="action">The action the agent receives
/// from the brain.</param>
public virtual void AgentAction(float[] action)
{
}
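// Illustrative sketch of an override applying a two-float continuous
// action as a force; the action indices, force scale, and per-step penalty
// are assumptions for the example only.
//
//     public override void AgentAction(float[] action)
//     {
//         var move = new Vector3(action[0], 0f, action[1]);
//         rb.AddForce(move * 10f);   // "rb" as cached in InitializeAgent
//         AddReward(-0.0005f);
//     }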
/// <summary>
/// Defines agent-specific behavior when done.
/// Must be implemented in agent-specific child class.
/// Is called when the Agent is done if resetOnDone is false.
/// The agent will remain done.
/// You can use this method to remove the agent from the scene.
/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>
/// Defines agent-specific reset logic.
/// Must be implemented in agent-specific child class.
/// Is called when the academy is done.
/// Is called when the Agent is done if resetOnDone is true.
/// </summary>
public virtual void AgentReset()
{
}
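// Illustrative sketch of an override restoring the agent to a start state
// between episodes. "startPosition" is a hypothetical field captured by
// the subclass at initialization.
//
//     public override void AgentReset()
//     {
//         rb.velocity = Vector3.zero;
//         transform.position = startPosition;
//     }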
/// <summary>
/// Is called when the agent resets.
/// </summary>
public void _AgentReset()
{
ResetState();
stepCounter = 0;
AgentReset();
}
/// Is used by the brain to give a new action to the agent.
public void UpdateAction(AgentAction action)
{
_action = action;
}
public void UpdateVectorAction(float[] v)
{
_action.vectorActions = v;
}
public void UpdateMemoriesAction(List<float> v)
{
_action.memories = v;
}
public void UpdateTextAction(string t)
{
_action.textActions = t;
}
/// <summary>
/// Sets the status of the agent.
/// </summary>
/// <param name="acaMaxStep">If set to true,
/// the agent must set maxStepReached.</param>
/// <param name="acaDone">If set to true,
/// the agent must set done.</param>
/// <param name="acaStepCounter">The current step count of the academy.</param>
private void SetStatus(bool acaMaxStep, bool acaDone, int acaStepCounter)
{
if (acaDone)
acaStepCounter = 0;
MakeRequests(acaStepCounter);
if (acaMaxStep)
maxStepReached = true;
if (acaDone)
{
Done();
hasAlreadyReset = false;
// If the Academy needs to reset, the agent should reset
// even if it has reset recently.
}
}
/// <summary>
/// Signals the agent that it must reset if its done flag is set to true.
/// </summary>
private void ResetIfDone()
{
// If an agent is done, then it will also
// request a decision and an action
if (IsDone())
{
if (agentParameters.resetOnDone)
{
if (agentParameters.onDemandDecision)
{
if (!hasAlreadyReset)
{
// If event based, the agent can reset as soon
// as it is done
_AgentReset();
hasAlreadyReset = true;
}
}
else if (requestDecision)
{
// If not event based, the agent must wait to request a
// decision before resetting to keep multiple agents in sync.
_AgentReset();
}
}
else
{
terminate = true;
RequestDecision();
}
}
}
/// <summary>
/// Signals the agent that it must send its info to the brain.
/// </summary>
private void SendState()
{
if (requestDecision)
{
SendStateToBrain();
ResetReward();
done = false;
maxStepReached = false;
requestDecision = false;
hasAlreadyReset = false;
}
}
/// Is used by the brain to make the agent perform a step.
private void _AgentStep()
{
if (terminate)
{
terminate = false;
ResetReward();
done = false;
maxStepReached = false;
requestDecision = false;
requestAction = false;
hasAlreadyReset = false;
OnDisable();
AgentOnDone();
}
if ((requestAction) && (brain != null))
{
requestAction = false;
AgentAction(_action.vectorActions);
}
if ((stepCounter >= agentParameters.maxStep)
&& (agentParameters.maxStep > 0))
{
maxStepReached = true;
Done();
}
stepCounter += 1;
}
/// <summary>
/// Is called after every step, contains the logic to decide if the agent
/// will request a decision at the next step.
/// </summary>
private void MakeRequests(int acaStepCounter)
{
agentParameters.numberOfActionsBetweenDecisions =
Mathf.Max(agentParameters.numberOfActionsBetweenDecisions, 1);
if (!agentParameters.onDemandDecision)
{
RequestAction();
if (acaStepCounter %
agentParameters.numberOfActionsBetweenDecisions == 0)
{
RequestDecision();
}
}
}
/** Contains logic for converting a camera component into a Texture2D. */
public Texture2D ObservationToTexture(Camera cam, int width, int height)
{
Rect oldRec = cam.rect;
cam.rect = new Rect(0f, 0f, 1f, 1f);
var depth = 24;
var format = RenderTextureFormat.Default;
var readWrite = RenderTextureReadWrite.Default;
var tempRT =
RenderTexture.GetTemporary(width, height, depth, format, readWrite);
var tex = new Texture2D(width, height, TextureFormat.RGB24, false);
var prevActiveRT = RenderTexture.active;
var prevCameraRT = cam.targetTexture;
// render to offscreen texture (readonly from CPU side)
RenderTexture.active = tempRT;
cam.targetTexture = tempRT;
cam.Render();
tex.ReadPixels(new Rect(0, 0, tex.width, tex.height), 0, 0);
tex.Apply();
cam.targetTexture = prevCameraRT;
cam.rect = oldRec;
RenderTexture.active = prevActiveRT;
RenderTexture.ReleaseTemporary(tempRT);
return tex;
}
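// Illustrative sketch: this method can also be called directly, e.g. to
// inspect what a brain-facing camera sees. "debugCamera" and the 84x84
// resolution are assumptions for the example.
//
//     Texture2D frame = ObservationToTexture(debugCamera, 84, 84);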
}