using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status, that is sent to the Brain.
/// </summary>
public struct AgentInfo
{
    /// <summary>
    /// Most recent agent vector (i.e. numeric) observation.
    /// </summary>
    public List<float> vectorObservation;

    /// <summary>
    /// The previous agent vector observations, stacked. The length of the
    /// history (i.e. number of vector observations to stack) is specified
    /// in the Brain parameters.
    /// </summary>
    public List<float> stackedVectorObservation;

    /// <summary>
    /// Most recent agent camera (i.e. texture) observation.
    /// </summary>
    public List<Texture2D> visualObservations;

    /// <summary>
    /// Most recent text observation.
    /// </summary>
    public string textObservation;

    /// <summary>
    /// Keeps track of the last vector action taken by the Brain.
    /// </summary>
    public float[] storedVectorActions;

    /// <summary>
    /// Keeps track of the last text action taken by the Brain.
    /// </summary>
    public string storedTextActions;

    /// <summary>
    /// Used by the Trainer to store information about the agent. This data
    /// structure is not consumed or modified by the agent directly; the
    /// agent is just the owner of its trainer's memory. Currently, however,
    /// the size of the memory is in the Brain properties.
    /// </summary>
    public List<float> memories;

    /// <summary>
    /// Current agent reward.
    /// </summary>
    public float reward;

    /// <summary>
    /// Whether the agent is done or not.
    /// </summary>
    public bool done;

    /// <summary>
    /// Whether the agent has reached its max step count for this episode.
    /// </summary>
    public bool maxStepReached;

    /// <summary>
    /// Unique identifier each agent receives at initialization. It is used
    /// to separate between different agents in the environment.
    /// </summary>
    public int id;
}

/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent when taking a decision.
/// </summary>
public struct AgentAction
{
    public float[] vectorActions;
    public string textActions;
    public List<float> memories;
}

/// <summary>
/// Struct that contains all the Agent-specific parameters provided in the
/// Editor. This excludes the Brain linked to the Agent since it can be
/// modified programmatically.
/// </summary>
[System.Serializable]
public class AgentParameters
{
    /// <summary>
    /// The list of the Camera GameObjects the agent uses for visual
    /// observations.
    /// </summary>
    public List<Camera> agentCameras = new List<Camera>();

    /// <summary>
    /// The maximum number of steps the agent takes before being done.
    /// </summary>
    /// <remarks>
    /// If set to 0, the agent can only be set to done programmatically (or
    /// when the Academy is done).
    /// If set to any positive integer, the agent will be set to done after
    /// that many steps. Note that setting the max step to a value greater
    /// than the academy max step value renders it useless.
    /// </remarks>
    public int maxStep;

    /// <summary>
    /// Determines the behaviour of the agent when done.
    /// </summary>
    /// <remarks>
    /// If true, the agent will reset when done and start a new episode.
    /// Otherwise, the agent will remain done and its behavior will be
    /// dictated by the AgentOnDone method.
    /// </remarks>
    public bool resetOnDone = true;

    /// <summary>
    /// Whether to enable On Demand Decision Making or make a decision at
    /// every step.
    /// </summary>
    public bool onDemandDecision;

    /// <summary>
    /// Number of actions between decisions (used when On Demand Decision
    /// Making is turned off).
    /// </summary>
    public int numberOfActionsBetweenDecisions;
}
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#agent")] |
|
|
|
/** Generic functions for parent Agent class. |
|
|
|
* Contains all logic for Brain-Agent communication and Agent-Environment |
|
|
|
* interaction. |
|
|
|
*/ |
|
|
|
/// <summary>
|
|
|
|
/// Agent Monobehavior class that is attached to a Unity GameObject, making it
|
|
|
|
/// an Agent. An agent produces observations and takes actions in the
|
|
|
|
/// environment. Observations are determined by the cameras attached
|
|
|
|
/// to the agent in addition to the vector observations implemented by the
|
|
|
|
/// user in <see cref="CollectObservations"/>. On the other hand, actions
|
|
|
|
/// are determined by decisions produced by a linked Brain. Currently, this
|
|
|
|
/// class is expected to be extended to implement the desired agent behavior.
|
|
|
|
/// </summary>
|
|
|
|
/// <remarks>
|
|
|
|
/// Simply speaking, an agent roams through an environment and at each step
|
|
|
|
/// of the environment extracts its current observation, sends them to its
|
|
|
|
/// linked brain and in return receives an action from its brain. In practice,
|
|
|
|
/// however, an agent need not send its observation at every step since very
|
|
|
|
/// little may have changed between sucessive steps. Currently, how often an
|
|
|
|
/// agent updates its brain with a fresh observation is determined by the
|
|
|
|
/// Academy.
|
|
|
|
///
|
|
|
|
/// At any step, an agent may be considered <see cref="done"/>.
|
|
|
|
/// This could occur due to a variety of reasons:
|
|
|
|
/// - The agent reached an end state within its environment.
|
|
|
|
/// - The agent reached the maximum # of steps (i.e. timed out).
|
|
|
|
/// - The academy reached the maximum # of steps (forced agent to be done).
|
|
|
|
///
|
|
|
|
/// Here, an agent reaches an end state if it completes its task successfully
|
|
|
|
/// or somehow fails along the way. In the case where an agent is done before
|
|
|
|
/// the academy, it either resets and restarts, or just lingers until the
|
|
|
|
/// academy is done.
|
|
|
|
///
|
|
|
|
/// An important note regarding steps and episodes is due. Here, an agent step
|
|
|
|
/// corresponds to an academy step, which also corresponds to Unity
|
|
|
|
/// environment step (i.e. each FixedUpdate call). This is not the case for
|
|
|
|
/// episodes. The academy controls the global episode count and each agent
|
|
|
|
/// controls its own local episode count and can reset and start a new local
|
|
|
|
/// episode independently (based on its own experience). Thus an academy
|
|
|
|
/// (global) episode can be viewed as the upper-bound on an agents episode
|
|
|
|
/// length and that within a single global episode, an agent may have completed
|
|
|
|
/// multiple local episodes. Consequently, if an agent max step is
|
|
|
|
/// set to a value larger than the academy max steps value, then the academy
|
|
|
|
/// value takes precedence (since the agent max step will never be reached).
|
|
|
|
///
|
|
|
|
/// Lastly, note that at any step the brain linked to the agent is allowed to
|
|
|
|
/// change programmatically with <see cref="GiveBrain"/>.
|
|
|
|
///
|
|
|
|
/// Implementation-wise, it is required that this class is extended and the
|
|
|
|
/// virtual methods overridden. For sample implementations of agent behavior,
|
|
|
|
/// see the Examples/ directory within this Unity project.
|
|
|
|
/// </remarks>
|
|
|
|
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" + |
|
|
|
"docs/Learning-Environment-Design-Agent.md")] |

    /// <summary>
    /// The Brain attached to this agent. A brain can be attached either
    /// directly from the Editor through AgentEditor or
    /// programmatically through <see cref="GiveBrain"/>. It is OK for an
    /// agent to not have a brain, as long as no decision is requested.
    /// Use the inspector to drag the desired brain GameObject into
    /// the Brain field.
    /// </summary>
    public Brain brain;

    /// Agent parameters specified within the Editor via AgentEditor.
    [HideInInspector]
    public AgentParameters agentParameters;

    /// Current Agent information (message sent to Brain).
    AgentInfo info;

    /// Current Agent action (message sent from Brain).
    AgentAction action;

    /// Represents the reward the agent accumulated during the current step.
    /// It is reset to 0 at the beginning of every step.
    /// Should be set to a positive value when the agent performs a "good"
    /// action that we wish to reinforce/reward, and set to a negative value
    /// when the agent performs a "bad" action that we wish to punish/deter.
    float reward;

    /// Keeps track of the cumulative reward in this episode.
    float cumulativeReward;

    /// Whether or not the agent requests an action.
    bool requestAction;

    /// Whether or not the agent requests a decision.
    bool requestDecision;

    /// Whether or not the agent has completed the episode. This may be due
    /// to either reaching a success or fail state, or reaching the maximum
    /// number of steps (i.e. timing out).
    bool done;

    /// Whether or not the agent reached the maximum number of steps.
    bool maxStepReached;

    /// Keeps track of the number of steps taken by the agent in this episode.
    /// Note that this value is different for each agent, and may not overlap
    /// with the step counter in the Academy, since agents reset based on
    /// their own experience.
    int stepCount;

    // Flag to signify that an agent has been reset but the fact that it is
    // done has not been communicated (required for On Demand Decision Making).
    bool hasAlreadyReset;

    // Flag to signify that an agent is done and should not reset until
    // the fact that it is done has been communicated.
    bool terminate;

    /// Unique identifier each agent receives at initialization. It is used
    /// to separate between different agents in the environment.
    int id;

    /// MonoBehaviour function that is called when the attached GameObject
    /// becomes enabled or active.
    void OnEnable()
    {
        id = gameObject.GetInstanceID();
        Academy academy = Object.FindObjectOfType<Academy>() as Academy;
        OnEnableHelper(academy);
    }

    /// Helper method for the <see cref="OnEnable"/> event, created to
    /// facilitate testing.
    void OnEnableHelper(Academy academy)
    {
        info = new AgentInfo();
        action = new AgentAction();

        if (academy == null)
        {
            throw new UnityAgentsException(
                "No Academy Component could be found in the scene.");
        }

        academy.AgentSetStatus += SetStatus;
        academy.AgentResetIfDone += ResetIfDone;
        academy.AgentSendState += SendInfo;
        academy.AgentAct += AgentStep;
        academy.AgentForceReset += _AgentReset;

        if (brain != null)
        {
            ResetData();
        }
        else
        {
            Debug.Log(
                string.Format(
                    "The Agent component attached to the " +
                    "GameObject {0} was initialized without a brain.",
                    gameObject.name));
        }
    }

    /// MonoBehaviour function that is called when the attached GameObject
    /// becomes disabled or inactive.
    void OnDisable()
    {
        Academy academy = Object.FindObjectOfType<Academy>() as Academy;
        if (academy != null)
        {
            academy.AgentSetStatus -= SetStatus;
            academy.AgentResetIfDone -= ResetIfDone;
            academy.AgentSendState -= SendInfo;
            academy.AgentAct -= AgentStep;
            academy.AgentForceReset -= _AgentReset;
        }
    }

    /// <summary>
    /// Updates the Brain for the agent. Any brain currently assigned to the
    /// agent will be replaced with the provided one.
    /// </summary>
    /// <remarks>
    /// The agent unsubscribes from its current brain (if it has one) and
    /// subscribes to the provided brain. This enables contextual brains, that
    /// is, updating the behaviour (hence brain) of the agent depending on
    /// the context of the game. For example, we may utilize one (wandering)
    /// brain when an agent is randomly exploring an open world, but switch
    /// to another (fighting) brain when it comes into contact with an enemy.
    /// </remarks>
    /// <param name="brain">New brain to subscribe this agent to</param>
    public void GiveBrain(Brain brain)
    {
        this.brain = brain;
        ResetData();
    }

    /// <summary>
    /// Returns the current step counter (within the current episode).
    /// </summary>
    /// <returns>
    /// Current step count.
    /// </returns>
    public int GetStepCount()
    {
        return stepCount;
    }
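
    // Illustrative use of GiveBrain for contextual behaviour (the brain
    // fields and the enemy check below are assumptions, not part of this
    // file):
    //
    //   if (Vector3.Distance(transform.position, enemy.position) < 5f)
    //       GiveBrain(fightingBrain);
    //   else
    //       GiveBrain(wanderingBrain);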

    /// <summary>
    /// Resets the step reward and possibly the episode reward for the agent.
    /// </summary>
    public void ResetReward()
    {
        reward = 0f;
        if (done)
        {
            cumulativeReward = 0f;
        }
    }

    /// <summary>
    /// Overrides the current step reward of the agent and updates the
    /// episode reward accordingly.
    /// </summary>
    /// <param name="reward">The new value of the reward.</param>
    public void SetReward(float reward)
    {
        cumulativeReward += (reward - this.reward);
        this.reward = reward;
    }

    /// <summary>
    /// Increments the step and episode rewards by the provided value.
    /// </summary>
    /// <param name="increment">Incremental reward value.</param>
    public void AddReward(float increment)
    {
        reward += increment;
        cumulativeReward += increment;
    }
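
    // Typical reward-shaping usage inside an AgentAction override (the
    // specific values are illustrative assumptions, not prescribed by this
    // file): AddReward(-0.001f) as a small per-step time penalty, and
    // SetReward(1f) followed by Done() once the task is solved.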

    /// <summary>
    /// Retrieves the step reward for the Agent.
    /// </summary>
    /// <returns>The step reward.</returns>
    public float GetReward()
    {
        return reward;
    }

    /// <summary>
    /// Retrieves the episode reward for the Agent.
    /// </summary>
    /// <returns>The episode reward.</returns>
    public float GetCumulativeReward()
    {
        return cumulativeReward;
    }

    /// <summary>
    /// Is called when the agent is done. Either game-over, victory or
    /// timeout. Sets the done flag to true.
    /// </summary>
    public void Done()
    {
        done = true;
    }

    /// <summary>
    /// Is called when the agent must request the brain for a new decision.
    /// </summary>
    public void RequestDecision()
    {
        requestDecision = true;
        RequestAction();
    }

    /// <summary>
    /// Is called when the agent must perform a new action.
    /// </summary>
    public void RequestAction()
    {
        requestAction = true;
    }
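
    // When On Demand Decision Making is enabled, the subclass decides when
    // to query the brain. A hedged sketch (the trigger condition below is
    // an assumption):
    //
    //   void FixedUpdate()
    //   {
    //       if (somethingImportantChanged)
    //       {
    //           RequestDecision();  // fresh observation and a new action
    //       }
    //   }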

    /// <summary>
    /// Indicates if the agent has reached its maximum number of steps.
    /// </summary>
    /// <returns>
    /// <c>true</c>, if max step was reached, <c>false</c> otherwise.
    /// </returns>
    public bool IsMaxStepReached()
    {
        return maxStepReached;
    }

    /// <summary>
    /// Indicates if the agent is done.
    /// </summary>
    /// <returns>
    /// <c>true</c>, if the agent is done, <c>false</c> otherwise.
    /// </returns>
    public bool IsDone()
    {
        return done;
    }

    /// Helper function that resets all the data structures associated with
    /// the agent, including its info and action fields. Is called when the
    /// agent is being initialized, resets at the end of an episode, or
    /// changes brain.
    void ResetData()
    {
        if (brain == null)
        {
            return;
        }

        BrainParameters param = brain.brainParameters;
        if (param.vectorActionSpaceType == SpaceType.continuous)
        {
            action.vectorActions = new float[param.vectorActionSize];
            info.storedVectorActions = new float[param.vectorActionSize];
        }
        else
        {
            action.vectorActions = new float[1];
            info.storedVectorActions = new float[1];
        }

        action.textActions = "";
        info.memories = new List<float>();
        action.memories = new List<float>();
        if (param.vectorObservationSpaceType == SpaceType.continuous)
        {
            info.vectorObservation =
                new List<float>(param.vectorObservationSize);
            info.stackedVectorObservation =
                new List<float>(param.vectorObservationSize
                                * param.numStackedVectorObservations);
            info.stackedVectorObservation.AddRange(
                new float[param.vectorObservationSize
                          * param.numStackedVectorObservations]);
        }
        else
        {
            info.vectorObservation = new List<float>(1);
            info.stackedVectorObservation =
                new List<float>(param.numStackedVectorObservations);
            info.stackedVectorObservation.AddRange(
                new float[param.numStackedVectorObservations]);
        }

        info.visualObservations = new List<Texture2D>();
    }

    /// <summary>
    /// Initializes the agent, called only once when the agent is enabled.
    /// Can be left empty if there is no special, unique set-up behavior for
    /// the agent.
    /// </summary>
    /// <remarks>
    /// One sample use is to store local references to other objects in the
    /// scene which would facilitate computing this agent's observation.
    /// </remarks>
    public virtual void InitializeAgent()
    {
    }
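
    // A hedged example override (the Rigidbody component and field name are
    // assumptions):
    //
    //   Rigidbody rigidBody;
    //
    //   public override void InitializeAgent()
    //   {
    //       rigidBody = GetComponent<Rigidbody>();  // cached for later observations
    //   }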

    /// Sends the Agent info to the linked Brain.
    void SendInfoToBrain()
    {
        if (brain == null)
        {
            return;
        }

        info.memories = action.memories;
        info.storedVectorActions = action.vectorActions;
        info.storedTextActions = action.textActions;
        info.vectorObservation.Clear();
        CollectObservations();

        BrainParameters param = brain.brainParameters;
        if (param.vectorObservationSpaceType == SpaceType.continuous)
        {
            if (info.vectorObservation.Count != param.vectorObservationSize)
            {
                throw new UnityAgentsException(string.Format(
                    "Vector Observation size mismatch between continuous " +
                    "agent {0} and brain {1}. " +
                    "Was expecting {2} but received {3}.",
                    gameObject.name, brain.gameObject.name,
                    brain.brainParameters.vectorObservationSize,
                    info.vectorObservation.Count));
            }

            info.stackedVectorObservation.RemoveRange(
                0, param.vectorObservationSize);
            info.stackedVectorObservation.AddRange(info.vectorObservation);
        }
        else
        {
            if (info.vectorObservation.Count != 1)
            {
                throw new UnityAgentsException(string.Format(
                    "Vector Observation size mismatch between discrete " +
                    "agent {0} and brain {1}. " +
                    "Was expecting {2} but received {3}.",
                    gameObject.name, brain.gameObject.name,
                    1, info.vectorObservation.Count));
            }

            info.stackedVectorObservation.RemoveRange(0, 1);
            info.stackedVectorObservation.AddRange(info.vectorObservation);
        }

        info.visualObservations.Clear();
        if (param.cameraResolutions.Length > agentParameters.agentCameras.Count)
        {
            throw new UnityAgentsException(string.Format(
                "Not enough cameras for agent {0}: Brain {1} expecting at " +
                "least {2} cameras but only {3} were present.",
                gameObject.name, brain.gameObject.name,
                brain.brainParameters.cameraResolutions.Length,
                agentParameters.agentCameras.Count));
        }

        for (int i = 0; i < brain.brainParameters.cameraResolutions.Length; i++)
        {
            info.visualObservations.Add(ObservationToTexture(
                agentParameters.agentCameras[i],
                param.cameraResolutions[i].width,
                param.cameraResolutions[i].height));
        }

        info.reward = reward;
        info.done = done;
        info.maxStepReached = maxStepReached;
        info.id = id;

        brain.SendState(this, info);
        info.textObservation = "";
    }

    /// <summary>
    /// Collects the (vector, visual, text) observations of the agent.
    /// The agent observation describes the current environment from the
    /// perspective of the agent.
    /// </summary>
    /// <remarks>
    /// Simply, an agent's observation is any environment information that
    /// helps the Agent achieve its goal. For example, for a fighting Agent,
    /// its observation could include distances to friends or enemies, or the
    /// current level of ammunition at its disposal.
    /// Recall that an Agent may attach vector, visual or textual observations.
    /// Vector observations are added by calling the provided helper methods:
    ///     - <see cref="AddVectorObs(int)"/>
    ///     - <see cref="AddVectorObs(float)"/>
    ///     - <see cref="AddVectorObs(Vector3)"/>
    ///     - <see cref="AddVectorObs(Vector2)"/>
    ///     - <see cref="AddVectorObs(float[])"/>
    ///     - <see cref="AddVectorObs(List{float})"/>
    ///     - <see cref="AddVectorObs(Quaternion)"/>
    /// Depending on your environment, any combination of these helpers can
    /// be used. They just need to be used in the exact same order each time
    /// this method is called and the resulting size of the vector observation
    /// needs to match the vectorObservationSize attribute of the linked Brain.
    /// Visual observations are implicitly added from the cameras attached to
    /// the Agent.
    /// Lastly, textual observations are added using
    /// <see cref="SetTextObs(string)"/>.
    /// </remarks>
    public virtual void CollectObservations()
    {
    }
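
    // A hedged example override (the "target" field is an assumption):
    //
    //   public override void CollectObservations()
    //   {
    //       AddVectorObs(transform.position);                    // 3 floats
    //       AddVectorObs(target.position - transform.position);  // 3 floats
    //       AddVectorObs(GetStepCount() / (float)agentParameters.maxStep); // 1 float
    //   }
    //
    // The linked Brain's vectorObservationSize would need to be 7 here, and
    // the helpers must run in the same order on every call.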

    /// <summary>
    /// Adds a float observation to the vector observations of the agent.
    /// Increases the size of the agent's vector observation by 1.
    /// Note that the total number of vector observations added must be the
    /// same at each CollectObservations call.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(float observation)
    {
        info.vectorObservation.Add(observation);
    }

    /// <summary>
    /// Adds an integer observation to the vector observations of the agent.
    /// Increases the size of the agent's vector observation by 1.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(int observation)
    {
        info.vectorObservation.Add((float)observation);
    }

    /// <summary>
    /// Adds a Vector3 observation to the vector observations of the agent.
    /// Increases the size of the agent's vector observation by 3.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(Vector3 observation)
    {
        info.vectorObservation.Add(observation.x);
        info.vectorObservation.Add(observation.y);
        info.vectorObservation.Add(observation.z);
    }

    /// <summary>
    /// Adds a Vector2 observation to the vector observations of the agent.
    /// Increases the size of the agent's vector observation by 2.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(Vector2 observation)
    {
        info.vectorObservation.Add(observation.x);
        info.vectorObservation.Add(observation.y);
    }

    /// <summary>
    /// Adds a float array observation to the vector observations of the
    /// agent. Increases the size of the agent's vector observation by the
    /// size of the array.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(float[] observation)
    {
        info.vectorObservation.AddRange(observation);
    }

    /// <summary>
    /// Adds a float list observation to the vector observations of the
    /// agent. Increases the size of the agent's vector observation by the
    /// size of the list.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(List<float> observation)
    {
        info.vectorObservation.AddRange(observation);
    }

    /// <summary>
    /// Adds a quaternion observation to the vector observations of the
    /// agent. Increases the size of the agent's vector observation by 4.
    /// </summary>
    /// <param name="observation">Observation.</param>
    protected void AddVectorObs(Quaternion observation)
    {
        info.vectorObservation.Add(observation.x);
        info.vectorObservation.Add(observation.y);
        info.vectorObservation.Add(observation.z);
        info.vectorObservation.Add(observation.w);
    }

    /// <summary>
    /// Sets the text observation.
    /// </summary>
    /// <param name="textObservation">The text observation.</param>
    public void SetTextObs(string textObservation)
    {
        info.textObservation = textObservation;
    }

    /// <summary>
    /// Specifies the agent behavior at every step based on the provided
    /// action.
    /// </summary>
    /// <param name="vectorAction">
    /// Vector action. Note that for discrete actions, the provided array
    /// will be of length 1.
    /// </param>
    /// <param name="textAction">Text action.</param>
    public virtual void AgentAction(float[] vectorAction, string textAction)
    {
    }
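
    // For a discrete action space the array has length 1 and carries the
    // chosen action index. A hedged sketch:
    //
    //   public override void AgentAction(float[] vectorAction, string textAction)
    //   {
    //       switch ((int)vectorAction[0])
    //       {
    //           case 0: transform.Translate(Vector3.left);  break;
    //           case 1: transform.Translate(Vector3.right); break;
    //       }
    //   }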

    /// <summary>
    /// Specifies the agent behavior when done and
    /// <see cref="AgentParameters.resetOnDone"/> is false. The agent will
    /// remain done. This method can be used to remove the agent from the
    /// scene.
    /// </summary>
    public virtual void AgentOnDone()
    {
    }

    /// <summary>
    /// Specifies the agent behavior when being reset, which can be due to
    /// the agent or Academy being done (i.e. completion of local or global
    /// episode).
    /// </summary>
    public virtual void AgentReset()
    {
    }

    /// An internal reset method that updates internal data structures in
    /// addition to calling <see cref="AgentReset"/>.
    void _AgentReset()
    {
        ResetData();
        stepCount = 0;
        AgentReset();
    }

    /// <summary>
    /// Updates the vector action. Is used by the Brain to provide the agent
    /// with a new action.
    /// </summary>
    /// <param name="vectorActions">Vector actions.</param>
    public void UpdateVectorAction(float[] vectorActions)
    {
        action.vectorActions = vectorActions;
    }

    /// <summary>
    /// Updates the memories action.
    /// </summary>
    /// <param name="memories">Memories.</param>
    public void UpdateMemoriesAction(List<float> memories)
    {
        action.memories = memories;
    }

    /// <summary>
    /// Updates the text action.
    /// </summary>
    /// <param name="textActions">Text actions.</param>
    public void UpdateTextAction(string textActions)
    {
        action.textActions = textActions;
    }

    /// <summary>
    /// Sets the status of the agent.
    /// </summary>
    /// <param name="academyMaxStep">If set to <c>true</c>, the agent must
    /// set maxStepReached.</param>
    /// <param name="academyDone">If set to <c>true</c>, the agent must set
    /// done.</param>
    /// <param name="academyStepCounter">Number of current steps in the
    /// episode.</param>
    void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
    {
        if (academyDone)
        {
            academyStepCounter = 0;
        }

        MakeRequests(academyStepCounter);
        if (academyMaxStep)
        {
            maxStepReached = true;
        }

        // If the Academy needs to reset, the agent should reset
        // even if it reset recently.
        if (academyDone)
        {
            Done();
            hasAlreadyReset = false;
        }
    }

    /// <summary>
    /// Signals the agent that it must reset if its done flag is set to true.
    /// </summary>
    void ResetIfDone()
    {
        // If an agent is done, then it will also
        // request for a decision and an action
        if (IsDone())
        {
            if (agentParameters.resetOnDone)
            {
                if (agentParameters.onDemandDecision)
                {
                    if (!hasAlreadyReset)
                    {
                        // If event-based, the agent can reset as soon
                        // as it is done.
                        _AgentReset();
                        hasAlreadyReset = true;
                    }
                }
                else if (requestDecision)
                {
                    // If not event-based, the agent must wait to request
                    // a decision before resetting to keep multiple agents
                    // in sync.
                    _AgentReset();
                }
            }
            else
            {
                terminate = true;
                RequestDecision();
            }
        }
    }

    /// <summary>
    /// Signals the agent that it must send its decision to the brain.
    /// </summary>
    void SendInfo()
    {
        if (requestDecision)
        {
            SendInfoToBrain();
            ResetReward();
            done = false;
            maxStepReached = false;
            requestDecision = false;
            hasAlreadyReset = false;
        }
    }

    /// Used by the brain to make the agent perform a step.
    void AgentStep()
    {
        if (terminate)
        {
            terminate = false;
            ResetReward();
            done = false;
            maxStepReached = false;
            requestDecision = false;
            requestAction = false;
            hasAlreadyReset = false;
            OnDisable();
            AgentOnDone();
        }

        if ((requestAction) && (brain != null))
        {
            requestAction = false;
            AgentAction(action.vectorActions, action.textActions);
        }

        if ((stepCount >= agentParameters.maxStep)
            && (agentParameters.maxStep > 0))
        {
            maxStepReached = true;
            Done();
        }

        stepCount += 1;
    }

    /// Issues an action request at every step and a decision request every
    /// numberOfActionsBetweenDecisions steps (used when On Demand Decision
    /// Making is turned off).
    void MakeRequests(int academyStepCounter)
    {
        agentParameters.numberOfActionsBetweenDecisions =
            Mathf.Max(agentParameters.numberOfActionsBetweenDecisions, 1);
        if (!agentParameters.onDemandDecision)
        {
            RequestAction();
            if (academyStepCounter %
                agentParameters.numberOfActionsBetweenDecisions == 0)
            {
                RequestDecision();
            }
        }
    }

    /// <summary>
    /// Converts a camera and corresponding resolution to a 2D texture.
    /// </summary>
    /// <returns>The 2D texture.</returns>
    /// <param name="camera">Camera.</param>
    /// <param name="width">Width of resulting 2D texture.</param>
    /// <param name="height">Height of resulting 2D texture.</param>
    public static Texture2D ObservationToTexture(Camera camera, int width, int height)
    {
        Rect oldRec = camera.rect;
        camera.rect = new Rect(0f, 0f, 1f, 1f);
        var depth = 24;
        var format = RenderTextureFormat.Default;
        var readWrite = RenderTextureReadWrite.Default;

        var tempRT =
            RenderTexture.GetTemporary(width, height, depth, format, readWrite);
        var tex = new Texture2D(width, height, TextureFormat.RGB24, false);

        var prevActiveRT = RenderTexture.active;
        var prevCameraRT = camera.targetTexture;

        // Render to the temporary offscreen texture, then read the pixels
        // back into the Texture2D.
        RenderTexture.active = tempRT;
        camera.targetTexture = tempRT;
        camera.Render();
        tex.ReadPixels(new Rect(0, 0, tex.width, tex.height), 0, 0);
        tex.Apply();

        // Restore the previous camera and render-texture state before
        // releasing the temporary texture.
        camera.targetTexture = prevCameraRT;
        camera.rect = oldRec;
        RenderTexture.active = prevActiveRT;
        RenderTexture.ReleaseTemporary(tempRT);
        return tex;
    }
}