
Merge pull request #418 from Unity-Technologies/dev-api-doc-agent

Comment improvements to Agent.cs.
Branch: develop-generalizationTraining-TrainerController
GitHub · 7 years ago
Commit 647b0a8f
6 files changed, 549 insertions(+), 307 deletions(-)
  1. unity-environment/Assets/ML-Agents/Editor/AgentEditor.cs (62)
  2. unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs (23)
  3. unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (7)
  4. unity-environment/Assets/ML-Agents/Scripts/Agent.cs (758)
  5. unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (2)
  6. unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs (4)

unity-environment/Assets/ML-Agents/Editor/AgentEditor.cs (62 changes)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEditor;
using System.Linq;
/*
This code is meant to modify the behavior of the inspector on Brain Components.
Depending on the type of brain that is used, the available fields will be
modified in the inspector accordingly.
*/

serializedAgent.Update();
SerializedProperty brain = serializedAgent.FindProperty("brain");
-SerializedProperty actionsPerDecision = serializedAgent.FindProperty("agentParameters.numberOfActionsBetweenDecisions");
-SerializedProperty maxSteps = serializedAgent.FindProperty("agentParameters.maxStep");
-SerializedProperty isResetOnDone = serializedAgent.FindProperty("agentParameters.resetOnDone");
-SerializedProperty isEBS = serializedAgent.FindProperty("agentParameters.onDemandDecision");
-SerializedProperty cameras = serializedAgent.FindProperty("agentParameters.agentCameras");
+SerializedProperty actionsPerDecision = serializedAgent.FindProperty(
+    "agentParameters.numberOfActionsBetweenDecisions");
+SerializedProperty maxSteps = serializedAgent.FindProperty(
+    "agentParameters.maxStep");
+SerializedProperty isResetOnDone = serializedAgent.FindProperty(
+    "agentParameters.resetOnDone");
+SerializedProperty isODD = serializedAgent.FindProperty(
+    "agentParameters.onDemandDecision");
+SerializedProperty cameras = serializedAgent.FindProperty(
+    "agentParameters.agentCameras");
EditorGUILayout.PropertyField(brain);

-EditorGUILayout.PropertyField(cameras.GetArrayElementAtIndex(i), new GUIContent("Camera " + (i + 1).ToString() + ": "));
+EditorGUILayout.PropertyField(
+    cameras.GetArrayElementAtIndex(i),
+    new GUIContent("Camera " + (i + 1).ToString() + ": "));
}
EditorGUILayout.BeginHorizontal();
if (GUILayout.Button("Add Camera", EditorStyles.miniButton))

cameras.arraySize--;
}
EditorGUILayout.EndHorizontal();
-EditorGUILayout.PropertyField(maxSteps, new GUIContent("Max Step",
-    "The per-agent maximum number of steps."));
-EditorGUILayout.PropertyField(isResetOnDone, new GUIContent("Reset On Done",
-    "If checked, the agent will reset on done. Else, AgentOnDone() will be called."));
-EditorGUILayout.PropertyField(isEBS, new GUIContent("On Demand Decision", "If checked, you must manually request decisions."));
-if (!isEBS.boolValue)
-    EditorGUILayout.PropertyField(actionsPerDecision, new GUIContent("Decision Frequency", "The agent will automatically request a " +
-        "decision every X steps and perform an action at every step."));
+EditorGUILayout.PropertyField(
+    maxSteps,
+    new GUIContent(
+        "Max Step", "The per-agent maximum number of steps."));
+EditorGUILayout.PropertyField(
+    isResetOnDone,
+    new GUIContent(
+        "Reset On Done",
+        "If checked, the agent will reset on done. Else, AgentOnDone() will be called."));
+EditorGUILayout.PropertyField(
+    isODD,
+    new GUIContent(
+        "On Demand Decision",
+        "If checked, you must manually request decisions."));
+if (!isODD.boolValue)
+    EditorGUILayout.PropertyField(
+        actionsPerDecision,
+        new GUIContent(
+            "Decision Frequency",
+            "The agent will automatically request a decision every X" +
+            " steps and perform an action at every step."));
actionsPerDecision.intValue = Mathf.Max(1, actionsPerDecision.intValue);
}

base.OnInspectorGUI();
}
}
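For reference, this inspector follows Unity's standard custom-editor pattern: sync the SerializedObject, draw the properties, then apply the modified values. A minimal sketch of that pattern, assuming a hypothetical MyAgent component that is not part of this PR (in a real project the two classes live in separate files, with the editor under an Editor/ folder):

    using UnityEngine;
    using UnityEditor;

    // Hypothetical component to inspect.
    public class MyAgent : MonoBehaviour
    {
        public int maxStep;
    }

    [CustomEditor(typeof(MyAgent))]
    public class MyAgentEditor : Editor
    {
        public override void OnInspectorGUI()
        {
            // Pull the latest serialized values before drawing.
            serializedObject.Update();
            // FindProperty takes the serialized field path; nested paths
            // like "agentParameters.maxStep" work the same way as above.
            SerializedProperty maxStep = serializedObject.FindProperty("maxStep");
            EditorGUILayout.PropertyField(
                maxStep,
                new GUIContent("Max Step", "The per-agent maximum number of steps."));
            // Write any edits back to the component.
            serializedObject.ApplyModifiedProperties();
        }
    }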

unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs (23 changes)


Assert.AreEqual(0, agent1.agentActionCalls);
Assert.AreEqual(0, agent2.agentActionCalls);
-MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
+MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("OnEnableHelper",
     BindingFlags.Instance | BindingFlags.NonPublic);
 MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
     BindingFlags.Instance | BindingFlags.NonPublic);

 MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
-    "_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
+    "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
 MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
     "InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);

 MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
-    "_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
+    "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
 MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
     "InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);

Assert.AreEqual(numberAcaReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);
-Assert.AreEqual(agent1StepSinceReset, agent1.stepCounter);
-Assert.AreEqual(agent2StepSinceReset, agent2.stepCounter);
+Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
+Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(numberAgent1Reset, agent1.agentResetCalls);
Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);

 MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
-    "_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
+    "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
 MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
     "InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);

Assert.AreEqual(i, aca.AcademyStepCalls);
-Assert.AreEqual(agent1StepSinceReset, agent1.stepCounter);
-Assert.AreEqual(agent2StepSinceReset, agent2.stepCounter);
+Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
+Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());

 MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
-    "_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
+    "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
 MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
     "InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);

{
Assert.AreEqual(i, aca.AcademyStepCalls);
-Assert.AreEqual(agent1StepSinceReset, agent1.stepCounter);
-Assert.AreEqual(agent2StepSinceReset, agent2.stepCounter);
+Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
+Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(agent1ResetOnDone, agent1.agentOnDoneCalls);
Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);

 MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
-    "_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
+    "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
 MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
     "InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);

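The renamed OnEnableHelper is private, so the tests locate it via reflection. A condensed sketch of that pattern, assuming the NUnit setup used elsewhere in this test file:

    using System.Reflection;
    using NUnit.Framework;

    public class ReflectionPatternTest
    {
        [Test]
        public void FindsPrivateEnableHelper()
        {
            // NonPublic | Instance is required to locate a private instance
            // method. The method name is a string, so a rename silently
            // breaks the lookup unless the test asserts it resolved.
            MethodInfo enableMethod = typeof(Agent).GetMethod(
                "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
            Assert.IsNotNull(enableMethod);
            // Invocation then passes the Academy as the single argument,
            // as in the tests above:
            // enableMethod.Invoke(agent, new object[] { academy });
        }
    }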
unity-environment/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (7 changes)


AddVectorObs(gameObject.transform.rotation.x);
AddVectorObs((ball.transform.position - gameObject.transform.position));
AddVectorObs(ball.transform.GetComponent<Rigidbody>().velocity);
-SetTextObs("Testing "+gameObject.GetInstanceID());
+SetTextObs("Testing " + gameObject.GetInstanceID());
}
public override void AgentAction(float[] vectorAction, string textAction)

gameObject.transform.Rotate(new Vector3(1, 0, 0), action_x);
}
SetReward(0.1f);
}
if ((ball.transform.position.y - gameObject.transform.position.y) < -2f ||
Mathf.Abs(ball.transform.position.x - gameObject.transform.position.x) > 3f ||

unity-environment/Assets/ML-Agents/Scripts/Agent.cs (758 changes)


using System.Collections;
using System.Collections.Generic;
-/// Agent info. The agent will send an instance of this class to the brain.
+/// Struct that contains all the information for an Agent, including its
+/// observations, actions and current status, that is sent to the Brain.
+/// <summary>
+/// Most recent agent vector (i.e. numeric) observation.
+/// </summary>
+/// <summary>
+/// The previous agent vector observations, stacked. The length of the
+/// history (i.e. number of vector observations to stack) is specified
+/// in the Brain parameters.
+/// </summary>
+/// <summary>
+/// Most recent agent camera (i.e. texture) observation.
+/// </summary>
-public List<float> memories;
+/// <summary>
+/// Most recent text observation.
+/// </summary>
-public float[] StoredVectorActions;
-public string StoredTextActions;
+/// <summary>
+/// Keeps track of the last vector action taken by the Brain.
+/// </summary>
+public float[] storedVectorActions;
+/// <summary>
+/// Keeps track of the last text action taken by the Brain.
+/// </summary>
+public string storedTextActions;
+/// <summary>
+/// Used by the Trainer to store information about the agent. This data
+/// structure is not consumed or modified by the agent directly; agents are
+/// just the owners of their trainer's memory. Currently, however, the
+/// size of the memory is in the Brain properties.
+/// </summary>
+public List<float> memories;
+/// <summary>
+/// Current agent reward.
+/// </summary>
+/// <summary>
+/// Whether the agent is done or not.
+/// </summary>
+/// <summary>
+/// Whether the agent has reached its max step count for this episode.
+/// </summary>
+/// <summary>
+/// Unique identifier each agent receives at initialization. It is used
+/// to separate between different agents in the environment.
+/// </summary>
-/// Agent action. The brain will send an instance of this class to the agent
-/// when taking a decision.
+/// Struct that contains the action information sent from the Brain to the
+/// Agent.
 /// </summary>
public struct AgentAction
{

}
 /// <summary>
-/// Agent parameters. Reflect the user's settings for the agents of the inspector.
+/// Struct that contains all the Agent-specific parameters provided in the
+/// Editor. This excludes the Brain linked to the Agent since it can be
+/// modified programmatically.
+/// <summary>
+/// The list of the Camera GameObjects the agent uses for visual
+/// observations.
+/// </summary>
+/// <summary>
+/// The maximum number of steps the agent takes before being done.
+/// </summary>
+/// <remarks>
+/// If set to 0, the agent can only be set to done programmatically (or
+/// when the Academy is done).
+/// If set to any positive integer, the agent will be set to done after
+/// that many steps. Note that setting the max step to a value greater
+/// than the academy max step value renders it useless.
+/// </remarks>
+/// <summary>
+/// Determines the behaviour of the agent when done.
+/// </summary>
+/// <remarks>
+/// If true, the agent will reset when done and start a new episode.
+/// Otherwise, the agent will remain done and its behavior will be
+/// dictated by the AgentOnDone method.
+/// </remarks>
+/// <summary>
+/// Whether to enable On Demand Decision Making or make a decision at
+/// every step.
+/// </summary>
+/// <summary>
+/// Number of actions between decisions (used when On Demand Decision
+/// Making is turned off).
+/// </summary>
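Since these are plain public fields on the Agent, they can also be set from a setup script. A minimal sketch, assuming agent is an Agent subclass instance in the scene (values are illustrative only):

    // Field names as documented above.
    agent.agentParameters.maxStep = 1000;            // done after 1000 steps (0 = only programmatic done)
    agent.agentParameters.resetOnDone = true;        // start a new local episode when done
    agent.agentParameters.onDemandDecision = false;  // use a fixed decision cadence instead
    agent.agentParameters.numberOfActionsBetweenDecisions = 5;  // decide every 5 steps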
-[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#agent")]
-/** Generic functions for parent Agent class.
- * Contains all logic for Brain-Agent communication and Agent-Environment
- * interaction.
- */
+/// <summary>
+/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
+/// an Agent. An agent produces observations and takes actions in the
+/// environment. Observations are determined by the cameras attached
+/// to the agent in addition to the vector observations implemented by the
+/// user in <see cref="CollectObservations"/>. On the other hand, actions
+/// are determined by decisions produced by a linked Brain. Currently, this
+/// class is expected to be extended to implement the desired agent behavior.
+/// </summary>
+/// <remarks>
+/// Simply speaking, an agent roams through an environment and at each step
+/// of the environment extracts its current observation, sends them to its
+/// linked brain and in return receives an action from its brain. In practice,
+/// however, an agent need not send its observation at every step since very
+/// little may have changed between successive steps. Currently, how often an
+/// agent updates its brain with a fresh observation is determined by the
+/// Academy.
+///
+/// At any step, an agent may be considered <see cref="done"/>.
+/// This could occur due to a variety of reasons:
+/// - The agent reached an end state within its environment.
+/// - The agent reached the maximum # of steps (i.e. timed out).
+/// - The academy reached the maximum # of steps (forced agent to be done).
+///
+/// Here, an agent reaches an end state if it completes its task successfully
+/// or somehow fails along the way. In the case where an agent is done before
+/// the academy, it either resets and restarts, or just lingers until the
+/// academy is done.
+///
+/// An important note regarding steps and episodes is due. Here, an agent step
+/// corresponds to an academy step, which also corresponds to a Unity
+/// environment step (i.e. each FixedUpdate call). This is not the case for
+/// episodes. The academy controls the global episode count and each agent
+/// controls its own local episode count and can reset and start a new local
+/// episode independently (based on its own experience). Thus an academy
+/// (global) episode can be viewed as the upper-bound on an agent's episode
+/// length, and within a single global episode, an agent may have completed
+/// multiple local episodes. Consequently, if an agent's max step is
+/// set to a value larger than the academy max step value, then the academy
+/// value takes precedence (since the agent max step will never be reached).
+///
+/// Lastly, note that at any step the brain linked to the agent is allowed to
+/// change programmatically with <see cref="GiveBrain"/>.
+///
+/// Implementation-wise, it is required that this class is extended and the
+/// virtual methods overridden. For sample implementations of agent behavior,
+/// see the Examples/ directory within this Unity project.
+/// </remarks>
+[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
+    "docs/Learning-Environment-Design-Agent.md")]
-/// The brain that will control this agent.
-/// Use the inspector to drag the desired brain gameObject into
-/// the Brain field.
-///</summary>
+/// The Brain attached to this agent. A brain can be attached either
+/// directly from the Editor through AgentEditor or
+/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
+/// to not have a brain, as long as no decision is requested.
+/// </summary>
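As the remarks above state, the class is meant to be extended. A minimal sketch of a concrete agent, using only methods documented in this file (the RollerAgent example itself is hypothetical and not part of this PR):

    using UnityEngine;

    public class RollerAgent : Agent
    {
        Rigidbody body;

        public override void InitializeAgent()
        {
            // Called once when the agent is enabled: cache scene references.
            body = GetComponent<Rigidbody>();
        }

        public override void CollectObservations()
        {
            // Same helpers, same order, every call.
            AddVectorObs(transform.position);  // 3 floats
            AddVectorObs(body.velocity);       // 3 floats
        }

        public override void AgentAction(float[] vectorAction, string textAction)
        {
            // Apply the Brain's continuous action, then shape the reward.
            body.AddForce(new Vector3(vectorAction[0], 0f, vectorAction[1]));
            AddReward(-0.001f);              // small per-step time penalty
            if (transform.position.y < -1f)  // fell off the platform
            {
                SetReward(-1f);
                Done();                      // ends this agent's local episode
            }
        }

        public override void AgentReset()
        {
            transform.position = Vector3.zero;
            body.velocity = Vector3.zero;
        }
    }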
-/// The info. This is the placeholder for the information the agent will send
-/// to the brain.
-private AgentInfo _info;
-/// <summary>
-/// The action. This is the placeholder for the actions the agent will receive.
-/// </summary>
-private AgentAction _action;
-[HideInInspector]
-public AgentParameters agentParameters;
+/// Current Agent information (message sent to Brain).
+AgentInfo info;
+/// Current Agent action (message sent from Brain).
+AgentAction action;
 /// <summary>
-/// The reward. Describes the reward for the given step of the agent.
-/// It is reset to 0 at the beginning of every step.
-/// Modify in AgentStep().
-/// Should be set to positive to reinforcement desired behavior, and
-/// set to a negative value to punish undesireable behavior.
-///</summary>
-private float reward;
+/// Represents the reward the agent accumulated during the current step.
+/// It is reset to 0 at the beginning of every step.
+/// Should be set to a positive value when the agent performs a "good"
+/// action that we wish to reinforce/reward, and set to a negative value
+/// when the agent performs a "bad" action that we wish to punish/deter.
+/// </summary>
+float reward;
+/// Keeps track of the cumulative reward in this episode.
+float cumulativeReward;
-private bool requestAction;
+bool requestAction;
-private bool requestDecision;
+bool requestDecision;
 /// <summary>
-/// Whether or not the agent is done
-/// Set to true when the agent has acted in some way which ends the
-/// episode for the given agent.
-///</summary>
-private bool done;
+/// Whether or not the agent has completed the episode. This may be due
+/// to either reaching a success or fail state, or reaching the maximum
+/// number of steps (i.e. timing out).
+/// </summary>
+bool done;
-/// Whether or not the max step is reached
-private bool maxStepReached;
+/// Whether or not the agent reached the maximum number of steps.
+bool maxStepReached;
-/// Do not modify: This keeps track of the cumulative reward.
-private float cumulativeReward;
-/// This keeps track of the number of steps taken by the agent each episode.
-[HideInInspector]
-public int stepCounter;
+/// Keeps track of the number of steps taken by the agent in this episode.
+/// Note that this value is different for each agent, and may not overlap
+/// with the step counter in the Academy, since agents reset based on
+/// their own experience.
+int stepCount;
-private bool hasAlreadyReset;
+// Flag to signify that an agent has been reset but the fact that it is
+// done has not been communicated (required for On Demand Decision Making).
+bool hasAlreadyReset;
-private bool terminate;
+// Flag to signify that an agent is done and should not reset until
+// the fact that it is done has been communicated.
+bool terminate;
+/// Agent parameters specified within the Editor via AgentEditor.
+[HideInInspector]
+public AgentParameters agentParameters;
-/// <summary> This is the unique Identifier each agent
-/// receives at initialization. It is used by the brain to identify
-/// the agent.
-/// </summary>
-private int id;
+/// Unique identifier each agent receives at initialization. It is used
+/// to separate between different agents in the environment.
+int id;
 /// <summary>
-/// Unity method called when the agent is instantiated or set to active.
+/// MonoBehaviour function that is called when the attached GameObject
+/// becomes enabled or active.
 /// </summary>
-private void OnEnable()
+void OnEnable()
-    Academy aca = Object.FindObjectOfType<Academy>() as Academy;
-    _InitializeAgent(aca);
+    Academy academy = Object.FindObjectOfType<Academy>() as Academy;
+    OnEnableHelper(academy);
 /// <summary>
-/// Is called when the agent is initialized.
+/// Helper method for the <see cref="OnEnable"/> event, created to
+/// facilitate testing.
 /// </summary>
-void _InitializeAgent(Academy aca)
+void OnEnableHelper(Academy academy)
-    _info = new AgentInfo();
-    _action = new AgentAction();
+    info = new AgentInfo();
+    action = new AgentAction();
-if (aca == null)
-    throw new UnityAgentsException("No Academy Component could be" +
-        "found in the scene.");
+if (academy == null)
+{
+    throw new UnityAgentsException(
+        "No Academy Component could be found in the scene.");
+}
-aca.AgentSetStatus += SetStatus;
-aca.AgentResetIfDone += ResetIfDone;
-aca.AgentSendState += SendState;
-aca.AgentAct += _AgentStep;
-aca.AgentForceReset += _AgentReset;
+academy.AgentSetStatus += SetStatus;
+academy.AgentResetIfDone += ResetIfDone;
+academy.AgentSendState += SendInfo;
+academy.AgentAct += AgentStep;
+academy.AgentForceReset += _AgentReset;
-ResetState();
+ResetData();
-    string.Format("The Agent component attached to the " +
-        "GameObject {0} was initialized without a brain."
-        , gameObject.name));
+    string.Format(
+        "The Agent component attached to the " +
+        "GameObject {0} was initialized without a brain.",
+        gameObject.name));
 /// <summary>
-/// Is called when the agent is disabled.
+/// MonoBehaviour function that is called when the attached GameObject
+/// becomes disabled or inactive.
 /// </summary>
-void _DisableAgent(Academy aca)
+void OnDisable()
+    Academy academy = Object.FindObjectOfType<Academy>() as Academy;
-if (aca != null)
+if (academy != null)
-    aca.AgentSetStatus -= SetStatus;
-    aca.AgentResetIfDone -= ResetIfDone;
-    aca.AgentSendState -= SendState;
-    aca.AgentAct -= _AgentStep;
-    aca.AgentForceReset -= _AgentReset;
+    academy.AgentSetStatus -= SetStatus;
+    academy.AgentResetIfDone -= ResetIfDone;
+    academy.AgentSendState -= SendInfo;
+    academy.AgentAct -= AgentStep;
+    academy.AgentForceReset -= _AgentReset;
-/// Gets called when the agent is destroyed or is set inactive.
-void OnDisable()
-{
-    Academy aca = Object.FindObjectOfType<Academy>() as Academy;
-    _DisableAgent(aca);
-}
 /// <summary>
-/// When GiveBrain is called, the agent unsubscribes from its
-/// previous brain and subscribes to the one passed in argument.
-/// Use this method to provide a brain to the agent via script.
-///<param name="b">The Brain the agent will subscribe to.</param>
-public void GiveBrain(Brain b)
-{
-    brain = b;
-    ResetState();
-}
+/// Updates the Brain for the agent. Any brain currently assigned to the
+/// agent will be replaced with the provided one.
+/// </summary>
+/// <remarks>
+/// The agent unsubscribes from its current brain (if it has one) and
+/// subscribes to the provided brain. This enables contextual brains, that
+/// is, updating the behaviour (hence brain) of the agent depending on
+/// the context of the game. For example, we may utilize one (wandering)
+/// brain when an agent is randomly exploring an open world, but switch
+/// to another (fighting) brain when it comes into contact with an enemy.
+/// </remarks>
+/// <param name="brain">New brain to subscribe this agent to</param>
+public void GiveBrain(Brain brain)
+{
+    this.brain = brain;
+    ResetData();
+}
+/// <summary>
+/// Returns the current step counter (within the current episode).
+/// </summary>
+/// <returns>
+/// Current step count.
+/// </returns>
+public int GetStepCount()
+{
+    return stepCount;
+}
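A short sketch of the contextual-brain switch described in the remarks, assuming two Brain fields on an Agent subclass that are assigned in the Editor (both fields and the OnEnemySpotted hook are hypothetical):

    public Brain wanderBrain;
    public Brain fightBrain;

    void OnEnemySpotted()
    {
        // Re-subscribes this agent to the fighting brain; GiveBrain also
        // resets the agent's internal data via ResetData().
        GiveBrain(fightBrain);
    }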
-/// Resets the reward of the agent
+/// Resets the step reward and possibly the episode reward for the agent.
 /// </summary>
 public void ResetReward()
 {
     reward = 0f;
     if (done)
     {
         cumulativeReward = 0f;
     }
 }
-/// Use this method to overrite the current reward of the agent.
-/// <param name="newValue">The new value of the reward</param>
-public void SetReward(float newValue)
-{
-    cumulativeReward += newValue - reward;
-    reward = newValue;
-}
+/// Overrides the current step reward of the agent and updates the episode
+/// reward accordingly.
+/// <param name="reward">The new value of the reward.</param>
+public void SetReward(float reward)
+{
+    cumulativeReward += (reward - this.reward);
+    this.reward = reward;
+}
-/// Use this method to increment the current reward of the agent.
+/// Increments the step and episode rewards by the provided value.
-/// <param name="increment">The value by which the reward will
-/// be incremented</param>
+/// <param name="increment">Incremental reward value.</param>
-/// Gets the reward of the agent.
+/// Retrieves the step reward for the Agent.
-/// <returns>The reward.</returns>
+/// <returns>The step reward.</returns>
-/// Gets the cumulative reward.
+/// Retrieves the episode reward for the Agent.
+/// <returns>The episode reward.</returns>
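A brief sketch contrasting the two reward calls, as they might appear inside an AgentAction override:

    SetReward(0.1f);    // overwrite this step's reward with 0.1
    AddReward(-0.05f);  // then increment it; the step reward is now 0.05
    // Both calls keep the episode total in sync, which can be read back:
    float episodeReward = GetCumulativeReward();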
-/// Is called then the agent is done. Either game-over, victory or timeout.
+/// Sets the done flag to true.
 /// <summary>
 /// Is called when the agent must request the brain for a new decision.
 /// </summary>

     RequestAction();
 }
 /// <summary>
 /// Is called when the agent must perform a new action.
 /// </summary>

 }
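When On Demand Decision Making is enabled, the agent drives its own decision cadence instead of the Academy's step loop. A sketch for a turn-based setting (OnTurnStarted is a hypothetical game hook, not part of this API):

    void OnTurnStarted()
    {
        // Queues this agent's info for the Brain on the next academy step;
        // RequestDecision() also implies RequestAction().
        RequestDecision();
    }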
-/// <returns><c>true</c>, if max step reached was reached,
-/// <c>false</c> otherwise.</returns>
+/// <returns>
+/// <c>true</c>, if max step was reached, <c>false</c> otherwise.
+/// </returns>
-/// <returns><c>true</c>, if the agent is done,
-/// <c>false</c> otherwise.</returns>
+/// <returns>
+/// <c>true</c>, if the agent is done, <c>false</c> otherwise.
+/// </returns>
 /// <summary>
-/// Resets the info and action fields of the agent. Is called when the agent
-/// resets or changes brain.
-/// </summary>
-private void ResetState()
+/// Helper function that resets all the data structures associated with
+/// the agent. Typically used when the agent is being initialized or reset
+/// at the end of an episode.
+/// </summary>
+void ResetData()
{
}
-_action.vectorActions = new float[param.vectorActionSize];
-_info.StoredVectorActions = new float[param.vectorActionSize];
+action.vectorActions = new float[param.vectorActionSize];
+info.storedVectorActions = new float[param.vectorActionSize];
-_action.vectorActions = new float[1];
-_info.StoredVectorActions = new float[1];
+action.vectorActions = new float[1];
+info.storedVectorActions = new float[1];
-_action.textActions = "";
-_info.memories = new List<float>();
-_action.memories = new List<float>();
+action.textActions = "";
+info.memories = new List<float>();
+action.memories = new List<float>();
-_info.vectorObservation =
+info.vectorObservation =
-_info.stackedVectorObservation =
+info.stackedVectorObservation =
-_info.stackedVectorObservation.AddRange(
+info.stackedVectorObservation.AddRange(
-_info.vectorObservation = new List<float>(1);
-_info.stackedVectorObservation =
+info.vectorObservation = new List<float>(1);
+info.stackedVectorObservation =
-_info.stackedVectorObservation.AddRange(
+info.stackedVectorObservation.AddRange(
-_info.visualObservations = new List<Texture2D>();
+info.visualObservations = new List<Texture2D>();
-/// Initialize the agent with this method
-/// Must be implemented in agent-specific child class.
-/// This method called only once when the agent is enabled.
+/// Initializes the agent, called once when the agent is enabled. Can be
+/// left empty if there is no special, unique set-up behavior for the
+/// agent.
+/// <remarks>
+/// One sample use is to store local references to other objects in the
+/// scene which would facilitate computing this agent's observation.
+/// </remarks>
 public virtual void InitializeAgent()
 {

-/// Sends the state to brain.
+/// Sends the Agent info to the linked Brain.
-public void SendStateToBrain()
+void SendInfoToBrain()
 {
-    _info.memories = _action.memories;
-    _info.StoredVectorActions = _action.vectorActions;
-    _info.StoredTextActions = _action.textActions;
-    _info.vectorObservation.Clear();
+    info.memories = action.memories;
+    info.storedVectorActions = action.vectorActions;
+    info.storedTextActions = action.textActions;
+    info.vectorObservation.Clear();
-if (_info.vectorObservation.Count != param.vectorObservationSize)
+if (info.vectorObservation.Count != param.vectorObservationSize)
 {
     throw new UnityAgentsException(string.Format(
         "Vector Observation size mismatch between continuous " +

         brain.brainParameters.vectorObservationSize,
-        _info.vectorObservation.Count));
+        info.vectorObservation.Count));
-_info.stackedVectorObservation.RemoveRange(
+info.stackedVectorObservation.RemoveRange(
-_info.stackedVectorObservation.AddRange(_info.vectorObservation);
+info.stackedVectorObservation.AddRange(info.vectorObservation);
-if (_info.vectorObservation.Count != 1)
+if (info.vectorObservation.Count != 1)
-    1, _info.vectorObservation.Count));
+    1, info.vectorObservation.Count));
-_info.stackedVectorObservation.RemoveRange(0, 1);
-_info.stackedVectorObservation.AddRange(_info.vectorObservation);
+info.stackedVectorObservation.RemoveRange(0, 1);
+info.stackedVectorObservation.AddRange(info.vectorObservation);
-_info.visualObservations.Clear();
+info.visualObservations.Clear();
if (param.cameraResolutions.Length > agentParameters.agentCameras.Count)
{
throw new UnityAgentsException(string.Format(

}
for (int i = 0; i < brain.brainParameters.cameraResolutions.Length; i++)
{
-    _info.visualObservations.Add(ObservationToTexture(
+    info.visualObservations.Add(ObservationToTexture(
-_info.reward = reward;
-_info.done = done;
-_info.maxStepReached = maxStepReached;
-_info.id = id;
+info.reward = reward;
+info.done = done;
+info.maxStepReached = maxStepReached;
+info.id = id;
-brain.SendState(this, _info);
-_info.textObservation = "";
+brain.SendState(this, info);
+info.textObservation = "";
-/// Collects the observations. Must be implemented by the developer.
+/// Collects the (vector, visual, text) observations of the agent.
+/// The agent observation describes the current environment from the
+/// perspective of the agent.
 /// <remarks>
+/// Simply, an agent's observation is any environment information that helps
+/// the Agent achieve its goal. For example, for a fighting Agent, its
+/// observation could include distances to friends or enemies, or the
+/// current level of ammunition at its disposal.
+/// Recall that an Agent may attach vector, visual or textual observations.
+/// Vector observations are added by calling the provided helper methods:
+/// - <see cref="AddVectorObs(int)"/>
+/// - <see cref="AddVectorObs(float)"/>
+/// - <see cref="AddVectorObs(Vector3)"/>
+/// - <see cref="AddVectorObs(Vector2)"/>
+/// - <see cref="AddVectorObs(float[])"/>
+/// - <see cref="AddVectorObs(List{float})"/>
+/// - <see cref="AddVectorObs(Quaternion)"/>
+/// Depending on your environment, any combination of these helpers can
+/// be used. They just need to be used in the exact same order each time
+/// this method is called and the resulting size of the vector observation
+/// needs to match the vectorObservationSize attribute of the linked Brain.
+/// Visual observations are implicitly added from the cameras attached to
+/// the Agent.
+/// Lastly, textual observations are added using
+/// <see cref="SetTextObs(string)"/>.
+/// </remarks>
public virtual void CollectObservations()
{

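A sketch of an override that uses the helpers listed above; the per-call total must equal the Brain's vectorObservationSize (here 3 + 4 + 1 = 8), and health/maxHealth are hypothetical fields:

    public override void CollectObservations()
    {
        AddVectorObs(transform.localPosition);   // Vector3 -> 3 floats
        AddVectorObs(transform.localRotation);   // Quaternion -> 4 floats
        AddVectorObs(health / maxHealth);        // normalized scalar -> 1 float
        SetTextObs("frame " + Time.frameCount);  // optional text observation
    }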
-/// Appends float values to the vector observation.
-/// Note that the total number of vector observation added
-/// must be the same at each CollectObservations call.
-/// <param name="observation">The value to add to
-/// the vector observation.</param>
-internal void AddVectorObs(float observation)
+/// Adds a float observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by 1.
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(float observation)
-    _info.vectorObservation.Add(observation);
+    info.vectorObservation.Add(observation);
-internal void AddVectorObs(int observation)
+/// <summary>
+/// Adds an integer observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by 1.
+/// </summary>
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(int observation)
-    _info.vectorObservation.Add((float)observation);
+    info.vectorObservation.Add((float)observation);
-internal void AddVectorObs(Vector3 observation)
+/// <summary>
+/// Adds a Vector3 observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by 3.
+/// </summary>
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(Vector3 observation)
-    _info.vectorObservation.Add(observation.x);
-    _info.vectorObservation.Add(observation.y);
-    _info.vectorObservation.Add(observation.z);
+    info.vectorObservation.Add(observation.x);
+    info.vectorObservation.Add(observation.y);
+    info.vectorObservation.Add(observation.z);
-internal void AddVectorObs(Vector2 observation)
+/// <summary>
+/// Adds a Vector2 observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by 2.
+/// </summary>
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(Vector2 observation)
-    _info.vectorObservation.Add(observation.x);
-    _info.vectorObservation.Add(observation.y);
+    info.vectorObservation.Add(observation.x);
+    info.vectorObservation.Add(observation.y);
-internal void AddVectorObs(float[] observation)
+/// <summary>
+/// Adds a float array observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by the size of the array.
+/// </summary>
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(float[] observation)
-    _info.vectorObservation.AddRange(observation);
+    info.vectorObservation.AddRange(observation);
-internal void AddVectorObs(List<float> observation)
+/// <summary>
+/// Adds a float list observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by the size of the list.
+/// </summary>
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(List<float> observation)
-    _info.vectorObservation.AddRange(observation);
+    info.vectorObservation.AddRange(observation);
-internal void AddVectorObs(Quaternion observation)
+/// <summary>
+/// Adds a quaternion observation to the vector observations of the agent.
+/// Increases the size of the agent's vector observation by 4.
+/// </summary>
+/// <param name="observation">Observation.</param>
+protected void AddVectorObs(Quaternion observation)
-    _info.vectorObservation.Add(observation.x);
-    _info.vectorObservation.Add(observation.y);
-    _info.vectorObservation.Add(observation.z);
-    _info.vectorObservation.Add(observation.w);
+    info.vectorObservation.Add(observation.x);
+    info.vectorObservation.Add(observation.y);
+    info.vectorObservation.Add(observation.z);
+    info.vectorObservation.Add(observation.w);
-/// <param name="s">The string the text observation must be set to.</param>
-internal void SetTextObs(object s)
+/// <param name="textObservation">The text observation.</param>
+public void SetTextObs(string textObservation)
-    _info.textObservation = s.ToString();
+    info.textObservation = textObservation;
-/// Defines agent-specific behavior at every step depending on the action.
-/// Must be implemented in agent-specific child class.
-/// Note: If your state is discrete, you need to convert your
-/// state into a list of float with length 1.
+/// Specifies the agent behavior at every step based on the provided
+/// action.
+/// <param name="vectorAction">
+/// Vector action. Note that for discrete actions, the provided array
+/// will be of length 1.
+/// </param>
+/// <param name="textAction">Text action.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{

-/// Defines agent-specific behaviour when done
-/// Must be implemented in agent-specific child class.
-/// Is called when the Agent is done if ResetOneDone is false.
-/// The agent will remain done.
-/// You can use this method to remove the agent from the scene.
+/// Specifies the agent behavior when done and
+/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
+/// used to remove the agent from the scene.
-/// Defines agent-specific reset logic
-/// Must be implemented in agent-specific child class.
-/// Is called when the academy is done.
-/// Is called when the Agent is done if ResetOneDone is true.
+/// Specifies the agent behavior when being reset, which can be due to
+/// the agent or Academy being done (i.e. completion of local or global
+/// episode).
+/// </summary>
public virtual void AgentReset()
{

 /// <summary>
-/// Is called when the agent resets.
+/// An internal reset method that updates internal data structures in
+/// addition to calling <see cref="AgentReset"/>.
+/// </summary>
-public void _AgentReset()
+void _AgentReset()
-    ResetState();
-    stepCounter = 0;
+    ResetData();
+    stepCount = 0;
-/// Is used by the brain give new action to the agent.
-public void UpdateAction(AgentAction action)
-{
-    _action = action;
-}
-public void UpdateVectorAction(float[] v)
-{
-    _action.vectorActions = v;
-}
+/// <summary>
+/// Updates the vector action.
+/// </summary>
+/// <param name="vectorActions">Vector actions.</param>
+public void UpdateVectorAction(float[] vectorActions)
+{
+    action.vectorActions = vectorActions;
+}
-public void UpdateMemoriesAction(List<float> v)
-{
-    _action.memories = v;
-}
+/// <summary>
+/// Updates the memories action.
+/// </summary>
+/// <param name="memories">Memories.</param>
+public void UpdateMemoriesAction(List<float> memories)
+{
+    action.memories = memories;
+}
-public void UpdateTextAction(string t)
-{
-    _action.textActions = t;
-}
+/// <summary>
+/// Updates the text action.
+/// </summary>
+/// <param name="textActions">Text actions.</param>
+public void UpdateTextAction(string textActions)
+{
+    action.textActions = textActions;
+}
-/// <param name="acaMaxStep">If set to <c>true</c>
+/// <param name="academyMaxStep">If set to <c>true</c>
-/// <param name="acaDone">If set to <c>true</c>
+/// <param name="academyDone">If set to <c>true</c>
-private void SetStatus(bool acaMaxStep, bool acaDone, int acaStepCounter)
+void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
-if (acaDone)
-    acaStepCounter = 0;
-MakeRequests(acaStepCounter);
-if (acaMaxStep)
+if (academyDone)
+{
+    academyStepCounter = 0;
+}
+MakeRequests(academyStepCounter);
+if (academyMaxStep)
+{
-if (acaDone)
+// If the Academy needs to reset, the agent should reset
+// even if it reset recently.
+if (academyDone)
/// <summary>
/// </summary>
-private void ResetIfDone()
+void ResetIfDone()
{
// If an agent is done, then it will also
// request for a decision and an action

 /// <summary>
 /// Signals the agent that it must send its decision to the brain.
 /// </summary>
-private void SendState()
+void SendInfo()
-    SendStateToBrain();
+    SendInfoToBrain();
     ResetReward();
     done = false;
     maxStepReached = false;

}
}
-/// Is used by the brain to make the agent perform a step.
-private void _AgentStep()
+/// Used by the brain to make the agent perform a step.
+void AgentStep()
if (terminate)
{
terminate = false;

AgentOnDone();
}
-AgentAction(_action.vectorActions, _action.textActions);
+AgentAction(action.vectorActions, action.textActions);
-if ((stepCounter >= agentParameters.maxStep)
+if ((stepCount >= agentParameters.maxStep)
-    stepCounter += 1;
+    stepCount += 1;
-private void MakeRequests(int acaStepCounter)
+void MakeRequests(int academyStepCounter)
     Mathf.Max(agentParameters.numberOfActionsBetweenDecisions, 1);
-if (acaStepCounter %
+if (academyStepCounter %
     agentParameters.numberOfActionsBetweenDecisions == 0)
{
RequestDecision();

-/** Contains logic for coverting a camera component into a Texture2D. */
-public Texture2D ObservationToTexture(Camera cam, int width, int height)
+/// <summary>
+/// Converts a camera and corresponding resolution to a 2D texture.
+/// </summary>
+/// <returns>The 2D texture.</returns>
+/// <param name="camera">Camera.</param>
+/// <param name="width">Width of resulting 2D texture.</param>
+/// <param name="height">Height of resulting 2D texture.</param>
+public static Texture2D ObservationToTexture(Camera camera, int width, int height)
-Rect oldRec = cam.rect;
-cam.rect = new Rect(0f, 0f, 1f, 1f);
+Rect oldRec = camera.rect;
+camera.rect = new Rect(0f, 0f, 1f, 1f);
 var depth = 24;
 var format = RenderTextureFormat.Default;
 var readWrite = RenderTextureReadWrite.Default;

 var tex = new Texture2D(width, height, TextureFormat.RGB24, false);
 var prevActiveRT = RenderTexture.active;
-var prevCameraRT = cam.targetTexture;
+var prevCameraRT = camera.targetTexture;
-cam.targetTexture = tempRT;
+camera.targetTexture = tempRT;
-cam.Render();
+camera.Render();
-cam.targetTexture = prevCameraRT;
-cam.rect = oldRec;
+camera.targetTexture = prevCameraRT;
+camera.rect = oldRec;
RenderTexture.active = prevActiveRT;
RenderTexture.ReleaseTemporary(tempRT);
return tex;
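Since the method is now static, it can be used without an Agent instance. A one-line usage sketch (obsCamera is an assumption: any Camera in the scene):

    // Render a camera into an 84x84 texture, e.g. for a debug thumbnail.
    Texture2D frame = Agent.ObservationToTexture(obsCamera, 84, 84);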

unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (2 changes)


var i = 0;
foreach (Agent agent in agentList)
{
-    float[] action_list = agentInfo[agent].StoredVectorActions;
+    float[] action_list = agentInfo[agent].storedVectorActions;
inputPrevAction[i] = Mathf.FloorToInt(action_list[0]);
i++;
}

unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs (4 changes)


sMessage.memories.Add(0f);
}
sMessage.dones.Add(agentInfo[agent].done);
-sMessage.previousVectorActions.AddRange(agentInfo[agent].StoredVectorActions.ToList());
-sMessage.previousTextActions.Add(agentInfo[agent].StoredTextActions);
+sMessage.previousVectorActions.AddRange(agentInfo[agent].storedVectorActions.ToList());
+sMessage.previousTextActions.Add(agentInfo[agent].storedTextActions);
sMessage.maxes.Add(agentInfo[agent].maxStepReached);
sMessage.textObservations.Add(agentInfo[agent].textObservation);
