|
|
|
|
|
|
namespace MLAgents |
|
|
|
{ |
|
|
|
/// <summary>
|
|
|
|
/// Struct that contains all the information for an Agent, including its
|
|
|
|
/// observations, actions and current status, that is sent to the Brain.
|
|
|
|
/// </summary>
|
|
|
|
public struct AgentInfo |
|
|
|
|
|
|
agentInfoProto.VisualObservations.Add( |
|
|
|
ByteString.CopyFrom(obs.EncodeToPNG()) |
|
|
|
); |
|
|
|
} |
|
|
|
return agentInfoProto; |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Remove the visual observations from memory. Call at each timestep
|
|
|
|
/// to avoid memory leaks.
|
|
|
|
/// </summary>
|
|
|
|
public void ClearVisualObs()
{
    // Runtime-created Texture2D objects are native Unity resources and are
    // not reclaimed by the garbage collector; destroy each one explicitly.
    foreach (Texture2D obs in visualObservations)
    {
        Object.DestroyImmediate(obs);
    }
    // Drop the now-dangling references so the list can be refilled next step.
    // NOTE(review): the original fragment contained a stray
    // "return agentInfoProto;" here, which is invalid in a void method and
    // clearly spliced in from another method; removed.
    visualObservations.Clear();
}
|
|
|
/// Struct that contains the action information sent from the Brain to the
|
|
|
|
/// Agent.
|
|
|
|
/// </summary>
|
|
|
|
public struct AgentAction |
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Struct that contains all the Agent-specific parameters provided in the
|
|
|
|
/// Editor. This excludes the Brain linked to the Agent since it can be
|
|
|
|
/// modified programmatically.
|
|
|
|
/// </summary>
|
|
|
|
|
|
|
/// observations.
|
|
|
|
/// </summary>
|
|
|
|
public List<Camera> agentCameras = new List<Camera>(); |
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The list of the RenderTextures the agent uses for visual
|
|
|
|
/// observations.
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The maximum number of steps the agent takes before being done.
|
|
|
|
/// </summary>
|
|
|
|
/// <remarks>
|
|
|
|
/// If set to 0, the agent can only be set to done programmatically (or
|
|
|
|
|
|
|
public bool resetOnDone = true; |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Whether to enable On Demand Decisions or make a decision at
|
|
|
|
/// every step.
|
|
|
|
/// </summary>
|
|
|
|
public bool onDemandDecision; |
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Agent Monobehavior class that is attached to a Unity GameObject, making it
|
|
|
|
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
|
|
|
|
/// to the agent in addition to the vector observations implemented by the
|
|
|
|
/// user in <see cref="CollectObservations"/>. On the other hand, actions
|
|
|
|
/// are determined by decisions produced by a linked Brain. Currently, this
|
|
|
|
|
|
|
/// however, an agent need not send its observation at every step since very
|
|
|
|
/// little may have changed between successive steps. Currently, how often an
|
|
|
|
/// agent updates its brain with a fresh observation is determined by the
|
|
|
|
/// Academy.
///
/// At any step, an agent may be considered <see cref="done"/>.
|
|
|
|
///
|
|
|
|
///
|
|
|
|
///
|
|
|
|
///
|
|
|
|
/// episodes. The academy controls the global episode count and each agent
|
|
|
|
/// controls its own local episode count and can reset and start a new local
|
|
|
|
/// episode independently (based on its own experience). Thus an academy
|
|
|
|
/// (global) episode can be viewed as the upper-bound on an agents episode
|
|
|
|
|
|
|
/// value takes precedence (since the agent max step will never be reached).
|
|
|
|
///
|
|
|
|
///
|
|
|
|
///
|
|
|
|
///
|
|
|
|
/// Implementation-wise, it is required that this class is extended and the
|
|
|
|
/// virtual methods overridden. For sample implementations of agent behavior,
|
|
|
|
/// see the Examples/ directory within this Unity project.
|
|
|
|
|
|
|
{ |
|
|
|
/// <summary>
|
|
|
|
/// The Brain attached to this agent. A brain can be attached either
|
|
|
|
/// directly from the Editor through AgentEditor or
|
|
|
|
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
|
|
|
|
/// to not have a brain, as long as no decision is requested.
|
|
|
|
/// </summary>
|
|
|
|
|
|
|
actionMasker = new ActionMasker(param); |
|
|
|
// If we haven't initialized vectorActions, initialize to 0. This should only
|
|
|
|
// happen during the creation of the Agent. In subsequent episodes, vectorAction
|
|
|
|
// should stay the previous action before the Done(), so that it is properly recorded.
|
|
|
|
if (action.vectorActions == null) |
|
|
|
{ |
|
|
|
if (param.vectorActionSpaceType == SpaceType.continuous) |
|
|
|
|
|
|
brain.brainParameters.vectorObservationSize, |
|
|
|
info.vectorObservation.Count)); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation, |
|
|
|
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation, |
|
|
|
info.stackedVectorObservation.Count - info.vectorObservation.Count); |
|
|
|
|
|
|
|
info.visualObservations.Clear(); |
|
|
|
|
|
|
param.cameraResolutions[i].height); |
|
|
|
info.visualObservations.Add(obsTexture); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//Then add all renderTextures
|
|
|
|
var camCount = agentParameters.agentCameras.Count; |
|
|
|
for (int i = 0; i < agentParameters.agentRenderTextures.Count; i++) |
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Collects the (vector, visual, text) observations of the agent.
|
|
|
|
/// The agent observation describes the current environment from the
/// observation could include distances to friends or enemies, or the
|
|
|
|
/// current level of ammunition at its disposal.
|
|
|
|
/// Recall that an Agent may attach vector, visual or textual observations.
|
|
|
|
/// Vector observations are added by calling the provided helper methods:
|
|
|
|
|
|
|
/// needs to match the vectorObservationSize attribute of the linked Brain.
|
|
|
|
/// Visual observations are implicitly added from the cameras attached to
|
|
|
|
/// the Agent.
|
|
|
|
/// Lastly, textual observations are added using
|
|
|
|
/// <see cref="SetTextObs(string)"/>.
|
|
|
|
/// </remarks>
|
|
|
|
public virtual void CollectObservations()
{
    // Intentionally empty: the base Agent records no observations. Subclasses
    // override this hook and call the AddVectorObs helpers (and/or
    // SetTextObs) to describe the environment before each decision.
}
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Specifies the agent behavior when done and
|
|
|
|
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
|
|
|
|
/// used to remove the agent from the scene.
|
|
|
|
/// </summary>
|
|
|
|
|
|
|
{ |
|
|
|
action.memories = memories; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the memories stored on the agent's current action.
/// </summary>
/// <returns>The list of memory floats held by <see cref="AgentAction"/>.</returns>
public List<float> GetMemoriesAction()
{
    return action.memories;
}
|
|
/// <summary>
|
|
|
|
/// Sets the status of the agent.
|
|
|
|
/// </summary>
|
|
|
|
/// <param name="academyMaxStep">If set to <c>true</c>
/// <param name="academyDone">If set to <c>true</c>
|
|
|
|
/// The agent must set done.</param>
|
|
|
|
/// <param name="academyStepCounter">Number of current steps in episode</param>
|
|
|
|
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter) |
|
|
|
|
|
|
maxStepReached = true; |
|
|
|
} |
|
|
|
|
|
|
|
// If the Academy needs to reset, the agent should reset
// even if it reset recently.
|
|
|
|
if (academyDone) |
|
|
|
{ |
|
|
|
|
|
|
/// Signals the agent that it must reset if its done flag is set to true.
|
|
|
|
void ResetIfDone() |
|
|
|
{ |
|
|
|
// If an agent is done, then it will also
|
|
|
|
// request for a decision and an action
|
|
|
|
if (IsDone()) |
|
|
|
{ |
|
|
|
|
|
|
obsCamera.Render(); |
|
|
|
|
|
|
|
texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0); |
|
|
|
|
|
|
|
|
|
|
|
obsCamera.targetTexture = prevCameraRT; |
|
|
|
obsCamera.rect = oldRec; |
|
|
|
RenderTexture.active = prevActiveRT; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
|
|
|
|
/// </summary>
|
|
|
|
|
|
|
{ |
|
|
|
texture2D.Resize(width, height); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if(width != obsTexture.width || height != obsTexture.height) |
|
|
|
{ |
|
|
|
throw new UnityAgentsException(string.Format( |
|
|
|
|
|
|
{ |
|
|
|
info.customObservation = customObservation; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |