using System;
using System.Collections.Generic;
using UnityEngine;
using Barracuda;
using UnityEngine.Serialization;

/// <summary>
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status.
/// </summary>
internal struct AgentInfo
{
    public float[] vectorActions;
}

/// <summary>
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the
/// user in <see cref="Agent.CollectObservations(VectorSensor)"/> or
/// <see cref="Agent.CollectObservations(VectorSensor, ActionMasker)"/>.
/// On the other hand, actions are determined by decisions produced by a Policy.
/// Currently, this class is expected to be extended to implement the desired agent behavior.
/// </summary>
/// <remarks>
/// Simply speaking, an agent roams through an environment and at each step
/// of the environment extracts its current observation, sends it to its
/// policy and receives an action in return. In practice, however, an agent
/// need not send its observation at every step since very
/// little may have changed between successive steps.
///
/// At any step, an agent may be considered done due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).
/// </remarks>
public class Agent : MonoBehaviour, ISerializationCallbackReceiver
{
    BehaviorParameters m_PolicyFactory;

    /// <summary>
    /// This code is here to make the upgrade path for users using maxStep
    /// easier. We will hook into the Serialization code and make sure that
    /// agentParameters.maxStep and this.maxStep are in sync.
    /// </summary>
    [Serializable]
    internal struct AgentParameters
    {
        public int maxStep;
    }

    [SerializeField]
    [HideInInspector]
    internal AgentParameters agentParameters;
    [SerializeField]
    [HideInInspector]
    internal bool hasUpgradedFromAgentParameters;

    /// <summary>
    /// The maximum number of steps the agent takes before being done.
    /// </summary>
    public int maxStep;

    /// <summary>
    /// VectorSensor that <see cref="CollectObservations(VectorSensor)"/> writes to.
    /// </summary>
    internal VectorSensor collectObservationsSensor;

    /// <summary>
    /// MonoBehaviour function that is called when the attached GameObject
    /// becomes enabled or active.
    /// </summary>
    void OnEnable()
    {
        LazyInitialize();
    }

    /// <summary>
    /// <inheritdoc cref="OnBeforeSerialize"/>
    /// </summary>
    public void OnBeforeSerialize()
    {
        // Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
        // from AgentParameters (since removed) to Agent
        if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
        {
            maxStep = agentParameters.maxStep;
        }
        hasUpgradedFromAgentParameters = true;
    }

    /// <summary>
    /// <inheritdoc cref="OnAfterDeserialize"/>
    /// </summary>
    public void OnAfterDeserialize()
    {
        // Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
        // from AgentParameters (since removed) to Agent
        if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
        {
            maxStep = agentParameters.maxStep;
        }
        hasUpgradedFromAgentParameters = true;
    }

    /// <summary>
    /// Initializes the agent. Can be safely called multiple times.
    /// </summary>
    /// <remarks>
    /// Helper method for the <see cref="OnEnable"/> event, created to
    /// facilitate testing.
    /// </remarks>
    public void LazyInitialize()
    {
        if (m_Initialized)
        {
            return;
        }
        m_Initialized = true;

        // Grab the BehaviorParameters attached to this GameObject, build the
        // Policy from it, and set up the sensors.
        m_PolicyFactory = GetComponent<BehaviorParameters>();
        m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
        InitializeSensors();
    }

    /// <summary>
    /// MonoBehaviour function that is called when the attached GameObject
    /// becomes disabled or inactive.
    /// </summary>
    void OnDisable()
    {
        DemonstrationStores.Clear();
    }

    /// <summary>
    /// Returns the current step counter (within the current episode).
    /// </summary>
    /// <returns>
    /// The current step count.
    /// </returns>
    public int GetStepCount()
    {
        // m_StepCount is maintained by the Agent's stepping logic elsewhere in this class.
        return m_StepCount;
    }

    /// <summary>
    /// When the Agent uses Heuristics, it will call this method every time it
    /// needs an action. This can be used for debugging or for controlling the
    /// agent by hand.
    /// </summary>
    /// <returns>A float array corresponding to the next action of the Agent.</returns>
    public virtual float[] Heuristic()
    {
        throw new UnityAgentsException(
            "The Heuristic method was not implemented for the Agent on the " +
            $"{gameObject.name} GameObject.");
    }

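    // Example: Heuristic above is meant to be overridden in a subclass so the agent can
    // be driven by hand (for debugging, demonstrations, etc.). A minimal sketch; the
    // subclass name and the two-action layout are illustrative assumptions, not part of
    // this file:
    //
    //     public class KeyboardAgent : Agent
    //     {
    //         public override float[] Heuristic()
    //         {
    //             // Map keyboard axes to two continuous actions in [-1, 1].
    //             return new[]
    //             {
    //                 Input.GetAxis("Horizontal"),   // continuous action 0
    //                 Input.GetAxis("Vertical")      // continuous action 1
    //             };
    //         }
    //     }
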
    /// <summary>
    /// Sets up the list of ISensors on the Agent.
    /// </summary>
    internal void InitializeSensors()
    {
        // Use the vector observation settings from the attached BehaviorParameters.
        var param = m_PolicyFactory.brainParameters;
        collectObservationsSensor = new VectorSensor(param.vectorObservationSize);
        if (param.numStackedVectorObservations > 1)
        {
            var stackingSensor = new StackingSensor(
                collectObservationsSensor, param.numStackedVectorObservations);
            sensors.Add(stackingSensor);
        }
        else
        {
            sensors.Add(collectObservationsSensor);
        }

        // Sort the sensors by name so that their order is deterministic and the
        // adjacent-name uniqueness check below is valid.
        sensors.Sort((x, y) => x.GetName().CompareTo(y.GetName()));

#if DEBUG
        // Make sure the names are actually unique
        for (var i = 0; i < sensors.Count - 1; i++)
        {
            Debug.Assert(
                !sensors[i].GetName().Equals(sensors[i + 1].GetName()),
                "Sensor names must be unique.");
        }
#endif
    }

    void UpdateSensors()
    {
        foreach (var sensor in sensors)
        {
            sensor.Update();
        }
    }

    /// <summary>
    /// Collects the vector observations of the agent.
    /// The agent observation describes the current environment from the
    /// perspective of the agent.
    /// </summary>
    /// <param name="sensor">
    /// The vector observations for the agent.
    /// </param>
    /// <remarks>
    /// An agent's observation is any environment information that helps
    /// the Agent achieve its goal. For example, for a fighting Agent, its
    /// observation could include distances to friends or enemies, or the
    /// current level of ammunition at its disposal.
    /// Vector observations are added by calling the provided helper methods
    /// on the VectorSensor input:
    /// - <see cref="VectorSensor.AddObservation(int)"/>
    /// - <see cref="VectorSensor.AddObservation(float)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector3)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector2)"/>
    /// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
    /// - <see cref="VectorSensor.AddObservation(bool)"/>
    /// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
    /// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
    /// Depending on your environment, any combination of these helpers can
    /// be used. They just need to be used in the exact same order each time
    /// this method is called and the resulting size of the vector observation
    /// needs to match the vector observation size set in the agent's Behavior Parameters.
    /// </remarks>
    public virtual void CollectObservations(VectorSensor sensor)
    {
    }

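    // Example: a subclass adds its observations by calling the VectorSensor helpers
    // listed above, always in the same order. A minimal sketch; the field names and the
    // total size of 8 are illustrative assumptions, not part of this file:
    //
    //     public class RollerAgent : Agent
    //     {
    //         public Transform target;   // assigned in the Inspector
    //         public bool hasKey;        // some game-specific state
    //
    //         public override void CollectObservations(VectorSensor sensor)
    //         {
    //             sensor.AddObservation(target.localPosition);      // 3 floats
    //             sensor.AddObservation(transform.localPosition);   // 3 floats
    //             sensor.AddObservation(hasKey);                    // bool, 1 float
    //             sensor.AddObservation(Vector3.Distance(
    //                 target.localPosition, transform.localPosition)); // 1 float
    //             // 8 floats in total, so the vector observation size in the agent's
    //             // Behavior Parameters must be set to 8.
    //         }
    //     }
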
    /// <summary>
    /// Collects the vector observations of the agent alongside the masked actions.
    /// </summary>
    /// <param name="sensor">
    /// The vector observations for the agent.
    /// </param>
    /// <param name="actionMasker">
    /// The masked actions for the agent.
    /// </param>
    /// <remarks>
    /// An agent's observation is any environment information that helps
    /// the Agent achieve its goal. For example, for a fighting Agent, its
    /// observation could include distances to friends or enemies, or the
    /// current level of ammunition at its disposal.
    /// Vector observations are added by calling the provided helper methods
    /// on the VectorSensor input:
    /// - <see cref="VectorSensor.AddObservation(int)"/>
    /// - <see cref="VectorSensor.AddObservation(float)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector3)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector2)"/>
    /// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
    /// - <see cref="VectorSensor.AddObservation(bool)"/>
    /// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
    /// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
    /// Depending on your environment, any combination of these helpers can
    /// be used. They just need to be used in the exact same order each time
    /// this method is called and the resulting size of the vector observation
    /// needs to match the vector observation size set in the agent's Behavior Parameters.
    ///
    /// When using Discrete Control, you can prevent the Agent from using a certain
    /// action by masking it. You can call the following methods on the ActionMasker
    /// input:
    /// - <see cref="ActionMasker.SetActionMask(int)"/>
    /// - <see cref="ActionMasker.SetActionMask(int, int)"/>
    /// - <see cref="ActionMasker.SetActionMask(int, IEnumerable{int})"/>
    /// - <see cref="ActionMasker.SetActionMask(IEnumerable{int})"/>
    /// The branch input is the index of the action branch; actionIndices are the indices
    /// of the invalid options for that branch.
    /// </remarks>
    public virtual void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
    {
        // By default, just collect the observations and apply no action mask.
        CollectObservations(sensor);
    }

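    // Example: when the agent uses discrete actions, invalid options can be masked out
    // while observations are collected. A minimal sketch; the branch/option layout
    // (branch 0 = movement with options 0..3, option 2 = "move forward") and the
    // blockedAhead flag are illustrative assumptions, not part of this file:
    //
    //     public override void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
    //     {
    //         CollectObservations(sensor);   // reuse the plain observation logic
    //
    //         if (blockedAhead)              // game-specific flag, assumed for the example
    //         {
    //             // Forbid option 2 of branch 0 for this decision only.
    //             actionMasker.SetActionMask(0, 2);
    //         }
    //     }
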
    /// <summary>
    /// Returns the last action that was decided on by the Agent.
    /// </summary>
    /// <returns>
    /// The last action that was decided by the Agent (or null if no decision has been made).
    /// </returns>
    public float[] GetAction()
    {
        return m_Action.vectorActions;
    }

    /// <summary>
    /// Scales a continuous action from [-1, 1] to the range [min, max].
    /// </summary>
    /// <param name="rawAction">The raw action value, in the range [-1, 1].</param>
    /// <param name="min">The minimum of the target range.</param>
    /// <param name="max">The maximum of the target range.</param>
    /// <returns>The action value scaled into [min, max].</returns>
    protected static float ScaleAction(float rawAction, float min, float max)
    {
        var middle = (min + max) / 2;
        var range = (max - min) / 2;
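        // Worked example for ScaleAction (illustrative). Assuming the method finishes by
        // returning rawAction * range + middle (the linear map implied by the two values
        // computed above), a raw action in [-1, 1] maps onto [min, max] like this:
        //
        //     ScaleAction(-1f, 0f, 360f);   // -> 0    (the minimum)
        //     ScaleAction( 0f, 0f, 360f);   // -> 180  (the midpoint)
        //     ScaleAction( 1f, 0f, 360f);   // -> 360  (the maximum)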
|