
Develop mm validation fixes (#3487)

* Doc fixes for Agent and Academy to remove all validation errors.
* Made `ScaleAction()` in `Agent.cs` static.
Branch: /asymm-envs
GitHub authored 5 years ago
Current commit: cd0a38c3
2 files changed, with 84 additions and 50 deletions
  1. com.unity.ml-agents/Runtime/Academy.cs (24 changes)
  2. com.unity.ml-agents/Runtime/Agent.cs (110 changes)

com.unity.ml-agents/Runtime/Academy.cs (24 changes)


// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy
static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());
/// <summary>
/// True if the Academy is initialized, false otherwise.
/// </summary>
/// <summary>
/// The singleton Academy object.
/// </summary>
/// <summary>
/// Collection of float properties (indexed by a string).
/// </summary>
public IFloatProperties FloatProperties;

// Signals to all the agents each time the Academy force resets.
internal event Action AgentForceReset;
- // Signals that the Academy has been reset by the training process
+ /// <summary>
+ /// Signals that the Academy has been reset by the training process.
+ /// </summary>
public event Action OnEnvironmentReset;
AcademyFixedUpdateStepper m_FixedUpdateStepper;

/// <summary>
/// Initialize the Academy if it hasn't already been initialized.
- /// This method is always safe to call; it will have no effect if the Academy is already initialized.
+ /// This method is always safe to call; it will have no effect if the Academy is already
+ /// initialized.
/// </summary>
internal void LazyInitialize()
{

}
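
For context, the Lazy<T> pattern linked in the comment above yields a single, lazily constructed, thread-safe instance. A minimal sketch of that pattern (the class and member names below are illustrative, not the actual Academy implementation):

using System;

public sealed class LazySingletonExample
{
    // Lazy<T> defers construction to the first read of Instance and is
    // thread-safe by default (ExecutionAndPublication mode).
    static readonly Lazy<LazySingletonExample> s_Lazy =
        new Lazy<LazySingletonExample>(() => new LazySingletonExample());

    // Mirrors the "True if the Academy is initialized" property documented above:
    // the value exists only after someone has accessed Instance.
    public static bool IsInitialized => s_Lazy.IsValueCreated;

    public static LazySingletonExample Instance => s_Lazy.Value;

    // Private constructor: instances can only be obtained through Instance.
    LazySingletonExample() { }
}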
/// <summary>
- /// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating a temporary
- /// GameObject with a MonoBehavior that calls Academy.EnvironmentStep().
+ /// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating
+ /// a temporary GameObject with a MonoBehaviour that calls Academy.EnvironmentStep().
/// </summary>
void EnableAutomaticStepping()
{

/// Registers SideChannel to the Academy to send and receive data with Python.
/// If IsCommunicatorOn is false, the SideChannel will not be registered.
/// </summary>
/// <param name="sideChannel"> The side channel to be registered.</param>
/// <param name="channel"> The side channel to be registered.</param>
public void RegisterSideChannel(SideChannel channel)
{
LazyInitialize();

/// Unregisters SideChannel to the Academy. If the side channel was not registered,
/// nothing will happen.
/// </summary>
/// <param name="sideChannel"> The side channel to be unregistered.</param>
/// <param name="channel"> The side channel to be unregistered.</param>
public void UnregisterSideChannel(SideChannel channel)
{
Communicator?.UnregisterSideChannel(channel);

com.unity.ml-agents/Runtime/Agent.cs (110 changes)


using System.Collections.Generic;
using UnityEngine;
using Barracuda;
using UnityEngine.Serialization;
- /// observations, actions and current status, that is sent to the Brain.
+ /// observations, actions and current status.
/// </summary>
internal struct AgentInfo
{

public float[] vectorActions;
}
- /// Agent Monobehavior class that is attached to a Unity GameObject, making it
+ /// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
- /// user in <see cref="CollectObservations"/>. On the other hand, actions
- /// are determined by decisions produced by a Policy. Currently, this
- /// class is expected to be extended to implement the desired agent behavior.
+ /// user in <see cref="Agent.CollectObservations(VectorSensor)"/> or
+ /// <see cref="Agent.CollectObservations(VectorSensor, ActionMasker)"/>.
+ /// On the other hand, actions are determined by decisions produced by a Policy.
+ /// Currently, this class is expected to be extended to implement the desired agent behavior.
/// </summary>
/// <remarks>
/// Simply speaking, an agent roams through an environment and at each step

/// little may have changed between successive steps.
///
- /// At any step, an agent may be considered <see cref="m_Done"/>.
- /// This could occur due to a variety of reasons:
+ /// At any step, an agent may be considered done due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).

BehaviorParameters m_PolicyFactory;
/// This code is here to make the upgrade path for users using maxStep
/// easier. We will hook into the Serialization code and make sure that
/// agentParameters.maxStep and this.maxStep are in sync.
[Serializable]
internal struct AgentParameters

/// </summary>
internal VectorSensor collectObservationsSensor;
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
/// <summary>
/// <inheritdoc cref="OnBeforeSerialize"/>
/// </summary>
// Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
// from AgentParameters (since removed) to Agent
if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
{
maxStep = agentParameters.maxStep;

/// <summary>
/// <inheritdoc cref="OnAfterDeserialize"/>
/// </summary>
// Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
// from AgentParameters (since removed) to Agent
if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
{
maxStep = agentParameters.maxStep;
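
The two hunks above implement a one-time field migration through Unity's serialization callbacks. A self-contained sketch of that pattern under assumed names (legacyMaxStep and hasUpgraded stand in for agentParameters.maxStep and hasUpgradedFromAgentParameters; this is not the actual Agent code):

using UnityEngine;

public class MaxStepMigration : MonoBehaviour, ISerializationCallbackReceiver
{
    public int maxStep;                    // the field's new home (v0.14+)
    [SerializeField] int legacyMaxStep;    // stand-in for the removed AgentParameters field
    [SerializeField] bool hasUpgraded;     // stand-in for hasUpgradedFromAgentParameters

    void Sync()
    {
        // Copy the legacy value exactly once, and only if the new field is still unset.
        if (maxStep == 0 && maxStep != legacyMaxStep && !hasUpgraded)
        {
            maxStep = legacyMaxStep;
        }
        hasUpgraded = true;
    }

    // Unity invokes both hooks, so assets saved with either layout end up consistent.
    public void OnBeforeSerialize() { Sync(); }
    public void OnAfterDeserialize() { Sync(); }
}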

/// Helper method for the <see cref="OnEnable"/> event, created to
/// facilitate testing.
/// <summary>
/// Initializes the agent. Can be safely called multiple times.
/// </summary>
public void LazyInitialize()
{
if (m_Initialized)

InitializeSensors();
}
/// Monobehavior function that is called when the attached GameObject
/// becomes disabled or inactive.
void OnDisable()
{
DemonstrationStores.Clear();

m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
}
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>

/// </returns>
public virtual float[] Heuristic()
{
- throw new UnityAgentsException(string.Format(
- "{0} GameObject.",
- gameObject.name));
+ throw new UnityAgentsException(
+ $"{gameObject.name} GameObject.");
}
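
For reference, a minimal Heuristic override that avoids this exception by mapping keyboard input to actions. The two-element action size and the default Unity input axes are assumptions for illustration; the snippet is meant to live inside an Agent subclass:

public override float[] Heuristic()
{
    // Assumes a continuous action space of size 2 (e.g. horizontal and vertical movement).
    return new[]
    {
        Input.GetAxis("Horizontal"),
        Input.GetAxis("Vertical")
    };
}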
/// <summary>

collectObservationsSensor = new VectorSensor(param.vectorObservationSize);
if (param.numStackedVectorObservations > 1)
{
- var stackingSensor = new StackingSensor(collectObservationsSensor, param.numStackedVectorObservations);
+ var stackingSensor = new StackingSensor(
+ collectObservationsSensor, param.numStackedVectorObservations);
sensors.Add(stackingSensor);
}
else

// Make sure the names are actually unique
for (var i = 0; i < sensors.Count - 1; i++)
{
- Debug.Assert(!sensors[i].GetName().Equals(sensors[i + 1].GetName()), "Sensor names must be unique.");
+ Debug.Assert(
+ !sensors[i].GetName().Equals(sensors[i + 1].GetName()),
+ "Sensor names must be unique.");
}
#endif
}

void UpdateSensors()
{
- for (var i = 0; i < sensors.Count; i++)
- sensors[i].Update();
+ foreach (var sensor in sensors)
+ sensor.Update();
}
}

/// The agent observation describes the current environment from the
/// perspective of the agent.
/// </summary>
/// <param name="sensor">
/// The vector observations for the agent.
/// </param>
/// <remarks>
/// An agents observation is any environment information that helps
/// the Agent achieve its goal. For example, for a fighting Agent, its

/// Vector observations are added by calling the provided helper methods
/// on the VectorSensor input:
/// - <see cref="AddObservation(int)"/>
/// - <see cref="AddObservation(float)"/>
/// - <see cref="AddObservation(Vector3)"/>
/// - <see cref="AddObservation(Vector2)"/>
/// - <see cref="AddObservation(Quaternion)"/>
/// - <see cref="AddObservation(bool)"/>
/// - <see cref="AddOneHotObservation(int, int)"/>
/// - <see cref="VectorSensor.AddObservation(int)"/>
/// - <see cref="VectorSensor.AddObservation(float)"/>
/// - <see cref="VectorSensor.AddObservation(Vector3)"/>
/// - <see cref="VectorSensor.AddObservation(Vector2)"/>
/// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
/// - <see cref="VectorSensor.AddObservation(bool)"/>
/// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
/// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
/// Depending on your environment, any combination of these helpers can
/// be used. They just need to be used in the exact same order each time
/// this method is called and the resulting size of the vector observation

}
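
A short hedged example of an override using the VectorSensor helpers listed above; the fields m_HasKey and m_RoomIndex are hypothetical, and the snippet assumes it sits inside an Agent subclass:

public override void CollectObservations(VectorSensor sensor)
{
    // 3 floats for the agent's position.
    sensor.AddObservation(transform.localPosition);
    // 1 float (0 or 1) for a boolean flag.
    sensor.AddObservation(m_HasKey);
    // One-hot encoding of a discrete state with 4 possible values, i.e. 4 floats.
    sensor.AddOneHotObservation(m_RoomIndex, 4);
}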
/// <summary>
- /// Collects the vector observations of the agent.
+ /// Collects the vector observations of the agent alongside the masked actions.
/// <param name="sensor">
/// The vector observations for the agent.
/// </param>
/// <param name="actionMasker">
/// The masked actions for the agent.
/// </param>
/// <remarks>
/// An agents observation is any environment information that helps
/// the Agent achieve its goal. For example, for a fighting Agent, its

/// Vector observations are added by calling the provided helper methods
/// on the VectorSensor input:
/// - <see cref="AddObservation(int)"/>
/// - <see cref="AddObservation(float)"/>
/// - <see cref="AddObservation(Vector3)"/>
/// - <see cref="AddObservation(Vector2)"/>
/// - <see cref="AddObservation(Quaternion)"/>
/// - <see cref="AddObservation(bool)"/>
/// - <see cref="AddOneHotObservation(int, int)"/>
/// - <see cref="VectorSensor.AddObservation(int)"/>
/// - <see cref="VectorSensor.AddObservation(float)"/>
/// - <see cref="VectorSensor.AddObservation(Vector3)"/>
/// - <see cref="VectorSensor.AddObservation(Vector2)"/>
/// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
/// - <see cref="VectorSensor.AddObservation(bool)"/>
/// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
/// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
/// Depending on your environment, any combination of these helpers can
/// be used. They just need to be used in the exact same order each time
/// this method is called and the resulting size of the vector observation

/// When using Discrete Control, you can prevent the Agent from using a certain
/// action by masking it. You can call the following method on the ActionMasker
/// input :
/// - <see cref="SetActionMask(int branch, IEnumerable<int> actionIndices)"/>
/// - <see cref="SetActionMask(int branch, int actionIndex)"/>
/// - <see cref="SetActionMask(IEnumerable<int> actionIndices)"/>
/// - <see cref="SetActionMask(int branch, int actionIndex)"/>
/// - <see cref="ActionMasker.SetActionMask(int)"/>
/// - <see cref="ActionMasker.SetActionMask(int, int)"/>
/// - <see cref="ActionMasker.SetActionMask(int, IEnumerable{int})"/>
/// - <see cref="ActionMasker.SetActionMask(IEnumerable{int})"/>
/// The branch input is the index of the action, actionIndices are the indices of the
/// invalid options for that action.
/// </remarks>

}
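
A hedged example of the masking overload described above; branch 0, action index 2, and the m_AgainstWall flag are illustrative assumptions:

public override void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
{
    sensor.AddObservation(transform.localPosition);

    // With discrete actions, forbid action index 2 on branch 0 while blocked.
    if (m_AgainstWall)
    {
        actionMasker.SetActionMask(0, 2);
    }
}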
/// <summary>
- /// Returns the last action that was decided on by the Agent (returns null if no decision has been made)
+ /// Returns the last action that was decided on by the Agent
/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made)
/// </returns>
public float[] GetAction()
{
return m_Action.vectorActions;

/// <param name="min"></param>
/// <param name="max"></param>
/// <returns></returns>
- protected float ScaleAction(float rawAction, float min, float max)
+ protected static float ScaleAction(float rawAction, float min, float max)
{
var middle = (min + max) / 2;
var range = (max - min) / 2;
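
The tail of this method is not shown in the hunk; presumably it shifts and scales a raw action from [-1, 1] into [min, max]. A hedged completion plus a usage line (the torque range is illustrative):

protected static float ScaleAction(float rawAction, float min, float max)
{
    var middle = (min + max) / 2;
    var range = (max - min) / 2;
    // Assumed remainder: map the [-1, 1] input onto [min, max].
    return rawAction * range + middle;
}

// Example: map a continuous action in [-1, 1] to a torque in [-150, 150].
// var torque = ScaleAction(vectorAction[0], -150f, 150f);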
