|
|
|
|
|
|
internal struct AgentInfo
{
    /// <summary>
    /// Keeps track of the last actions taken by the Brain.
    /// </summary>
    public ActionBuffers storedActions;

    /// <summary>
    /// For discrete control, specifies the actions that the agent cannot take.
    /// </summary>
    public void ClearActions()
    {
        storedActions.Clear();
    }

    public void CopyActions(ActionBuffers actionBuffers)
    {
        var continuousActions = storedActions.ContinuousActions;
        for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
        {
            continuousActions[i] = actionBuffers.ContinuousActions[i];
        }
        var discreteActions = storedActions.DiscreteActions;
        for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
        {
            discreteActions[i] = actionBuffers.DiscreteActions[i];
        }
    }
}
        InitializeSensors();
    }
        m_Info.storedActions = new ActionBuffers(
            new float[m_ActuatorManager.NumContinuousActions],
            new int[m_ActuatorManager.NumDiscreteActions]
        );
        m_CumulativeReward = 0f;
        m_RequestAction = false;
        m_RequestDecision = false;
        m_Info.storedActions.Clear();
    }
    /// <summary>
    /// <seealso cref="IActionReceiver.OnActionReceived"/>
    public virtual void Heuristic(in ActionBuffers actionsOut)
    {
        var brainParams = m_PolicyFactory.BrainParameters;
        var actionSpec = brainParams.ActionSpec;
        // For continuous and discrete actions together, we don't need to fall back to the legacy method
        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
        {
            Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
            actionsOut.Clear();
            return;
        }
        switch (brainParams.VectorActionSpaceType)
        {
            case SpaceType.Continuous:
                Heuristic(actionsOut.ContinuousActions.Array);

            CollectObservations(collectObservationsSensor);
        }
    }
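// Illustrative sketch (hypothetical subclass, not part of this file): an agent that overrides
// Heuristic(in ActionBuffers) and writes into the buffers directly, so the legacy float[]
// fallback above never runs. The input axis/key names and action layout are assumptions.
public class ManualDriveAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Continuous action 0: steering read from the keyboard/gamepad, already in [-1, 1].
        var continuousActions = actionsOut.ContinuousActions;
        continuousActions[0] = Input.GetAxis("Horizontal");

        // Discrete branch 0: 1 = jump, 0 = do nothing.
        var discreteActions = actionsOut.DiscreteActions;
        discreteActions[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
    }
}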
|
|
|
        using (TimerStack.Instance.Scoped("WriteActionMask"))
        {
            m_ActuatorManager.WriteActionMask();
        }
    }
    /// <summary>
    /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
    /// actions. When using discrete actions, the agent will not perform the masked
    /// action.
    /// </summary>
    /// <remarks>
    /// When using Discrete Control, you can prevent the Agent from using a certain
    /// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
    ///
    /// See [Agents - Actions] for more information on masking actions.
    ///
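    /// <example>
    /// An illustrative sketch of an override (the hypothetical helper <c>IsBlockedLeft()</c>
    /// stands in for your own game logic), assuming a single discrete branch 0 in which
    /// action 1 means "move left":
    /// <code>
    /// public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    /// {
    ///     if (IsBlockedLeft())
    ///     {
    ///         // The policy can no longer pick action 1 on branch 0 for this step.
    ///         actionMask.WriteMask(0, new[] { 1 });
    ///     }
    /// }
    /// </code>
    /// </example>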
|
|
|
|
|
|
|
    /// on the provided action.
    /// </summary>
    /// <remarks>
    /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
    /// Your implementation must use the array to direct the agent's behavior for the
    /// current step.
    ///
    /// You decide how many elements you need in the ActionBuffers to control your
    /// agent and what each element means. For example, if you want to apply a
    /// force to move an agent around the environment, you can arbitrarily pick
    /// three values in the ActionBuffers.ContinuousActions array to use as the force components.
    /// During training, the agent's policy learns to set those particular elements of
    /// the array to maximize the training rewards the agent receives.
    ///
    /// An Agent can use continuous and/or discrete actions. Configure this along with the size
    /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
    /// <see cref="BehaviorParameters"/> component.
    ///
    /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
    /// array are floating point numbers. You should clamp the values to the range, -1..1, to
    /// increase numerical stability during training.
    ///
    /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
    /// are integers that each represent a specific, discrete action. For example,
    /// you could define a set of discrete actions such as:
    ///
    /// <code>
    /// 0 = Do nothing
    /// 1 = Move one space left
    /// 2 = Move one space right
    /// 3 = Move one space up
    /// 4 = Move one space down
    /// </code>
    ///
    /// When making a decision, the agent picks one of the five actions and puts the
    /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
    /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
    /// a single element with the value 1.
    ///
    /// You can define multiple sets, or branches, of discrete actions to allow an
    /// agent to perform simultaneous, independent actions.
    ///
    /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
    /// element for each branch. The value of each element is the integer representing the
    /// chosen action for that branch. The agent always chooses one action for each branch.
    ///
    /// When you use discrete actions, you can prevent the training process
    /// or the trained model from using certain actions by
    /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
    /// method. For example, if your agent is next to a wall, you could mask out any
    /// actions that would result in the agent trying to move into the wall.
    ///
    /// For more information about implementing agent actions see [Agents - Actions].
    /// </remarks>
    /// <param name="actions">
    /// Struct containing the buffers of actions to be executed at this step.
    /// </param>
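    /// <example>
    /// An illustrative sketch of an override (hypothetical <c>m_Rigidbody</c> and <c>m_ForceScale</c>
    /// fields; the 3-value continuous force plus one discrete movement branch are assumptions, not a
    /// requirement of the API):
    /// <code>
    /// public override void OnActionReceived(ActionBuffers actions)
    /// {
    ///     // Continuous actions 0-2: force components, clamped to [-1, 1] for stability.
    ///     var force = new Vector3(
    ///         Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f),
    ///         Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f),
    ///         Mathf.Clamp(actions.ContinuousActions[2], -1f, 1f));
    ///     m_Rigidbody.AddForce(force * m_ForceScale);
    ///
    ///     // Discrete branch 0: 0 = do nothing, 1 = move one space left, 2 = move one space right.
    ///     switch (actions.DiscreteActions[0])
    ///     {
    ///         case 1: transform.Translate(Vector3.left); break;
    ///         case 2: transform.Translate(Vector3.right); break;
    ///     }
    /// }
    /// </code>
    /// </example>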
|
|
|
|
    public virtual void OnActionReceived(ActionBuffers actions)
    {
        var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
        // For continuous and discrete actions together, we don't need to fall back to the legacy method
        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
        {
            // Nothing implemented.
            return;
        }

        if (!actions.ContinuousActions.IsEmpty())
        {
            m_LegacyActionCache = actions.ContinuousActions.Array;
|