Merge branch 'master' into master-into-release-0.14.1

/release-0.14.1
Anupam Bhatnagar 5 years ago
Current commit
e04fcd71
214 changed files with 1,910 additions and 1,056 deletions
  1. .yamato/com.unity.ml-agents-test.yml (11 lines changed)
  2. .yamato/standalone-build-test.yml (15 lines changed)
  3. .yamato/training-int-tests.yml (17 lines changed)
  4. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (10 lines changed)
  5. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (8 lines changed)
  6. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (4 lines changed)
  7. Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (6 lines changed)
  8. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (30 lines changed)
  9. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (11 lines changed)
  10. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (14 lines changed)
  11. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (4 lines changed)
  12. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (6 lines changed)
  13. Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (24 lines changed)
  14. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs (8 lines changed)
  15. Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (2 lines changed)
  16. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (20 lines changed)
  17. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (30 lines changed)
  18. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (15 lines changed)
  19. com.unity.ml-agents/CHANGELOG.md (36 lines changed)
  20. com.unity.ml-agents/Editor/AgentEditor.cs (2 lines changed)
  21. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (3 lines changed)
  22. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (6 lines changed)
  23. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (2 lines changed)
  24. com.unity.ml-agents/Editor/DemonstrationImporter.cs (6 lines changed)
  25. com.unity.ml-agents/LICENSE.md (3 lines changed)
  26. com.unity.ml-agents/Runtime/Academy.cs (107 lines changed)
  27. com.unity.ml-agents/Runtime/ActionMasker.cs (55 lines changed)
  28. com.unity.ml-agents/Runtime/Agent.cs (338 lines changed)
  29. com.unity.ml-agents/Runtime/DecisionRequester.cs (29 lines changed)
  30. com.unity.ml-agents/Runtime/Grpc/GrpcExtensions.cs (1 line changed)
  31. com.unity.ml-agents/Runtime/Grpc/RpcCommunicator.cs (41 lines changed)
  32. com.unity.ml-agents/Runtime/ICommunicator.cs (15 lines changed)
  33. com.unity.ml-agents/Runtime/InferenceBrain/ApplierImpl.cs (2 lines changed)
  34. com.unity.ml-agents/Runtime/InferenceBrain/BarracudaModelParamLoader.cs (5 lines changed)
  35. com.unity.ml-agents/Runtime/InferenceBrain/GeneratorImpl.cs (4 lines changed)
  36. com.unity.ml-agents/Runtime/InferenceBrain/ModelRunner.cs (1 line changed)
  37. com.unity.ml-agents/Runtime/InferenceBrain/TensorGenerator.cs (13 lines changed)
  38. com.unity.ml-agents/Runtime/InferenceBrain/TensorProxy.cs (2 lines changed)
  39. com.unity.ml-agents/Runtime/InferenceBrain/Utils/Multinomial.cs (2 lines changed)
  40. com.unity.ml-agents/Runtime/InferenceBrain/Utils/RandomNormal.cs (6 lines changed)
  41. com.unity.ml-agents/Runtime/Policy/BarracudaPolicy.cs (13 lines changed)
  42. com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs (21 lines changed)
  43. com.unity.ml-agents/Runtime/Policy/BrainParameters.cs (34 lines changed)
  44. com.unity.ml-agents/Runtime/Policy/HeuristicPolicy.cs (1 line changed)
  45. com.unity.ml-agents/Runtime/Policy/IPolicy.cs (1 line changed)
  46. com.unity.ml-agents/Runtime/Policy/RemotePolicy.cs (3 lines changed)
  47. com.unity.ml-agents/Runtime/Sensor/CameraSensor.cs (56 lines changed)
  48. com.unity.ml-agents/Runtime/Sensor/CameraSensorComponent.cs (39 lines changed)
  49. com.unity.ml-agents/Runtime/Sensor/ISensor.cs (67 lines changed)
  50. com.unity.ml-agents/Runtime/Sensor/Observation.cs (4 lines changed)
  51. com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensor.cs (574 lines changed)
  52. com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensorComponent2D.cs (13 lines changed)
  53. com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensorComponent3D.cs (43 lines changed)
  54. com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensorComponentBase.cs (281 lines changed)
  55. com.unity.ml-agents/Runtime/Sensor/RenderTextureSensor.cs (23 lines changed)
  56. com.unity.ml-agents/Runtime/Sensor/RenderTextureSensorComponent.cs (24 lines changed)
  57. com.unity.ml-agents/Runtime/Sensor/SensorBase.cs (21 lines changed)
  58. com.unity.ml-agents/Runtime/Sensor/SensorComponent.cs (19 lines changed)
  59. com.unity.ml-agents/Runtime/Sensor/SensorShapeValidator.cs (2 lines changed)
  60. com.unity.ml-agents/Runtime/Sensor/StackingSensor.cs (6 lines changed)
  61. com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs (30 lines changed)
  62. com.unity.ml-agents/Runtime/Sensor/WriteAdapter.cs (16 lines changed)
  63. com.unity.ml-agents/Runtime/SideChannel/EngineConfigurationChannel.cs (14 lines changed)
  64. com.unity.ml-agents/Runtime/SideChannel/FloatPropertiesChannel.cs (27 lines changed)
  65. com.unity.ml-agents/Runtime/SideChannel/RawBytesChannel.cs (17 lines changed)
  66. com.unity.ml-agents/Runtime/SideChannel/SideChannel.cs (29 lines changed)
  67. com.unity.ml-agents/Runtime/Timer.cs (18 lines changed)
  68. com.unity.ml-agents/Runtime/UnityAgentsException.cs (9 lines changed)
  69. com.unity.ml-agents/Runtime/Utilities.cs (84 lines changed)
  70. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (61 lines changed)
  71. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (1 line changed)
  72. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (114 lines changed)
  73. com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs (1 line changed)
  74. com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs (3 lines changed)
  75. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (1 line changed)
  76. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (1 line changed)
  77. com.unity.ml-agents/Tests/Editor/Sensor/WriterAdapterTests.cs (2 lines changed)
  78. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (20 lines changed)
  79. com.unity.ml-agents/Tests/Runtime/SerializationTest.cs (2 lines changed)
  80. com.unity.ml-agents/Tests/Runtime/SerializeAgent.cs (2 lines changed)
  81. com.unity.ml-agents/package.json (2 lines changed)
  82. config/sac_trainer_config.yaml (9 lines changed)
  83. config/trainer_config.yaml (8 lines changed)
  84. docs/API-Reference.md (9 lines changed)
  85. docs/Getting-Started-with-Balance-Ball.md (8 lines changed)
  86. docs/Learning-Environment-Best-Practices.md (4 lines changed)
  87. docs/Learning-Environment-Create-New.md (20 lines changed)
  88. docs/Learning-Environment-Design-Agents.md (44 lines changed)
  89. docs/Learning-Environment-Design.md (8 lines changed)
  90. docs/Limitations.md (4 lines changed)
  91. docs/Migrating.md (21 lines changed)
  92. docs/Profiling-Python.md (3 lines changed)
  93. docs/Python-API.md (193 lines changed)
  94. docs/Reward-Signals.md (3 lines changed)
  95. docs/Training-Generalized-Reinforcement-Learning-Agents.md (2 lines changed)
  96. docs/Training-Imitation-Learning.md (2 lines changed)
  97. docs/Training-ML-Agents.md (1 line changed)
  98. docs/Training-PPO.md (6 lines changed)
  99. docs/Training-SAC.md (6 lines changed)
  100. docs/Training-Self-Play.md (2 lines changed)

.yamato/com.unity.ml-agents-test.yml (11 lines changed)


dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
pull_requests:
- targets:
only:
- "master"
- "/release-.*/"
- "/hotfix-.*/"
changes:
only:
- "com.unity.ml-agents/**"
- ".yamato/com.unity.ml-agents-test.yml"
{% endfor %}
{% endfor %}

.yamato/standalone-build-test.yml (15 lines changed)


- pip install pyyaml
- python -u -m ml-agents.tests.yamato.standalone_build_tests
triggers:
pull_requests:
- targets:
only:
- "master"
- "/release-.*/"
- "/hotfix-.*/"
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- ".yamato/standalone-build-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
{% endfor %}

.yamato/training-int-tests.yml (17 lines changed)


- pip install pyyaml
- python -u -m ml-agents.tests.yamato.training_int_tests
triggers:
pull_requests:
- targets:
only:
- "master"
- "/release-.*/"
- "/hotfix-.*/"
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/training-int-tests.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
unit:
paths:

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (10 lines changed)


SetResetParameters();
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(gameObject.transform.rotation.z);
AddVectorObs(gameObject.transform.rotation.x);
AddVectorObs(ball.transform.position - gameObject.transform.position);
AddVectorObs(m_BallRb.velocity);
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation(ball.transform.position - gameObject.transform.position);
sensor.AddObservation(m_BallRb.velocity);
}
public override void AgentAction(float[] vectorAction)
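For reference, the migrated method assembled from the replacement lines above (a sketch only; the ball and m_BallRb fields come from the surrounding file, which this excerpt does not show):

public override void CollectObservations(VectorSensor sensor)
{
    sensor.AddObservation(gameObject.transform.rotation.z);
    sensor.AddObservation(gameObject.transform.rotation.x);
    sensor.AddObservation(ball.transform.position - gameObject.transform.position);
    sensor.AddObservation(m_BallRb.velocity);
}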

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (8 lines changed)


SetResetParameters();
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(gameObject.transform.rotation.z);
AddVectorObs(gameObject.transform.rotation.x);
AddVectorObs((ball.transform.position - gameObject.transform.position));
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation((ball.transform.position - gameObject.transform.position));
}
public override void AgentAction(float[] vectorAction)

Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (4 lines changed)


{
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(m_Position, 20);
sensor.AddOneHotObservation(m_Position, 20);
}
public override void AgentAction(float[] vectorAction)
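This file also switches helpers: the old AddVectorObs(int, int) overload wrote a one-hot encoding implicitly (see the Agent.cs diff below, where it forwards to AddOneHotObservation), while the new call names it explicitly. A hedged illustration with hypothetical values:

// One-hot encoding: with range 20 and value 3, twenty floats are written,
// where index 3 is 1.0f and the remaining nineteen entries are 0.0f.
sensor.AddOneHotObservation(3, 20);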

Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (6 lines changed)


SetResetParameters();
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(gameObject.transform.localPosition);
AddVectorObs(target.transform.localPosition);
sensor.AddObservation(gameObject.transform.localPosition);
sensor.AddObservation(target.transform.localPosition);
}
public override void AgentAction(float[] vectorAction)

Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (30 lines changed)


/// <summary>
/// Add relevant information on each body part to observations.
/// </summary>
public void CollectObservationBodyPart(BodyPart bp)
public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
AddVectorObs(bp.groundContact.touchingGround ? 1 : 0); // Whether the bp touching the ground
sensor.AddObservation(bp.groundContact.touchingGround ? 1 : 0); // Whether the bp touching the ground
AddVectorObs(velocityRelativeToLookRotationToTarget);
sensor.AddObservation(velocityRelativeToLookRotationToTarget);
AddVectorObs(angularVelocityRelativeToLookRotationToTarget);
sensor.AddObservation(angularVelocityRelativeToLookRotationToTarget);
AddVectorObs(localPosRelToBody);
AddVectorObs(bp.currentXNormalizedRot); // Current x rot
AddVectorObs(bp.currentYNormalizedRot); // Current y rot
AddVectorObs(bp.currentZNormalizedRot); // Current z rot
AddVectorObs(bp.currentStrength / m_JdController.maxJointForceLimit);
sensor.AddObservation(localPosRelToBody);
sensor.AddObservation(bp.currentXNormalizedRot); // Current x rot
sensor.AddObservation(bp.currentYNormalizedRot); // Current y rot
sensor.AddObservation(bp.currentZNormalizedRot); // Current z rot
sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
{
m_JdController.GetCurrentJointForces();

RaycastHit hit;
if (Physics.Raycast(body.position, Vector3.down, out hit, 10.0f))
{
AddVectorObs(hit.distance);
sensor.AddObservation(hit.distance);
AddVectorObs(10.0f);
sensor.AddObservation(10.0f);
AddVectorObs(bodyForwardRelativeToLookRotationToTarget);
sensor.AddObservation(bodyForwardRelativeToLookRotationToTarget);
AddVectorObs(bodyUpRelativeToLookRotationToTarget);
sensor.AddObservation(bodyUpRelativeToLookRotationToTarget);
CollectObservationBodyPart(bodyPart);
CollectObservationBodyPart(bodyPart, sensor);
}
}

Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (11 lines changed)


{
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
Monitor.verticalOffset = 1f;
m_MyArea = area.GetComponent<FoodCollectorArea>();
m_FoodCollecterSettings = FindObjectOfType<FoodCollectorSettings>();

public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(localVelocity.x);
AddVectorObs(localVelocity.z);
AddVectorObs(System.Convert.ToInt32(m_Frozen));
AddVectorObs(System.Convert.ToInt32(m_Shoot));
sensor.AddObservation(localVelocity.x);
sensor.AddObservation(localVelocity.z);
sensor.AddObservation(System.Convert.ToInt32(m_Frozen));
sensor.AddObservation(System.Convert.ToInt32(m_Shoot));
}
}

Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (14 lines changed)


{
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
{
// There are no numeric observations to collect as this environment uses visual
// observations.

{
SetMask();
SetMask(actionMasker);
}
}

void SetMask()
void SetMask(ActionMasker actionMasker)
{
// Prevents the agent from picking an action that would make it collide with a wall
var positionX = (int)transform.position.x;

if (positionX == 0)
{
SetActionMask(k_Left);
actionMasker.SetActionMask(k_Left);
SetActionMask(k_Right);
actionMasker.SetActionMask(k_Right);
SetActionMask(k_Down);
actionMasker.SetActionMask(k_Down);
SetActionMask(k_Up);
actionMasker.SetActionMask(k_Up);
}
}
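Pieced together, the new masking flow looks like this (a sketch assuming the k_Left constant and the wall-check logic from the surrounding file):

public override void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
{
    // This environment uses visual observations only; the override exists
    // to receive the masker and forward it.
    SetMask(actionMasker);
}

void SetMask(ActionMasker actionMasker)
{
    var positionX = (int)transform.position.x;
    if (positionX == 0)
    {
        actionMasker.SetActionMask(k_Left); // moving left would hit the wall
    }
}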

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (4 lines changed)


m_GroundMaterial = m_GroundRenderer.material;
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(GetStepCount() / (float)maxStep);
sensor.AddObservation(StepCount / (float)maxStep);
}
}

Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (6 lines changed)


m_SwitchLogic = areaSwitch.GetComponent<PyramidSwitch>();
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(m_SwitchLogic.GetState());
AddVectorObs(transform.InverseTransformDirection(m_AgentRb.velocity));
sensor.AddObservation(m_SwitchLogic.GetState());
sensor.AddObservation(transform.InverseTransformDirection(m_AgentRb.velocity));
}
}

Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (24 lines changed)


/// We collect the normalized rotations, angularal velocities, and velocities of both
/// limbs of the reacher as well as the relative position of the target and hand.
/// </summary>
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(pendulumA.transform.localPosition);
AddVectorObs(pendulumA.transform.rotation);
AddVectorObs(m_RbA.angularVelocity);
AddVectorObs(m_RbA.velocity);
sensor.AddObservation(pendulumA.transform.localPosition);
sensor.AddObservation(pendulumA.transform.rotation);
sensor.AddObservation(m_RbA.angularVelocity);
sensor.AddObservation(m_RbA.velocity);
AddVectorObs(pendulumB.transform.localPosition);
AddVectorObs(pendulumB.transform.rotation);
AddVectorObs(m_RbB.angularVelocity);
AddVectorObs(m_RbB.velocity);
sensor.AddObservation(pendulumB.transform.localPosition);
sensor.AddObservation(pendulumB.transform.rotation);
sensor.AddObservation(m_RbB.angularVelocity);
sensor.AddObservation(m_RbB.velocity);
AddVectorObs(goal.transform.localPosition);
AddVectorObs(hand.transform.localPosition);
sensor.AddObservation(goal.transform.localPosition);
sensor.AddObservation(hand.transform.localPosition);
AddVectorObs(m_GoalSpeed);
sensor.AddObservation(m_GoalSpeed);
}
/// <summary>

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs (8 lines changed)


public class ProjectSettingsOverrides : MonoBehaviour
{
// Original values
float m_OriginalMonitorVerticalOffset;
Vector3 m_OriginalGravity;
float m_OriginalFixedDeltaTime;
float m_OriginalMaximumDeltaTime;

[Tooltip("Increase or decrease the scene gravity. Use ~3x to make things less floaty")]
public float gravityMultiplier = 1.0f;
[Header("Display Settings")]
public float monitorVerticalOffset;
[Header("Advanced physics settings")]
[Tooltip("The interval in seconds at which physics and other fixed frame rate updates (like MonoBehaviour's FixedUpdate) are performed.")]
public float fixedDeltaTime = .02f;

public void Awake()
{
// Save the original values
m_OriginalMonitorVerticalOffset = Monitor.verticalOffset;
m_OriginalGravity = Physics.gravity;
m_OriginalFixedDeltaTime = Time.fixedDeltaTime;
m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;

// Override
Monitor.verticalOffset = monitorVerticalOffset;
Physics.gravity *= gravityMultiplier;
Time.fixedDeltaTime = fixedDeltaTime;
Time.maximumDeltaTime = maximumDeltaTime;

public void OnDestroy()
{
Monitor.verticalOffset = m_OriginalMonitorVerticalOffset;
Physics.gravity = m_OriginalGravity;
Time.fixedDeltaTime = m_OriginalFixedDeltaTime;
Time.maximumDeltaTime = m_OriginalMaximumDeltaTime;

Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (2 lines changed)


public class TemplateAgent : Agent
{
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
{
}

Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (20 lines changed)


SetResetParameters();
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(m_InvertMult * (transform.position.x - myArea.transform.position.x));
AddVectorObs(transform.position.y - myArea.transform.position.y);
AddVectorObs(m_InvertMult * m_AgentRb.velocity.x);
AddVectorObs(m_AgentRb.velocity.y);
sensor.AddObservation(m_InvertMult * (transform.position.x - myArea.transform.position.x));
sensor.AddObservation(transform.position.y - myArea.transform.position.y);
sensor.AddObservation(m_InvertMult * m_AgentRb.velocity.x);
sensor.AddObservation(m_AgentRb.velocity.y);
AddVectorObs(m_InvertMult * (ball.transform.position.x - myArea.transform.position.x));
AddVectorObs(ball.transform.position.y - myArea.transform.position.y);
AddVectorObs(m_InvertMult * m_BallRb.velocity.x);
AddVectorObs(m_BallRb.velocity.y);
sensor.AddObservation(m_InvertMult * (ball.transform.position.x - myArea.transform.position.x));
sensor.AddObservation(ball.transform.position.y - myArea.transform.position.y);
sensor.AddObservation(m_InvertMult * m_BallRb.velocity.x);
sensor.AddObservation(m_BallRb.velocity.y);
AddVectorObs(m_InvertMult * gameObject.transform.rotation.z);
sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
}
public override void AgentAction(float[] vectorAction)

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (30 lines changed)


/// <summary>
/// Add relevant information on each body part to observations.
/// </summary>
public void CollectObservationBodyPart(BodyPart bp)
public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
AddVectorObs(bp.groundContact.touchingGround ? 1 : 0); // Is this bp touching the ground
AddVectorObs(rb.velocity);
AddVectorObs(rb.angularVelocity);
sensor.AddObservation(bp.groundContact.touchingGround ? 1 : 0); // Is this bp touching the ground
sensor.AddObservation(rb.velocity);
sensor.AddObservation(rb.angularVelocity);
AddVectorObs(localPosRelToHips);
sensor.AddObservation(localPosRelToHips);
AddVectorObs(bp.currentXNormalizedRot);
AddVectorObs(bp.currentYNormalizedRot);
AddVectorObs(bp.currentZNormalizedRot);
AddVectorObs(bp.currentStrength / m_JdController.maxJointForceLimit);
sensor.AddObservation(bp.currentXNormalizedRot);
sensor.AddObservation(bp.currentYNormalizedRot);
sensor.AddObservation(bp.currentZNormalizedRot);
sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
}
}

public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(m_DirToTarget.normalized);
AddVectorObs(m_JdController.bodyPartsDict[hips].rb.position);
AddVectorObs(hips.forward);
AddVectorObs(hips.up);
sensor.AddObservation(m_DirToTarget.normalized);
sensor.AddObservation(m_JdController.bodyPartsDict[hips].rb.position);
sensor.AddObservation(hips.forward);
sensor.AddObservation(hips.up);
CollectObservationBodyPart(bodyPart);
CollectObservationBodyPart(bodyPart, sensor);
}
}

Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (15 lines changed)


}
}
public override void CollectObservations()
public override void CollectObservations(VectorSensor sensor)
AddVectorObs(agentPos / 20f);
AddVectorObs(DoGroundCheck(true) ? 1 : 0);
sensor.AddObservation(agentPos / 20f);
sensor.AddObservation(DoGroundCheck(true) ? 1 : 0);
}
/// <summary>

}
/// <summary>
/// Chenges the color of the ground for a moment
/// Changes the color of the ground for a moment.
/// <returns>The Enumerator to be used in a Coroutine</returns>
/// <param name="mat">The material to be swaped.</param>
/// <returns>The Enumerator to be used in a Coroutine.</returns>
/// <param name="mat">The material to be swapped.</param>
/// <param name="time">The time the material will remain.</param>
IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
{

/// <param name="config">Config.
/// If 0 : No wall and noWallBrain.
/// If 1: Small wall and smallWallBrain.
/// Other : Tall wall and BigWallBrain. </param>
/// Other : Tall wall and BigWallBrain.
/// </param>
void ConfigureAgent(int config)
{
var localScale = wall.transform.localScale;

com.unity.ml-agents/CHANGELOG.md (36 lines changed)


The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
- Agent.CollectObservations now takes a VectorSensor argument. It was also overloaded to optionally take an ActionMasker argument. (#3352, #3389)
- Beta support for ONNX export was added. If the `tf2onnx` python package is installed, models will be saved to `.onnx` as well as `.nn` format.
Note that Barracuda 0.6.0 or later is required to import the `.onnx` files properly.
- Multi-GPU training and the `--multi-gpu` option has been removed temporarily. (#3345)
### Minor Changes
- Monitor.cs was moved to Examples. (#3372)
- Automatic stepping for Academy is now controlled from the AutomaticSteppingEnabled property. (#3376)
- The GetEpisodeCount, GetStepCount, and GetTotalStepCount methods of Academy were changed to the EpisodeCount, StepCount, and TotalStepCount properties respectively. (#3376)
- Several classes were changed from public to internal visibility. (#3390)
- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
- A tutorial on adding custom SideChannels was added (#3391)
- The stepping logic for the Agent and the Academy has been simplified (#3448)
- Update Barracuda to 0.6.0-preview
- The interface for `RayPerceptionSensor.PerceiveStatic()` was changed to take an input class and write to an output class.
- The checkpoint file suffix was changed from `.cptk` to `.ckpt` (#3470)
- The command-line argument used to determine the port that an environment will listen on was changed from `--port` to `--mlagents-port`.
- `DemonstrationRecorder` can now record observations outside of the editor.
- `DemonstrationRecorder` now has an optional path for the demonstrations. This will default to `Application.dataPath` if not set.
- `DemonstrationStore` was changed to accept a `Stream` for its constructor, and was renamed to `DemonstrationWriter`
- The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`
- `RayPerceptionSensorComponent` and related classes now display the debug gizmos whenever the Agent is selected (not just Play mode).
- Most fields on `RayPerceptionSensorComponent` can now be changed while the editor is in Play mode. The exceptions to this are fields that affect the number of observations.
- Unused static methods from the `Utilities` class (ShiftLeft, ReplaceRange, AddRangeNoAlloc, and GetSensorFloatObservationSize) were removed.
### Bugfixes
- Fixed demonstration recording of experiences when the Agent is done. (#3463)
- Fixed a bug with the rewards of multiple Agents in the gym interface (#3471, #3496)
## [0.14.1-preview] - 2020-02-25
### Bug Fixes

- Fixed demonstration recording of experiences when the Agent is done. (#3463)
- Fixed a bug with the rewards of multiple Agents in the gym interface (#3471, #3496)
## [0.14.0-preview] - 2020-02-13
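For the Academy accessor changes listed above, the call-site migration is mechanical. A sketch (Academy.Instance is the singleton referenced in the Academy.cs diff below):

// Before (0.14): method calls
// var episode = Academy.Instance.GetEpisodeCount();
// After (this commit): properties
var episode = Academy.Instance.EpisodeCount;
var steps = Academy.Instance.StepCount;
var totalSteps = Academy.Instance.TotalStepCount;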

com.unity.ml-agents/Editor/AgentEditor.cs (2 lines changed)


*/
[CustomEditor(typeof(Agent), true)]
[CanEditMultipleObjects]
public class AgentEditor : Editor
internal class AgentEditor : Editor
{
public override void OnInspectorGUI()
{

com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (3 lines changed)


using UnityEngine;
using UnityEditor;
using Barracuda;
using MLAgents.Sensor;
namespace MLAgents
{

[CustomEditor(typeof(BehaviorParameters))]
[CanEditMultipleObjects]
public class BehaviorParametersEditor : Editor
internal class BehaviorParametersEditor : Editor
{
const float k_TimeBetweenModelReloads = 2f;
// Time since the last reload of the model

com.unity.ml-agents/Editor/BrainParametersDrawer.cs (6 lines changed)


/// Inspector.
/// </summary>
[CustomPropertyDrawer(typeof(BrainParameters))]
public class BrainParametersDrawer : PropertyDrawer
internal class BrainParametersDrawer : PropertyDrawer
{
// The height of a line in the Unity Inspectors
const float k_LineHeight = 17f;

}
/// <summary>
/// The Height required to draw the Vector Action parameters
/// The Height required to draw the Vector Action parameters.
/// <returns>The height of the drawer of the Vector Action </returns>
/// <returns>The height of the drawer of the Vector Action.</returns>
static float GetHeightDrawVectorAction(SerializedProperty property)
{
var actionSize = 2 + property.FindPropertyRelative(k_ActionSizePropName).arraySize;

com.unity.ml-agents/Editor/DemonstrationDrawer.cs (2 lines changed)


/// </summary>
[CustomEditor(typeof(Demonstration))]
[CanEditMultipleObjects]
public class DemonstrationEditor : Editor
internal class DemonstrationEditor : Editor
{
SerializedProperty m_BrainParameters;
SerializedProperty m_DemoMetaData;

com.unity.ml-agents/Editor/DemonstrationImporter.cs (6 lines changed)


/// Asset Importer used to parse demonstration files.
/// </summary>
[ScriptedImporter(1, new[] {"demo"})]
public class DemonstrationImporter : ScriptedImporter
internal class DemonstrationImporter : ScriptedImporter
const string k_IconPath = "Assets/ML-Agents/Resources/DemoIcon.png";
const string k_IconPath = "Packages/com.unity.ml-agents/Editor/Icons/DemoIcon.png";
public override void OnImportAsset(AssetImportContext ctx)
{

var metaDataProto = DemonstrationMetaProto.Parser.ParseDelimitedFrom(reader);
var metaData = metaDataProto.ToDemonstrationMetaData();
reader.Seek(DemonstrationStore.MetaDataBytes + 1, 0);
reader.Seek(DemonstrationWriter.MetaDataBytes + 1, 0);
var brainParamsProto = BrainParametersProto.Parser.ParseDelimitedFrom(reader);
var brainParameters = brainParamsProto.ToBrainParameters();

com.unity.ml-agents/LICENSE.md (3 lines changed)


com.unity.ml-agents copyright © 2020 Unity Technologies ApS
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

com.unity.ml-agents/Runtime/Academy.cs (107 lines changed)


"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{
const string k_ApiVersion = "API-14";
const string k_ApiVersion = "API-15-dev0";
internal const string k_portCommandLineFlag = "--mlagents-port";
/// <summary>
/// True if the Academy is initialized, false otherwise.
/// </summary>
/// <summary>
/// The singleton Academy object.
/// </summary>
/// <summary>
/// Collection of float properties (indexed by a string).
/// </summary>
public IFloatProperties FloatProperties;

// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;
// Signals the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done

// Signals to all the agents each time the Academy force resets.
internal event Action AgentForceReset;
// Signals that the Academy has been reset by the training process
/// <summary>
/// Signals that the Academy has been reset by the training process.
/// </summary>
public event Action OnEnvironmentReset;
AcademyFixedUpdateStepper m_FixedUpdateStepper;

{
Application.quitting += Dispose;
LazyInitialization();
LazyInitialize();
/// This method is always safe to call; it will have no effect if the Academy is already initialized.
/// This method is always safe to call; it will have no effect if the Academy is already
/// initialized.
internal void LazyInitialization()
internal void LazyInitialize()
{
if (!m_Initialized)
{

}
/// <summary>
/// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating a temporary
/// GameObject with a MonoBehavior that calls Academy.EnvironmentStep().
/// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating
/// a temporary GameObject with a MonoBehaviour that calls Academy.EnvironmentStep().
public void EnableAutomaticStepping()
void EnableAutomaticStepping()
{
if (m_FixedUpdateStepper != null)
{

// Don't show this object in the hierarchy
m_StepperObject.hideFlags = HideFlags.HideInHierarchy;
m_FixedUpdateStepper = m_StepperObject.AddComponent<AcademyFixedUpdateStepper>();
}
/// <summary>
/// Registers SideChannel to the Academy to send and receive data with Python.
/// If IsCommunicatorOn is false, the SideChannel will not be registered.
/// </summary>
/// <param name="channel"> The side channel to be registered.</param>
public void RegisterSideChannel(SideChannel channel)
{
LazyInitialize();
Communicator?.RegisterSideChannel(channel);
}
/// <summary>
/// Unregisters SideChannel to the Academy. If the side channel was not registered,
/// nothing will happen.
/// </summary>
/// <param name="channel"> The side channel to be unregistered.</param>
public void UnregisterSideChannel(SideChannel channel)
{
Communicator?.UnregisterSideChannel(channel);
}
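// Usage sketch for the registration API above (hedged: the parameterless
// FloatPropertiesChannel constructor is an assumption, not shown in this diff):
//   var channel = new FloatPropertiesChannel();
//   Academy.Instance.RegisterSideChannel(channel);
//   Academy.Instance.UnregisterSideChannel(channel);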
/// <summary>

public void DisableAutomaticStepping(bool destroyImmediate = false)
void DisableAutomaticStepping()
{
if (m_FixedUpdateStepper == null)
{

m_FixedUpdateStepper = null;
if (destroyImmediate)
if (Application.isEditor)
{
UnityEngine.Object.DestroyImmediate(m_StepperObject);
}

}
/// <summary>
/// Returns whether or not the Academy is automatically stepped during the FixedUpdate phase.
/// Determines whether or not the Academy is automatically stepped during the FixedUpdate phase.
public bool IsAutomaticSteppingEnabled
public bool AutomaticSteppingEnabled
set
{
if (value)
{
EnableAutomaticStepping();
}
else
{
DisableAutomaticStepping();
}
}
}
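// Usage sketch: the Enable/DisableAutomaticStepping methods become internal
// and are driven by this property instead; manual stepping then goes through
// Academy.Instance.EnvironmentStep() as described in the summary above.
//   Academy.Instance.AutomaticSteppingEnabled = false;
//   Academy.Instance.EnvironmentStep();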
// Used to read Python-provided environment parameters

var inputPort = "";
for (var i = 0; i < args.Length; i++)
{
if (args[i] == "--port")
if (args[i] == k_portCommandLineFlag)
{
inputPort = args[i + 1];
}

/// <returns>
/// Current episode number.
/// </returns>
public int GetEpisodeCount()
public int EpisodeCount
return m_EpisodeCount;
get { return m_EpisodeCount; }
}
/// <summary>

/// Current step count.
/// </returns>
public int GetStepCount()
public int StepCount
return m_StepCount;
get { return m_StepCount; }
}
/// <summary>

/// Total step count.
/// </returns>
public int GetTotalStepCount()
public int TotalStepCount
return m_TotalStepCount;
get { return m_TotalStepCount; }
}
/// <summary>

AgentSetStatus?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
using (TimerStack.Instance.Scoped("AgentSendState"))
{

{
AgentAct?.Invoke();
}
m_StepCount += 1;
m_TotalStepCount += 1;
}
/// <summary>

/// Creates or retrieves an existing ModelRunner that uses the same
/// NNModel and the InferenceDevice as provided.
/// </summary>
/// <param name="model"> The NNModel the ModelRunner must use </param>
/// <param name="brainParameters"> The brainParameters used to create
/// the ModelRunner </param>
/// <param name="inferenceDevice"> The inference device (CPU or GPU)
/// the ModelRunner will use </param>
/// <returns> The ModelRunner compatible with the input settings</returns>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="brainParameters">The brainParameters used to create the ModelRunner.</param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
/// <returns> The ModelRunner compatible with the input settings.</returns>
internal ModelRunner GetOrCreateModelRunner(
NNModel model, BrainParameters brainParameters, InferenceDevice inferenceDevice)
{

/// </summary>
public void Dispose()
{
DisableAutomaticStepping(true);
DisableAutomaticStepping();
// Signal to listeners that the academy is being destroyed now
DestroyAction?.Invoke();

com.unity.ml-agents/Runtime/ActionMasker.cs (55 lines changed)


namespace MLAgents
{
internal class ActionMasker
/// <summary>
/// Agents that take discrete actions can explicitly indicate that specific actions
/// are not allowed at a point in time. This enables the agent to indicate that some actions
/// may be illegal (e.g. the King in Chess taking a move to the left if it is already in the
/// left side of the board). This class represents the set of masked actions and provides
/// the utilities for setting and retrieving them.
/// </summary>
public class ActionMasker
{
/// When using discrete control, is the starting indices of the actions
/// when all the branches are concatenated with each other.

}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the actions passed as argument at the next decision.
/// The actionIndices correspond to the actions the agent will be unable to perform
/// on the branch 0.
/// </summary>
/// <param name="actionIndices">The indices of the masked actions on branch 0.</param>
public void SetActionMask(IEnumerable<int> actionIndices)
{
SetActionMask(0, actionIndices);
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision for the specified
/// action branch. The actionIndex correspond to the action the agent will be unable
/// to perform.
/// </summary>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndex">The index of the masked action.</param>
public void SetActionMask(int branch, int actionIndex)
{
SetActionMask(branch, new[] { actionIndex });
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. The actionIndex
/// correspond to the action the agent will be unable to perform on the branch 0.
/// </summary>
/// <param name="actionIndex">The index of the masked action on branch 0</param>
public void SetActionMask(int actionIndex)
{
SetActionMask(0, new[] { actionIndex });
}
/// <summary>
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// able to perform the actions passed as argument at the next decision for the specified
/// action branch. The actionIndices correspond to the action options the agent will
/// be unable to perform.
/// </summary>
/// <param name="branch">The branch for which the actions will be masked</param>
/// <param name="actionIndices">The indices of the masked actions</param>

/// </summary>
/// <returns>A mask for the agent. A boolean array of length equal to the total number of
/// actions.</returns>
public bool[] GetMask()
internal bool[] GetMask()
{
if (m_CurrentMask != null)
{

/// <summary>
/// Resets the current mask for an agent
/// </summary>
public void ResetMask()
internal void ResetMask()
{
if (m_CurrentMask != null)
{

com.unity.ml-agents/Runtime/Agent.cs (338 lines changed)


using System.Collections.Generic;
using UnityEngine;
using Barracuda;
using MLAgents.Sensor;
using UnityEngine.Serialization;
/// observations, actions and current status, that is sent to the Brain.
/// observations, actions and current status.
public struct AgentInfo
internal struct AgentInfo
{
/// <summary>
/// Keeps track of the last vector action taken by the Brain.

public float[] vectorActions;
}
/// Agent Monobehavior class that is attached to a Unity GameObject, making it
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
/// user in <see cref="CollectObservations"/>. On the other hand, actions
/// are determined by decisions produced by a Policy. Currently, this
/// class is expected to be extended to implement the desired agent behavior.
/// user in <see cref="Agent.CollectObservations(VectorSensor)"/> or
/// <see cref="Agent.CollectObservations(VectorSensor, ActionMasker)"/>.
/// On the other hand, actions are determined by decisions produced by a Policy.
/// Currently, this class is expected to be extended to implement the desired agent behavior.
/// </summary>
/// <remarks>
/// Simply speaking, an agent roams through an environment and at each step

/// little may have changed between successive steps.
///
/// At any step, an agent may be considered <see cref="m_Done"/>.
/// This could occur due to a variety of reasons:
/// At any step, an agent may be considered done due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).

BehaviorParameters m_PolicyFactory;
/// This code is here to make the upgrade path for users using maxStep
/// easier. We will hook into the Serialization code and make sure that
/// easier. We will hook into the Serialization code and make sure that
/// agentParameters.maxStep and this.maxStep are in sync.
[Serializable]
internal struct AgentParameters

[SerializeField] [HideInInspector]
[SerializeField][HideInInspector]
[SerializeField] [HideInInspector]
[SerializeField][HideInInspector]
internal bool hasUpgradedFromAgentParameters;
/// <summary>

/// Whether or not the agent requests a decision.
bool m_RequestDecision;
/// Keeps track of the number of steps taken by the agent in this episode.
/// Note that this value is different for each agent, and may not overlap
/// with the step counter in the Academy, since agents reset based on

ActionMasker m_ActionMasker;
/// <summary>
/// Demonstration recorder.
/// Set of DemonstrationWriters that the Agent will write its step information to.
/// If you use a DemonstrationRecorder component, this will automatically register its DemonstrationWriter.
/// You can also add your own DemonstrationWriter by calling
/// DemonstrationRecorder.AddDemonstrationWriterToAgent()
DemonstrationRecorder m_Recorder;
internal ISet<DemonstrationWriter> DemonstrationWriters = new HashSet<DemonstrationWriter>();
/// <summary>
/// List of sensors used to generate observations.

/// </summary>
internal VectorSensor collectObservationsSensor;
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
/// <summary>
/// <inheritdoc cref="OnBeforeSerialize"/>
/// </summary>
// Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
// from AgentParameters (since removed) to Agent
/// <summary>
/// <inheritdoc cref="OnAfterDeserialize"/>
/// </summary>
// Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
// from AgentParameters (since removed) to Agent
if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
{
maxStep = agentParameters.maxStep;

/// Helper method for the <see cref="OnEnable"/> event, created to
/// facilitate testing.
/// <summary>
/// Initializes the agent. Can be safely called multiple times.
/// </summary>
public void LazyInitialize()
{
if (m_Initialized)

// Grab the "static" properties for the Agent.
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Recorder = GetComponent<DemonstrationRecorder>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;
Academy.Instance.AgentSendState += SendInfo;
Academy.Instance.DecideAction += DecideAction;
Academy.Instance.AgentAct += AgentStep;

InitializeSensors();
}
/// Monobehavior function that is called when the attached GameObject
/// becomes disabled or inactive.
DemonstrationWriters.Clear();
Academy.Instance.AgentIncrementStep -= AgentIncrementStep;
Academy.Instance.AgentSendState -= SendInfo;
Academy.Instance.DecideAction -= DecideAction;
Academy.Instance.AgentAct -= AgentStep;

// We request a decision so Python knows the Agent is done immediately
m_Brain?.RequestDecision(m_Info, sensors);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
m_Recorder.WriteExperience(m_Info, sensors);
// We also have to write any to any DemonstationStores so that they get the "done" flag.
foreach(var demoWriter in DemonstrationWriters)
demoWriter.Record(m_Info, sensors);
}
UpdateRewardStats();

/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Current episode number.
/// Current step count.
public int GetStepCount()
public int StepCount
return m_StepCount;
get { return m_StepCount; }
}
/// <summary>

public void SetReward(float reward)
{
#if DEBUG
if (float.IsNaN(reward))
{
throw new ArgumentException("NaN reward passed to SetReward.");
}
Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetReward));
#endif
m_CumulativeReward += (reward - m_Reward);
m_Reward = reward;

public void AddReward(float increment)
{
#if DEBUG
if (float.IsNaN(increment))
{
throw new ArgumentException("NaN reward passed to AddReward.");
}
Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddReward));
#endif
m_Reward += increment;
m_CumulativeReward += increment;

/// </returns>
public virtual float[] Heuristic()
{
throw new UnityAgentsException(string.Format(
"{0} GameObject.",
gameObject.name));
throw new UnityAgentsException(
$"{gameObject.name} GameObject.");
}
/// <summary>

collectObservationsSensor = new VectorSensor(param.vectorObservationSize);
if (param.numStackedVectorObservations > 1)
{
var stackingSensor = new StackingSensor(collectObservationsSensor, param.numStackedVectorObservations);
var stackingSensor = new StackingSensor(
collectObservationsSensor, param.numStackedVectorObservations);
sensors.Add(stackingSensor);
}
else

// Make sure the names are actually unique
for (var i = 0; i < sensors.Count - 1; i++)
{
Debug.Assert(!sensors[i].GetName().Equals(sensors[i + 1].GetName()), "Sensor names must be unique.");
Debug.Assert(
!sensors[i].GetName().Equals(sensors[i + 1].GetName()),
"Sensor names must be unique.");
}
#endif
}

UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations();
CollectObservations(collectObservationsSensor, m_ActionMasker);
}
m_Info.actionMasks = m_ActionMasker.GetMask();

m_Brain.RequestDecision(m_Info, sensors);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
m_Recorder.WriteExperience(m_Info, sensors);
// If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
foreach(var demoWriter in DemonstrationWriters)
demoWriter.Record(m_Info, sensors);
for (var i = 0; i < sensors.Count; i++)
sensors[i].Update();
foreach (var sensor in sensors)
sensor.Update();
/// Collects the (vector, visual) observations of the agent.
/// Collects the vector observations of the agent.
/// <param name="sensor">
/// The vector observations for the agent.
/// </param>
/// Simply, an agents observation is any environment information that helps
/// the Agent acheive its goal. For example, for a fighting Agent, its
/// An agents observation is any environment information that helps
/// the Agent achieve its goal. For example, for a fighting Agent, its
/// Vector observations are added by calling the provided helper methods:
/// - <see cref="AddVectorObs(int)"/>
/// - <see cref="AddVectorObs(float)"/>
/// - <see cref="AddVectorObs(Vector3)"/>
/// - <see cref="AddVectorObs(Vector2)"/>
/// - <see>
/// <cref>AddVectorObs(float[])</cref>
/// </see>
/// - <see>
/// <cref>AddVectorObs(List{float})</cref>
/// </see>
/// - <see cref="AddVectorObs(Quaternion)"/>
/// - <see cref="AddVectorObs(bool)"/>
/// - <see cref="AddVectorObs(int, int)"/>
/// Vector observations are added by calling the provided helper methods
/// on the VectorSensor input:
/// - <see cref="VectorSensor.AddObservation(int)"/>
/// - <see cref="VectorSensor.AddObservation(float)"/>
/// - <see cref="VectorSensor.AddObservation(Vector3)"/>
/// - <see cref="VectorSensor.AddObservation(Vector2)"/>
/// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
/// - <see cref="VectorSensor.AddObservation(bool)"/>
/// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
/// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
/// Depending on your environment, any combination of these helpers can
/// be used. They just need to be used in the exact same order each time
/// this method is called and the resulting size of the vector observation

/// </remarks>
public virtual void CollectObservations()
{
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="actionIndices">The indices of the masked actions on branch 0</param>
protected void SetActionMask(IEnumerable<int> actionIndices)
{
m_ActionMasker.SetActionMask(0, actionIndices);
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="actionIndex">The index of the masked action on branch 0</param>
protected void SetActionMask(int actionIndex)
{
m_ActionMasker.SetActionMask(0, new[] { actionIndex });
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// </summary>
/// <param name="branch">The branch for which the actions will be masked</param>
/// <param name="actionIndex">The index of the masked action</param>
protected void SetActionMask(int branch, int actionIndex)
public virtual void CollectObservations(VectorSensor sensor)
m_ActionMasker.SetActionMask(branch, new[] { actionIndex });
/// Modifies an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is
/// specified, the default branch will be 0. The actionIndex or actionIndices correspond
/// to the action the agent will be unable to perform.
/// Collects the vector observations of the agent alongside the masked actions.
/// The agent observation describes the current environment from the
/// perspective of the agent.
/// <param name="branch">The branch for which the actions will be masked</param>
/// <param name="actionIndices">The indices of the masked actions</param>
protected void SetActionMask(int branch, IEnumerable<int> actionIndices)
{
m_ActionMasker.SetActionMask(branch, actionIndices);
}
/// <summary>
/// Adds a float observation to the vector observations of the agent.
/// Increases the size of the agents vector observation by 1.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(float observation)
{
collectObservationsSensor.AddObservation(observation);
}
/// <summary>
/// Adds an integer observation to the vector observations of the agent.
/// Increases the size of the agents vector observation by 1.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(int observation)
/// <param name="sensor">
/// The vector observations for the agent.
/// </param>
/// <param name="actionMasker">
/// The masked actions for the agent.
/// </param>
/// <remarks>
/// An agents observation is any environment information that helps
/// the Agent achieve its goal. For example, for a fighting Agent, its
/// observation could include distances to friends or enemies, or the
/// current level of ammunition at its disposal.
/// Recall that an Agent may attach vector or visual observations.
/// Vector observations are added by calling the provided helper methods
/// on the VectorSensor input:
/// - <see cref="VectorSensor.AddObservation(int)"/>
/// - <see cref="VectorSensor.AddObservation(float)"/>
/// - <see cref="VectorSensor.AddObservation(Vector3)"/>
/// - <see cref="VectorSensor.AddObservation(Vector2)"/>
/// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
/// - <see cref="VectorSensor.AddObservation(bool)"/>
/// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
/// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
/// Depending on your environment, any combination of these helpers can
/// be used. They just need to be used in the exact same order each time
/// this method is called and the resulting size of the vector observation
/// needs to match the vectorObservationSize attribute of the linked Brain.
/// Visual observations are implicitly added from the cameras attached to
/// the Agent.
/// When using Discrete Control, you can prevent the Agent from using a certain
/// action by masking it. You can call the following method on the ActionMasker
/// input :
/// - <see cref="ActionMasker.SetActionMask(int)"/>
/// - <see cref="ActionMasker.SetActionMask(int, int)"/>
/// - <see cref="ActionMasker.SetActionMask(int, IEnumerable{int})"/>
/// - <see cref="ActionMasker.SetActionMask(IEnumerable{int})"/>
/// The branch input is the index of the action, actionIndices are the indices of the
/// invalid options for that action.
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
collectObservationsSensor.AddObservation(observation);
}
/// <summary>
/// Adds an Vector3 observation to the vector observations of the agent.
/// Increases the size of the agents vector observation by 3.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(Vector3 observation)
{
collectObservationsSensor.AddObservation(observation);
}
/// <summary>
/// Adds an Vector2 observation to the vector observations of the agent.
/// Increases the size of the agents vector observation by 2.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(Vector2 observation)
{
collectObservationsSensor.AddObservation(observation);
}
/// <summary>
/// Adds a collection of float observations to the vector observations of the agent.
/// Increases the size of the agents vector observation by size of the collection.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(IEnumerable<float> observation)
{
collectObservationsSensor.AddObservation(observation);
}
/// <summary>
/// Adds a quaternion observation to the vector observations of the agent.
/// Increases the size of the agents vector observation by 4.
/// </summary>
/// <param name="observation">Observation.</param>
protected void AddVectorObs(Quaternion observation)
{
collectObservationsSensor.AddObservation(observation);
}
/// <summary>
/// Adds a boolean observation to the vector observation of the agent.
/// Increases the size of the agent's vector observation by 1.
/// </summary>
/// <param name="observation"></param>
protected void AddVectorObs(bool observation)
{
collectObservationsSensor.AddObservation(observation);
}
protected void AddVectorObs(int observation, int range)
{
collectObservationsSensor.AddOneHotObservation(observation, range);
CollectObservations(sensor);
}
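// Editorial note: the default two-argument overload above simply calls
// CollectObservations(sensor), so Agents that do not mask actions only
// need to override the single-argument version.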
/// <summary>

}
/// <summary>
/// Returns the last action that was decided on by the Agent (returns null if no decision has been made)
/// Returns the last action that was decided on by the Agent
/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made)
/// </returns>
return m_Action.vectorActions;
return m_Action.vectorActions;
}
/// <summary>

AgentReset();
}
internal void UpdateAgentAction(AgentAction action)
{
m_Action = action;
}
/// <summary>
/// Scales continuous action from [-1, 1] to arbitrary range.
/// </summary>