
merge master into trainer-plugin branch

/trainer-plugin
Anupam Bhatnagar, 4 years ago
Current commit
f4f1a8d9
94 files changed, with 6,014 insertions and 402 deletions
Changed files (number of changed lines in parentheses):
  1. DevProject/Packages/manifest.json (10)
  2. DevProject/ProjectSettings/ProjectVersion.txt (4)
  3. Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab (60)
  4. Project/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (53)
  5. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs (23)
  6. com.unity.ml-agents/CHANGELOG.md (19)
  7. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (14)
  8. com.unity.ml-agents/Runtime/Academy.cs (7)
  9. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (17)
  10. com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs (9)
  11. com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs (3)
  12. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (49)
  13. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (16)
  14. com.unity.ml-agents/Runtime/Actuators/IActuator.cs (2)
  15. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (10)
  16. com.unity.ml-agents/Runtime/Agent.cs (25)
  17. com.unity.ml-agents/Runtime/Agent.deprecated.cs (11)
  18. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (34)
  19. com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs (5)
  20. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (27)
  21. com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs (16)
  22. com.unity.ml-agents/Runtime/Demonstrations/DemonstrationWriter.cs (8)
  23. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (3)
  24. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (90)
  25. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (10)
  26. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (13)
  27. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (7)
  28. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (31)
  29. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (4)
  30. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (9)
  31. com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs (2)
  32. com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs (4)
  33. com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (3)
  34. com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs (18)
  35. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (5)
  36. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (31)
  37. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (33)
  38. com.unity.ml-agents/package.json (6)
  39. config/ppo/StrikersVsGoalie.yaml (4)
  40. ml-agents/mlagents/trainers/buffer.py (2)
  41. ml-agents/mlagents/trainers/cli_utils.py (7)
  42. ml-agents/mlagents/trainers/ghost/trainer.py (12)
  43. ml-agents/mlagents/trainers/policy/tf_policy.py (2)
  44. ml-agents/mlagents/trainers/ppo/trainer.py (76)
  45. ml-agents/mlagents/trainers/sac/trainer.py (120)
  46. ml-agents/mlagents/trainers/settings.py (14)
  47. ml-agents/mlagents/trainers/tests/test_ghost.py (7)
  48. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5)
  49. ml-agents/mlagents/trainers/tests/test_sac.py (3)
  50. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)
  51. ml-agents/mlagents/trainers/tests/torch/test_layers.py (19)
  52. ml-agents/mlagents/trainers/tests/torch/test_networks.py (17)
  53. ml-agents/mlagents/trainers/tests/torch/test_utils.py (6)
  54. ml-agents/mlagents/trainers/tf/model_serialization.py (20)
  55. ml-agents/mlagents/trainers/torch/encoders.py (17)
  56. ml-agents/mlagents/trainers/torch/layers.py (67)
  57. ml-agents/mlagents/trainers/torch/networks.py (115)
  58. ml-agents/mlagents/trainers/torch/utils.py (4)
  59. ml-agents/mlagents/trainers/trainer/rl_trainer.py (99)
  60. ml-agents/mlagents/trainers/trainer/trainer.py (5)
  61. ml-agents/mlagents/trainers/trainer_controller.py (7)
  62. test_requirements.txt (2)
  63. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs (85)
  64. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs.meta (3)
  65. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (94)
  66. ml-agents/mlagents/trainers/policy/torch_policy.py (281)
  67. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (203)
  68. ml-agents/mlagents/trainers/sac/optimizer_torch.py (561)
  69. ml-agents/mlagents/trainers/saver/torch_saver.py (118)
  70. ml-agents/mlagents/trainers/tests/torch/test.demo (1001)
  71. ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py (144)
  72. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (177)
  73. ml-agents/mlagents/trainers/tests/torch/test_policy.py (150)
  74. ml-agents/mlagents/trainers/tests/torch/test_saver.py (105)
  75. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (505)
  76. ml-agents/mlagents/trainers/tests/torch/testdcvis.demo (446)
  77. ml-agents/mlagents/trainers/torch/model_serialization.py (74)
  78. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (111)
  79. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (56)
  80. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (138)
  81. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (32)
  82. ml-agents/mlagents/trainers/torch/components/__init__.py (0)
  83. ml-agents/mlagents/trainers/torch/components/bc/__init__.py (0)
  84. ml-agents/mlagents/trainers/torch/components/bc/module.py (183)
  85. ml-agents/mlagents/trainers/torch/components/reward_providers/__init__.py (15)
  86. ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py (72)
  87. ml-agents/mlagents/trainers/torch/components/reward_providers/extrinsic_reward_provider.py (15)
  88. ml-agents/mlagents/trainers/torch/components/reward_providers/reward_provider_factory.py (43)
  89. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (225)
  90. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (256)

10
DevProject/Packages/manifest.json


"dependencies": {
"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "3.4.4",
"com.unity.ads": "3.4.7",
"com.unity.ide.vscode": "1.1.4",
"com.unity.ide.vscode": "1.2.1",
"com.unity.package-validation-suite": "0.11.0-preview",
"com.unity.package-validation-suite": "0.14.0-preview",
"com.unity.test-framework": "1.1.13",
"com.unity.test-framework": "1.1.14",
"com.unity.xr.legacyinputhelpers": "1.3.11",
"com.unity.xr.legacyinputhelpers": "2.1.4",
"com.unity.modules.ai": "1.0.0",
"com.unity.modules.androidjni": "1.0.0",
"com.unity.modules.animation": "1.0.0",

4
DevProject/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2019.3.11f1
m_EditorVersionWithRevision: 2019.3.11f1 (ceef2d848e70)
m_EditorVersion: 2019.4.7f1
m_EditorVersionWithRevision: 2019.4.7f1 (e992b1a16e65)

60
Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab


- component: {fileID: 114502619508238574}
- component: {fileID: 114827551040495112}
- component: {fileID: 6790639061807687247}
- component: {fileID: 748290217173149218}
- component: {fileID: 4562193454555387360}
- component: {fileID: 2066777822704547994}
m_Layer: 0
m_Name: BasicAgent
m_TagString: Untagged

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 20
numStackedVectorObservations: 1
vectorActionSize: 03000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
VectorActionSize:
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 468c183196f1844f69e125c99dd135a1, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114827551040495112
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
timeBetweenDecisionsAtInference: 0.15
m_Position: 0
position: 0
largeGoal: {fileID: 1445405232822318}
smallGoal: {fileID: 1054019269138802}
--- !u!114 &6790639061807687247

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!114 &748290217173149218
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1263463520136984}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 88b6042bc9a5d4aa58d931eae49442e5, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 0
--- !u!114 &4562193454555387360
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1263463520136984}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 4ce4e199dabb494e8764b09f4c378098, type: 3}
m_Name:
m_EditorClassIdentifier:
basicController: {fileID: 114827551040495112}
--- !u!114 &2066777822704547994
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1263463520136984}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 6ee410d6d45349218d5e69bb2a347c63, type: 3}
m_Name:
m_EditorClassIdentifier:
basicController: {fileID: 114827551040495112}
--- !u!1 &1284220331185348
GameObject:
m_ObjectHideFlags: 0

53
Project/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity


objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 5889392e3f05b448a8a06c5def6c2dec, type: 3}
--- !u!1 &1335907378 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 1263463520136984, guid: c5eb289873aca4f5a8cc59c7464ab7c1,
type: 3}
m_PrefabInstance: {fileID: 1783603361}
m_PrefabAsset: {fileID: 0}
--- !u!114 &1335907380
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1335907378}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 88b6042bc9a5d4aa58d931eae49442e5, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
--- !u!114 &1335907381 stripped
MonoBehaviour:
m_CorrespondingSourceObject: {fileID: 114827551040495112, guid: c5eb289873aca4f5a8cc59c7464ab7c1,
type: 3}
m_PrefabInstance: {fileID: 1783603361}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1335907378}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 624480a72e46148118ab2e2d89b537de, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!114 &1335907384
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1335907378}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 6ee410d6d45349218d5e69bb2a347c63, type: 3}
m_Name:
m_EditorClassIdentifier:
basicController: {fileID: 1335907381}
--- !u!1001 &1502457254
PrefabInstance:
m_ObjectHideFlags: 0

propertyPath: m_BrainParameters.vectorObservationSize
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114502619508238574, guid: c5eb289873aca4f5a8cc59c7464ab7c1,
type: 3}
propertyPath: m_BrainParameters.VectorObservationSize
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: c5eb289873aca4f5a8cc59c7464ab7c1, type: 3}
--- !u!1 &1889211226

maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
reuseCollisionCallbacks: 1
--- !u!4 &1889211228
Transform:
m_ObjectHideFlags: 0

23
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


/// <summary>
/// Controls the movement of the GameObject based on the actions received.
/// </summary>
/// <param name="vectorAction"></param>
public void ApplyAction(ActionSegment<int> vectorAction)
/// <param name="direction"></param>
public void MoveDirection(int direction)
var movement = vectorAction[0];
var direction = 0;
switch (movement)
{
case 1:
direction = -1;
break;
case 2:
direction = 1;
break;
}
position += direction;
if (position < k_MinPosition) { position = k_MinPosition; }
if (position > k_MaxPosition) { position = k_MaxPosition; }

}
if (Academy.Instance.IsCommunicatorOn)
{
// Apply the previous step's actions
ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
m_Agent?.RequestDecision();
}
else

// Apply the previous step's actions
ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
m_TimeSinceDecision = 0f;
m_Agent?.RequestDecision();
}

19
com.unity.ml-agents/CHANGELOG.md


### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] - 2020-08-12
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.2.
- Enabled C# formatting using `dotnet-format`.
#### ml-agents / ml-agents-envs / gym-unity (Python)

- The package dependencies were updated to also include the built-in packages that are used. (#4384)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] - 2020-08-12

- The interaction between EnvManager and TrainerController was changed; EnvManager.advance() was split into two stages,
and TrainerController now uses the results from the first stage to handle new behavior names. This change speeds up
Python training by approximately 5-10%. (#4259)
- Experimental PyTorch support has been added. Use `--torch` when running `mlagents-learn`, or add
`framework: pytorch` to your trainer configuration (under the behavior name) to enable it.
Note that PyTorch 1.6.0 or greater should be installed to use this feature; see
[the PyTorch website](https://pytorch.org/) for installation instructions. (#4335)
### Minor Changes
#### com.unity.ml-agents (C#)

14
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


using System.Collections.Generic;
using UnityEditor;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;

var behaviorParameters = (BehaviorParameters)target;
// Grab the sensor components, since we need them to determine the observation sizes.
// TODO make these methods of BehaviorParameters
SensorComponent[] sensorComponents;
if (behaviorParameters.UseChildSensors)
{

{
sensorComponents = behaviorParameters.GetComponents<SensorComponent>();
}
ActuatorComponent[] actuatorComponents;
if (behaviorParameters.UseChildActuators)
{
actuatorComponents = behaviorParameters.GetComponentsInChildren<ActuatorComponent>();
}
else
{
actuatorComponents = behaviorParameters.GetComponents<ActuatorComponent>();
}
// Get the total size of the sensors generated by ObservableAttributes.

if (brainParameters != null)
{
var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensorComponents,
barracudaModel, brainParameters, sensorComponents, actuatorComponents,
observableAttributeSensorTotalSize, behaviorParameters.BehaviorType
);
foreach (var check in failedChecks)

7
com.unity.ml-agents/Runtime/Academy.cs


#if UNITY_EDITOR
using UnityEditor;
#endif
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.SideChannels;

/// NNModel and the InferenceDevice as provided.
/// </summary>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="brainParameters">The BrainParameters used to create the ModelRunner.</param>
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
NNModel model, BrainParameters brainParameters, InferenceDevice inferenceDevice)
NNModel model, ActionSpec actionSpec, InferenceDevice inferenceDevice)
modelRunner = new ModelRunner(model, brainParameters, inferenceDevice, m_InferenceSeed);
modelRunner = new ModelRunner(model, actionSpec, inferenceDevice, m_InferenceSeed);
m_ModelRunners.Add(modelRunner);
m_InferenceSeed++;
}

17
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


/// <param name="branchSizes">The array of branch sizes for the discrete action space. Each index
/// contains the number of actions available for that branch.</param>
/// <returns>An Discrete ActionSpec initialized with the array of branch sizes.</returns>
public static ActionSpec MakeDiscrete(int[] branchSizes)
public static ActionSpec MakeDiscrete(params int[] branchSizes)
{
var numActions = branchSizes.Length;
var actuatorSpace = new ActionSpec(0, numActions, branchSizes);

ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
internal ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
}
/// <summary>
/// Temporary check that the ActionSpec uses either all continuous or all discrete actions.
/// This should be removed once the trainer supports them.
/// </summary>
/// <exception cref="UnityAgentsException"></exception>
internal void CheckNotHybrid()
{
if (NumContinuousActions > 0 && NumDiscreteActions > 0)
{
throw new UnityAgentsException("ActionSpecs must be all continuous or all discrete.");
}
}
}
}
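
For context, the params overload above lets callers list discrete branch sizes inline; both factory methods are exercised by the updated tests later in this diff (GrpcExtensionsTests, ModelRunnerTest). A minimal sketch of the call sites, with the wrapper class name invented for illustration:

    using Unity.MLAgents.Actuators;

    public static class ActionSpecUsageSketch
    {
        public static void Demo()
        {
            // Two discrete branches with 2 and 3 actions, passed inline thanks to
            // the params overload; previously this required MakeDiscrete(new[] { 2, 3 }).
            var discrete = ActionSpec.MakeDiscrete(2, 3);

            // A spec with three continuous actions.
            var continuous = ActionSpec.MakeContinuous(3);

            UnityEngine.Debug.Log(discrete.NumDiscreteActions);     // 2 branches
            UnityEngine.Debug.Log(continuous.NumContinuousActions); // 3 actions
        }
    }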

9
com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs


/// Editor components for creating Actuators. Generally an IActuator component should
/// have a corresponding ActuatorComponent.
/// </summary>
internal abstract class ActuatorComponent : MonoBehaviour
public abstract class ActuatorComponent : MonoBehaviour
{
/// <summary>
/// Create the IActuator. This is called by the Agent when it is initialized.

/// <summary>
/// The specification of the Action space for this ActuatorComponent.
/// This must produce the same results as the corresponding IActuator's ActionSpec.
/// </summary>
/// <seealso cref="ActionSpec"/>
public abstract ActionSpec ActionSpec { get; }
}
}
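
Because ActuatorComponent is now public (and this commit adds BasicActuatorComponent.cs to the Basic example), user projects can supply their own actuators. The sketch below is hypothetical: the factory-method name CreateActuator() is assumed from the doc comment above, and IActuator is assumed to require only the members visible in this diff (ActionSpec, Name, OnActionReceived, WriteDiscreteActionMask) plus a ResetData() hook, so treat it as an outline rather than a verified implementation.

    using Unity.MLAgents.Actuators;

    // Hypothetical actuator with a single discrete branch of three actions.
    public class SketchActuator : IActuator
    {
        public ActionSpec ActionSpec { get; } = ActionSpec.MakeDiscrete(3);
        public string Name => "SketchActuator";

        public void OnActionReceived(ActionBuffers actionBuffers)
        {
            var choice = actionBuffers.DiscreteActions[0]; // 0, 1 or 2
            // Apply `choice` to the scene here.
        }

        public void WriteDiscreteActionMask(IDiscreteActionMask actionMask) { }
        public void ResetData() { }
    }

    // The component the Agent discovers; its ActionSpec must agree with the
    // actuator it creates (the abstract property shown in the diff above).
    public class SketchActuatorComponent : ActuatorComponent
    {
        public override ActionSpec ActionSpec => ActionSpec.MakeDiscrete(3);

        // Assumed factory-method name; only its doc comment appears in this diff.
        public override IActuator CreateActuator() => new SketchActuator();
    }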

3
com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs


/// </summary>
public int CurrentBranchOffset { get; set; }
internal ActuatorDiscreteActionMask(IList<IActuator> actuators, int sumOfDiscreteBranchSizes, int numBranches)
internal ActuatorDiscreteActionMask(IList<IActuator> actuators, int sumOfDiscreteBranchSizes, int numBranches, int[] branchSizes = null)
m_BranchSizes = branchSizes;
}
/// <inheritdoc cref="IDiscreteActionMask.WriteMask"/>

49
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


// An implementation of IDiscreteActionMask that allows for writing to it based on an offset.
ActuatorDiscreteActionMask m_DiscreteActionMask;
ActionSpec m_CombinedActionSpec;
/// <summary>
/// Flag used to check if our IActuators are ready for execution.
/// </summary>

var discreteActions = numDiscreteBranches == 0 ? ActionSegment<int>.Empty : new ActionSegment<int>(new int[numDiscreteBranches]);
StoredActions = new ActionBuffers(continuousActions, discreteActions);
m_DiscreteActionMask = new ActuatorDiscreteActionMask(actuators, sumOfDiscreteBranches, numDiscreteBranches);
m_CombinedActionSpec = CombineActionSpecs(actuators);
m_DiscreteActionMask = new ActuatorDiscreteActionMask(actuators, sumOfDiscreteBranches, numDiscreteBranches, m_CombinedActionSpec.BranchSizes);
}
internal static ActionSpec CombineActionSpecs(IList<IActuator> actuators)
{
int numContinuousActions = 0;
int numDiscreteActions = 0;
foreach (var actuator in actuators)
{
numContinuousActions += actuator.ActionSpec.NumContinuousActions;
numDiscreteActions += actuator.ActionSpec.NumDiscreteActions;
}
int[] combinedBranchSizes;
if (numDiscreteActions == 0)
{
combinedBranchSizes = Array.Empty<int>();
}
else
{
combinedBranchSizes = new int[numDiscreteActions];
var start = 0;
for (var i = 0; i < actuators.Count; i++)
{
var branchSizes = actuators[i].ActionSpec.BranchSizes;
if (branchSizes != null)
{
Array.Copy(branchSizes, 0, combinedBranchSizes, start, branchSizes.Length);
start += branchSizes.Length;
}
}
}
return new ActionSpec(numContinuousActions, numDiscreteActions, combinedBranchSizes);
}
/// <summary>
/// Returns an ActionSpec representing the concatenation of all IActuator's ActionSpecs
/// </summary>
/// <returns></returns>
public ActionSpec GetCombinedActionSpec()
{
ReadyActuatorsForExecution();
return m_CombinedActionSpec;
}
/// <summary>

16
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


/// <seealso cref="IActionReceiver.OnActionReceived"/>
void WriteDiscreteActionMask(IDiscreteActionMask actionMask);
}
/// <summary>
/// Helper methods to be shared by all classes that implement <see cref="IActionReceiver"/>.
/// </summary>
public static class ActionReceiverExtensions
{
/// <summary>
/// Returns the number of discrete branches + the number of continuous actions.
/// </summary>
/// <param name="actionReceiver"></param>
/// <returns></returns>
public static int TotalNumberOfActions(this IActionReceiver actionReceiver)
{
return actionReceiver.ActionSpec.NumContinuousActions + actionReceiver.ActionSpec.NumDiscreteActions;
}
}
}
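
The extension above replaces the TotalNumberOfActions property removed from IActuator below; Agent.cs later in this diff switches to the call syntax m_VectorActuator.TotalNumberOfActions(). A small illustrative helper (names invented):

    using Unity.MLAgents.Actuators;

    public static class ActionCountSketch
    {
        // Works for any IActionReceiver, including IActuator instances:
        // the extension sums continuous actions and discrete branches.
        public static int CountActions(IActionReceiver receiver)
        {
            return receiver.TotalNumberOfActions();
        }
    }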

2
com.unity.ml-agents/Runtime/Actuators/IActuator.cs


/// </summary>
public interface IActuator : IActionReceiver
{
int TotalNumberOfActions { get; }
/// <summary>
/// Gets the name of this IActuator which will be used to sort it.
/// </summary>

10
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


Name = name + suffix;
}
/// <inheritdoc />
/// <inheritdoc />
public void OnActionReceived(ActionBuffers actionBuffers)
{
ActionBuffers = actionBuffers;

/// <inheritdoc />
public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
m_ActionReceiver.WriteDiscreteActionMask(actionMask);

/// Returns the number of discrete branches + the number of continuous actions.
/// </summary>
public int TotalNumberOfActions => ActionSpec.NumContinuousActions +
ActionSpec.NumDiscreteActions;
/// <summary>
/// <inheritdoc />
public string Name { get; }
}
}

25
com.unity.ml-agents/Runtime/Agent.cs


Academy.Instance.DecideAction += DecideAction;
Academy.Instance.AgentAct += AgentStep;
Academy.Instance.AgentForceReset += _AgentReset;
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
using (TimerStack.Instance.Scoped("InitializeActuators"))
{
InitializeActuators();
}
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
ResetData();
Initialize();

}
using (TimerStack.Instance.Scoped("InitializeActuators"))
{
InitializeActuators();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];

return;
}
m_Brain?.Dispose();
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
}
/// <summary>

}
// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions];
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions()];
m_ActuatorManager.Add(m_VectorActuator);

/// actions. When using discrete actions, the agent will not perform the masked
/// action.
/// </summary>
/// <param name="actionMasker">
/// The action masker for the agent.
/// <param name="actionMask">
/// The action mask for the agent.
/// </param>
/// <remarks>
/// When using Discrete Control, you can prevent the Agent from using a certain

public virtual void OnEpisodeBegin() { }
/// <summary>
/// Gets the last ActionBuffer for this agent.
/// Gets the most recent ActionBuffer for this agent.
/// <returns>The most recent ActionBuffer for this agent</returns>
public ActionBuffers GetStoredActionBuffers()
{
return m_ActuatorManager.StoredActions;

11
com.unity.ml-agents/Runtime/Agent.deprecated.cs


{
public partial class Agent
{
/// <summary>
/// Deprecated, use <see cref="WriteDiscreteActionMask"/> instead.
/// </summary>
/// <param name="actionMasker"></param>
/// This method passes in a float array that is to be populated with actions. The actions
/// This method passes in a float array that is to be populated with actions.
/// </summary>
/// <param name="actionsOut"></param>
public virtual void Heuristic(float[] actionsOut)

}
/// <summary>
/// Deprecated, use <see cref="OnActionReceived(ActionBuffers)"/> instead.
/// </summary>
/// <param name="vectorAction"></param>
public virtual void OnActionReceived(float[] vectorAction) { }
/// <summary>

{
return m_Info.storedVectorActions;
}
}
}
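
The deprecation notes above point users at the ActionBuffers overload; below is a minimal, hypothetical migration sketch (the class name is invented, and only the OnActionReceived(ActionBuffers) override referenced by the cref above is assumed):

    using Unity.MLAgents;
    using Unity.MLAgents.Actuators;

    public class MigratedAgent : Agent
    {
        // Previously: public override void OnActionReceived(float[] vectorAction)
        public override void OnActionReceived(ActionBuffers actions)
        {
            // Typed segments replace the flat float array.
            var move = actions.DiscreteActions[0]; // first discrete branch
            // Apply `move` to the scene here.
        }
    }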

34
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


using Unity.MLAgents.CommunicatorObjects;
using UnityEngine;
using System.Runtime.CompilerServices;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;

#region BrainParameters
/// <summary>
/// Converts a Brain into to a Protobuf BrainInfoProto so it can be sent
/// Converts a BrainParameters into to a BrainParametersProto so it can be sent.
/// </summary>
/// <returns>The BrainInfoProto generated.</returns>
/// <param name="bp">The instance of BrainParameter to extend.</param>

{
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}
/// <summary>
/// Converts an ActionSpec into to a Protobuf BrainInfoProto so it can be sent.
/// </summary>
/// <returns>The BrainInfoProto generated.</returns>
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
/// <param name="name">The name of the brain.</param>
/// <param name="isTraining">Whether or not the Brain is training.</param>
public static BrainParametersProto ToBrainParametersProto(this ActionSpec actionSpec, string name, bool isTraining)
{
actionSpec.CheckNotHybrid();
var brainParametersProto = new BrainParametersProto
{
BrainName = name,
IsTraining = isTraining
};
if (actionSpec.NumContinuousActions > 0)
{
brainParametersProto.VectorActionSize.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Continuous;
}
else if (actionSpec.NumDiscreteActions > 0)
{
brainParametersProto.VectorActionSize.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Discrete;
}
// TODO handle ActionDescriptions?
return brainParametersProto;
}

5
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;

/// Registers a new Brain to the Communicator.
/// </summary>
/// <param name="name">The name or key uniquely identifying the Brain.</param>
/// <param name="brainParameters">The Parameters for the Brain being registered.</param>
void SubscribeBrain(string name, BrainParameters brainParameters);
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
void SubscribeBrain(string name, ActionSpec actionSpec);
/// <summary>
/// Sends the observations of one Agent.

27
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


using System.Collections.Generic;
using System.Linq;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;

// Brains that we have sent over the communicator with agents.
HashSet<string> m_SentBrainKeys = new HashSet<string>();
Dictionary<string, BrainParameters> m_UnsentBrainKeys = new Dictionary<string, BrainParameters>();
Dictionary<string, ActionSpec> m_UnsentBrainKeys = new Dictionary<string, ActionSpec>();
#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

/// Adds the brain to the list of brains which will be sending information to External.
/// </summary>
/// <param name="brainKey">Brain key.</param>
/// <param name="brainParameters">Brain parameters needed to send to the trainer.</param>
public void SubscribeBrain(string brainKey, BrainParameters brainParameters)
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
public void SubscribeBrain(string brainKey, ActionSpec actionSpec)
{
if (m_BehaviorNames.Contains(brainKey))
{

new UnityRLOutputProto.Types.ListAgentInfoProto()
);
CacheBrainParameters(brainKey, brainParameters);
CacheActionSpec(brainKey, actionSpec);
}
void UpdateEnvironmentWithInput(UnityRLInputProto rlInput)

message.RlOutput.SideChannel = ByteString.CopyFrom(messageAggregated);
var input = Exchange(message);
UpdateSentBrainParameters(tempUnityRlInitializationOutput);
UpdateSentActionSpec(tempUnityRlInitializationOutput);
foreach (var k in m_CurrentUnityRlOutput.AgentInfos.Keys)
{

};
}
void CacheBrainParameters(string behaviorName, BrainParameters brainParameters)
void CacheActionSpec(string behaviorName, ActionSpec actionSpec)
{
if (m_SentBrainKeys.Contains(behaviorName))
{

// TODO We should check that if m_unsentBrainKeys has brainKey, it equals brainParameters
m_UnsentBrainKeys[behaviorName] = brainParameters;
// TODO We should check that if m_unsentBrainKeys has brainKey, it equals actionSpec
m_UnsentBrainKeys[behaviorName] = actionSpec;
}
UnityRLInitializationOutputProto GetTempUnityRlInitializationOutput()

{
if (m_CurrentUnityRlOutput.AgentInfos[behaviorName].CalculateSize() > 0)
{
// Only send the BrainParameters if there is a non empty list of
// Only send the actionSpec if there is a non empty list of
// observation when receiving the BrainParameters
// observation when receiving the ActionSpec
var brainParameters = m_UnsentBrainKeys[behaviorName];
output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true));
var actionSpec = m_UnsentBrainKeys[behaviorName];
output.BrainParameters.Add(actionSpec.ToBrainParametersProto(behaviorName, true));
}
}
}

void UpdateSentBrainParameters(UnityRLInitializationOutputProto output)
void UpdateSentActionSpec(UnityRLInitializationOutputProto output)
{
if (output == null)
{

16
com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs


public bool Record;
/// <summary>
/// Number of steps to record. The editor will stop playing when it reaches this threshold.
/// Set to zero to record indefinitely.
/// </summary>
[Tooltip("Number of steps to record. The editor will stop playing when it reaches this threshold. " +
"Set to zero to record indefinitely.")]
public int NumStepsToRecord = 0;
/// <summary>
/// Base demonstration file name. If multiple files are saved, the additional filenames
/// will have a sequence of unique numbers appended.
/// </summary>

if (Record)
{
LazyInitialize();
}
if (NumStepsToRecord > 0 && m_DemoWriter.NumSteps >= NumStepsToRecord)
{
Application.Quit(0);
#if UNITY_EDITOR
UnityEditor.EditorApplication.isPlaying = false;
#endif
}
}

8
com.unity.ml-agents/Runtime/Demonstrations/DemonstrationWriter.cs


}
/// <summary>
/// Number of steps written so far.
/// </summary>
internal int NumSteps
{
get { return m_MetaData.numberSteps; }
}
/// <summary>
/// Writes the initial data to the stream.
/// </summary>
/// <param name="demonstrationName">Base name of the demonstration file(s).</param>

3
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


m_Delegate.WriteMask(branch, actionIndices);
}
/// <inheritdoc />
/// <inheritdoc />
/// <inheritdoc />
public void ResetMask()
{
m_Delegate.ResetMask();

90
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


using System.Collections.Generic;
using System.Linq;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;

/// The BrainParameters that are used verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensor components</param>
/// <param name="actuatorComponents">Attached actuator components</param>
SensorComponent[] sensorComponents, int observableAttributeTotalSize = 0,
SensorComponent[] sensorComponents, ActuatorComponent[] actuatorComponents,
int observableAttributeTotalSize = 0,
BehaviorType behaviorType = BehaviorType.Default)
{
List<string> failedModelChecks = new List<string>();

return failedModelChecks;
}
var modelDiscreteActionSize = isContinuous == ModelActionType.Discrete ? actionSize : 0;
var modelContinuousActionSize = isContinuous == ModelActionType.Continuous ? actionSize : 0;
failedModelChecks.AddRange(
CheckIntScalarPresenceHelper(new Dictionary<string, int>()
{

CheckInputTensorShape(model, brainParameters, sensorComponents, observableAttributeTotalSize)
);
failedModelChecks.AddRange(
CheckOutputTensorShape(model, brainParameters, isContinuous, actionSize)
CheckOutputTensorShape(model, brainParameters, actuatorComponents, isContinuous, modelContinuousActionSize, modelDiscreteActionSize)
);
return failedModelChecks;
}

/// <param name="brainParameters">
/// The BrainParameters that are used verify the compatibility with the InferenceEngine
/// </param>
/// <param name="actuatorComponents">Array of attached actuator components.</param>
/// <param name="modelActionSize">
/// The size of the action output that is expected by the model.
/// <param name="modelContinuousActionSize">
/// The size of the continuous action output that is expected by the model.
/// </param>
/// <param name="modelSumDiscreteBranchSizes">
/// The size of the discrete action output that is expected by the model.
/// </param>
/// <returns>
/// A IEnumerable of string corresponding to the incompatible shapes between model

Model model,
BrainParameters brainParameters,
ActuatorComponent[] actuatorComponents,
int modelActionSize)
int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
{
var failedModelChecks = new List<string>();
if (isContinuous == ModelActionType.Unknown)

"suggest Continuous Control.");
return failedModelChecks;
}
var tensorTester = new Dictionary<string, Func<BrainParameters, TensorShape?, int, string>>();
if (brainParameters.VectorActionSpaceType == SpaceType.Continuous)
var tensorTester = new Dictionary<string, Func<BrainParameters, ActuatorComponent[], TensorShape?, int, int, string>>();
// This will need to change a bit for hybrid action spaces.
if (isContinuous == ModelActionType.Continuous)
{
tensorTester[TensorNames.ActionOutput] = CheckContinuousActionOutputShape;
}

}
Func<BrainParameters, TensorShape?, int, string> tester = tensorTester[name];
var error = tester.Invoke(brainParameters, model.GetShapeByName(name), modelActionSize);
var tester = tensorTester[name];
var error = tester.Invoke(brainParameters, actuatorComponents, model.GetShapeByName(name), modelContinuousActionSize, modelSumDiscreteBranchSizes);
if (error != null)
{
failedModelChecks.Add(error);

/// <param name="brainParameters">
/// The BrainParameters that are used verify the compatibility with the InferenceEngine
/// </param>
/// <param name="actuatorComponents">Array of attached actuator components.</param>
/// <param name="modelActionSize">
/// The size of the action output that is expected by the model.
/// <param name="modelContinuousActionSize">
/// The size of the continuous action output that is expected by the model.
/// </param>
/// <param name="modelSumDiscreteBranchSizes">
/// The size of the discrete action output that is expected by the model.
/// </param>
/// <returns>
/// If the Check failed, returns a string containing information about why the

BrainParameters brainParameters, TensorShape? shape, int modelActionSize)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
var bpActionSize = brainParameters.VectorActionSize.Sum();
if (modelActionSize != bpActionSize)
var sumOfDiscreteBranchSizes = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Discrete)
return "Action Size of the model does not match. The BrainParameters expect " +
$"{bpActionSize} but the model contains {modelActionSize}.";
sumOfDiscreteBranchSizes += brainParameters.VectorActionSize.Sum();
}
foreach (var actuatorComponent in actuatorComponents)
{
var actionSpec = actuatorComponent.ActionSpec;
sumOfDiscreteBranchSizes += actionSpec.SumOfDiscreteBranchSizes;
}
if (modelSumDiscreteBranchSizes != sumOfDiscreteBranchSizes)
{
return "Discrete Action Size of the model does not match. The BrainParameters expect " +
$"{sumOfDiscreteBranchSizes} but the model contains {modelSumDiscreteBranchSizes}.";
}
return null;
}

/// <param name="brainParameters">
/// The BrainParameters that are used verify the compatibility with the InferenceEngine
/// </param>
/// <param name="actuatorComponents">Array of attached actuator components.</param>
/// <param name="modelActionSize">
/// The size of the action output that is expected by the model.
/// <param name="modelContinuousActionSize">
/// The size of the continuous action output that is expected by the model.
/// </param>
/// <param name="modelSumDiscreteBranchSizes">
/// The size of the discrete action output that is expected by the model.
BrainParameters brainParameters, TensorShape? shape, int modelActionSize)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
var bpActionSize = brainParameters.VectorActionSize[0];
if (modelActionSize != bpActionSize)
var numContinuousActions = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Continuous)
return "Action Size of the model does not match. The BrainParameters expect " +
$"{bpActionSize} but the model contains {modelActionSize}.";
numContinuousActions += brainParameters.NumActions;
}
foreach (var actuatorComponent in actuatorComponents)
{
var actionSpec = actuatorComponent.ActionSpec;
numContinuousActions += actionSpec.NumContinuousActions;
}
if (modelContinuousActionSize != numContinuousActions)
{
return "Continuous Action Size of the model does not match. The BrainParameters and ActuatorComponents expect " +
$"{numContinuousActions} but the model contains {modelContinuousActionSize}.";
}
return null;
}

10
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


using System.Collections.Generic;
using Unity.Barracuda;
using UnityEngine.Profiling;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Inference
{

/// the agents
/// </summary>
/// <param name="model"> The Barracuda model to load </param>
/// <param name="brainParameters"> The parameters of the Brain used to generate the
/// placeholder tensors </param>
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
/// <param name="inferenceDevice"> Inference execution device. CPU is the fastest
/// option for most of ML Agents models. </param>
/// <param name="seed"> The seed that will be used to initialize the RandomNormal

public ModelRunner(
NNModel model,
BrainParameters brainParameters,
ActionSpec actionSpec,
InferenceDevice inferenceDevice = InferenceDevice.CPU,
int seed = 0)
{

m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(
brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel);
actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel);
}
static Dictionary<string, Tensor> PrepareBarracudaInputs(IEnumerable<TensorProxy> infInputs)

13
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Inference
{

/// <summary>
/// Returns a new TensorAppliers object.
/// </summary>
/// <param name="bp"> The BrainParameters used to determine what Appliers will be
/// used</param>
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
BrainParameters bp,
ActionSpec actionSpec,
if (bp.VectorActionSpaceType == SpaceType.Continuous)
actionSpec.CheckNotHybrid();
if (actionSpec.NumContinuousActions > 0)
{
m_Dict[TensorNames.ActionOutput] = new ContinuousActionOutputApplier();
}

new DiscreteActionOutputApplier(bp.VectorActionSize, seed, allocator);
new DiscreteActionOutputApplier(actionSpec.BranchSizes, seed, allocator);
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);

7
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


/// <inheritdoc />
public BarracudaPolicy(
BrainParameters brainParameters,
ActionSpec actionSpec,
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, brainParameters, inferenceDevice);
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, actionSpec, inferenceDevice);
m_SpaceType = brainParameters.VectorActionSpaceType;
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
}
/// <inheritdoc />

31
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


using System;
using UnityEngine;
using UnityEngine.Serialization;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors.Reflection;
namespace Unity.MLAgents.Policies

get { return m_BehaviorName + "?team=" + TeamId; }
}
internal IPolicy GeneratePolicy(HeuristicPolicy.ActionGenerator heuristic)
internal IPolicy GeneratePolicy(ActionSpec actionSpec, HeuristicPolicy.ActionGenerator heuristic)
return GenerateHeuristicPolicy(heuristic);
return new HeuristicPolicy(heuristic, actionSpec);
case BehaviorType.InferenceOnly:
{
if (m_Model == null)

"Either assign a model, or change to a different Behavior Type."
);
}
return new BarracudaPolicy(m_BrainParameters, m_Model, m_InferenceDevice);
return new BarracudaPolicy(actionSpec, m_Model, m_InferenceDevice);
return new RemotePolicy(m_BrainParameters, FullyQualifiedBehaviorName);
return new RemotePolicy(actionSpec, FullyQualifiedBehaviorName);
return new BarracudaPolicy(m_BrainParameters, m_Model, m_InferenceDevice);
return new BarracudaPolicy(actionSpec, m_Model, m_InferenceDevice);
return GenerateHeuristicPolicy(heuristic);
return new HeuristicPolicy(heuristic, actionSpec);
return GenerateHeuristicPolicy(heuristic);
}
}
internal IPolicy GenerateHeuristicPolicy(HeuristicPolicy.ActionGenerator heuristic)
{
var numContinuousActions = 0;
var numDiscreteActions = 0;
if (m_BrainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions = m_BrainParameters.NumActions;
return new HeuristicPolicy(heuristic, actionSpec);
else if (m_BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
numDiscreteActions = m_BrainParameters.NumActions;
}
return new HeuristicPolicy(heuristic, numContinuousActions, numDiscreteActions);
}
internal void UpdateAgentPolicy()

4
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


/// <inheritdoc />
public HeuristicPolicy(ActionGenerator heuristic, int numContinuousActions, int numDiscreteActions)
public HeuristicPolicy(ActionGenerator heuristic, ActionSpec actionSpec)
var numContinuousActions = actionSpec.NumContinuousActions;
var numDiscreteActions = actionSpec.NumDiscreteActions;
var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
var discreteDecision = new ActionSegment<int>(new int[numDiscreteActions], 0, numDiscreteActions);
m_ActionBuffers = new ActionBuffers(continuousDecision, discreteDecision);

9
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


/// <inheritdoc />
public RemotePolicy(
BrainParameters brainParameters,
ActionSpec actionSpec,
m_SpaceType = brainParameters.VectorActionSpaceType;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
}
/// <inheritdoc />

{
m_Communicator?.DecideBatch();
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
// TODO figure out how to handle this with multiple space types.
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());

2
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


var actuator2 = new TestActuator(ActionSpec.MakeContinuous(3), "actuator2");
manager.Add(actuator1);
manager.Add(actuator2);
manager.ReadyActuatorsForExecution(new[] { actuator1, actuator2 }, 3, 10, 4);
manager.ReadyActuatorsForExecution(new[] { actuator1, actuator2 }, 3, 10, 4);
}
[Test]

4
com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs


public TestActuator(ActionSpec actuatorSpace, string name)
{
ActionSpec = actuatorSpace;
TotalNumberOfActions = actuatorSpace.NumContinuousActions +
actuatorSpace.NumDiscreteActions;
Name = name;
}

}
}
public int TotalNumberOfActions { get; }
public ActionSpec ActionSpec { get; }
public string Name { get; }

3
com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs


var gameObj = new GameObject();
var bp = gameObj.AddComponent<BehaviorParameters>();
bp.BehaviorType = BehaviorType.InferenceOnly;
var actionSpec = new ActionSpec();
bp.GeneratePolicy(DummyHeuristic);
bp.GeneratePolicy(actionSpec, DummyHeuristic);
});
}
}

18
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


using UnityEngine;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Tests
{

// Should be able to convert a default instance to proto.
var brain = new BrainParameters();
brain.ToProto("foo", false);
}
[Test]
public void TestDefaultActionSpecToProto()
{
// Should be able to convert a default instance to proto.
var actionSpec = new ActionSpec();
actionSpec.ToBrainParametersProto("foo", false);
// Continuous
actionSpec = ActionSpec.MakeContinuous(3);
actionSpec.ToBrainParametersProto("foo", false);
// Discrete
actionSpec = ActionSpec.MakeDiscrete(1, 2, 3);
actionSpec.ToBrainParametersProto("foo", false);
}
[Test]

5
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


using System.Collections.Generic;
using NUnit.Framework;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;

[Test]
public void Construction()
{
var bp = new BrainParameters();
var actionSpec = new ActionSpec();
var tensorGenerator = new TensorApplier(bp, 0, alloc, mem);
var tensorGenerator = new TensorApplier(actionSpec, 0, alloc, mem);
Assert.IsNotNull(tensorGenerator);
alloc.Dispose();
}

31
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


using UnityEngine;
using UnityEditor;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;

Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;
BrainParameters GetContinuous2vis8vec2actionBrainParameters()
ActionSpec GetContinuous2vis8vec2actionActionSpec()
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 8;
validBrainParameters.VectorActionSize = new[] { 2 };
validBrainParameters.NumStackedVectorObservations = 1;
validBrainParameters.VectorActionSpaceType = SpaceType.Continuous;
return validBrainParameters;
return ActionSpec.MakeContinuous(2);
BrainParameters GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters()
ActionSpec GetDiscrete1vis0vec_2_3action_recurrModelActionSpec()
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 0;
validBrainParameters.VectorActionSize = new[] { 2, 3 };
validBrainParameters.NumStackedVectorObservations = 1;
validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
return validBrainParameters;
return ActionSpec.MakeDiscrete(2, 3);
}
[SetUp]

[Test]
public void TestCreation()
{
var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionBrainParameters());
var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters());
modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
}

var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionBrainParameters(), InferenceDevice.CPU);
var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discrete1vis0vec_2_3action_recurrModel, InferenceDevice.CPU));

[Test]
public void TestRunModel()
{
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
var modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, brainParameters);
var actionSpec = GetDiscrete1vis0vec_2_3action_recurrModelActionSpec();
var modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, actionSpec);
var info1 = new AgentInfo();
info1.episodeId = 1;
modelRunner.PutObservations(info1, new[] { sensor_21_20_3.CreateSensor() }.ToList());

Assert.IsNotNull(modelRunner.GetAction(1));
Assert.IsNotNull(modelRunner.GetAction(2));
Assert.IsNull(modelRunner.GetAction(3));
Assert.AreEqual(brainParameters.VectorActionSize.Count(), modelRunner.GetAction(1).Count());
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).Count());
modelRunner.Dispose();
}
}

33
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


using UnityEngine;
using UnityEditor;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;

var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var validBrainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(model, validBrainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 });
var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var validBrainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(model, validBrainParameters, new SensorComponent[] { sensor_21_20_3 });
var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 });
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 });
errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorObservationSize = 1; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 });
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 });
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 });
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorActionSize = new[] { 3, 3 }; // Invalid action
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 });
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 });
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 });
var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}
}

6
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.0.2"
"com.unity.barracuda": "1.0.2",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",
"com.unity.modules.physics2d": "1.0.0"
}
}

4
config/ppo/StrikersVsGoalie.yaml


gamma: 0.99
strength: 1.0
keep_checkpoints: 5
max_steps: 50000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 10000
threaded: true

gamma: 0.99
strength: 1.0
keep_checkpoints: 5
max_steps: 50000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 10000
threaded: true

2
ml-agents/mlagents/trainers/buffer.py


Adds a list of np.arrays to the end of the list of np.arrays.
:param data: The np.array list to append.
"""
self += list(np.array(data))
self += list(np.array(data, dtype=np.float32))
def set(self, data):
"""

7
ml-agents/mlagents/trainers/cli_utils.py


action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=DetectDefaultStoreTrue,
help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
"before using this option",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(

12
ml-agents/mlagents/trainers/ghost/trainer.py


self.trainer.save_model()
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
"""
Creates policy with the wrapped trainer's create_policy function

wrapped trainer to be trained.
"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy = self.trainer.create_policy(
parsed_behavior_id, behavior_spec, create_graph=True
)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

parsed_behavior_id, behavior_spec
)
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy)
internal_trainer_policy.init_load_weights()
self.current_policy_snapshot[
parsed_behavior_id.brain_name
] = internal_trainer_policy.get_weights()

2
ml-agents/mlagents/trainers/policy/tf_policy.py


# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self.initialize()
# Create assignment ops for Ghost Trainer
self.init_load_weights()
def _create_encoder(
self,

76
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchPPOOptimizer = None # type: ignore
logger = get_logger(__name__)

trajectory.next_obs,
trajectory.done_reached and not trajectory.interrupted,
)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),
)
# Evaluate all reward functions
self.collected_rewards["environment"][agent_id] += np.sum(

evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

local_value_estimates = agent_buffer_trajectory[
f"{name}_value_estimates"
].get_batch()
local_advantage = get_gae(
rewards=local_rewards,
value_estimates=local_value_estimates,

self._clear_update_buffer()
return True
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
Creates a PPO policy to trainers list of policies.
Creates a policy with a Tensorflow backend and PPO hyperparameters
:param parsed_behavior_id:
:param create_graph: whether to create the Tensorflow graph on construction
:return policy
"""
policy = TFPolicy(

condition_sigma_on_obs=False, # Faster training for PPO
create_tf_graph=False, # We will create the TF graph in the Optimizer
create_tf_graph=create_graph,
return policy
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Creates a policy with a PyTorch backend and PPO hyperparameters
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:return policy
"""
policy = TorchPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=False, # Faster training for PPO
separate_critic=behavior_spec.is_action_continuous(),
)
return PPOOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
if self.framework == FrameworkType.PYTORCH:
return TorchPPOOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return PPOOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = self.create_ppo_optimizer()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)

120
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchSACOptimizer = None # type: ignore
logger = get_logger(__name__)

agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

)
for name, v in value_estimates.items():
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
np.mean(v),
)
# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.

self._update_reward_signals()
return policy_was_updated
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TFPolicy:
policy = TFPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,
)
def maybe_load_replay_buffer(self):
# Load the replay buffer if load
if self.load and self.checkpoint_replay_buffer:
try:

)
)
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> TFPolicy:
"""
Creates a policy with a Tensorflow backend and SAC hyperparameters
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:param create_graph: whether to create the Tensorflow graph on construction
:return policy
"""
policy = TFPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
tanh_squash=True,
reparameterize=True,
create_tf_graph=create_graph,
)
self.maybe_load_replay_buffer()
return policy
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Creates a policy with a PyTorch backend and SAC hyperparameters
:param parsed_behavior_id:
:param behavior_spec: specifications for policy construction
:return policy
"""
policy = TorchPolicy(
self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=True,
tanh_squash=True,
separate_critic=True,
)
self.maybe_load_replay_buffer()
return policy
def _update_sac_policy(self) -> bool:

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
if isinstance(signal, RewardSignal):
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
else:
sampled_minibatch[f"{name}_rewards"] = (
signal.evaluate(sampled_minibatch) * signal.strength
)
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
if isinstance(signal, RewardSignal):
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
update_stats = self.optimizer.update_reward_signals(
reward_signal_minibatches, n_sequences
)

self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
return SACOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
if self.framework == FrameworkType.PYTORCH:
return TorchSACOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return SACOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

14
ml-agents/mlagents/trainers/settings.py


return _mapping[self]
class FrameworkType(Enum):
TENSORFLOW: str = "tensorflow"
PYTORCH: str = "pytorch"
@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
trainer_type: TrainerType = TrainerType.PPO

threaded: bool = True
self_play: Optional[SelfPlaySettings] = None
behavioral_cloning: Optional[BehavioralCloningSettings] = None
framework: FrameworkType = FrameworkType.TENSORFLOW
cattr.register_structure_hook(
Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure

configured_dict["engine_settings"][key] = val
else: # Base options
configured_dict[key] = val
return RunOptions.from_dict(configured_dict)
# Apply --torch retroactively
final_runoptions = RunOptions.from_dict(configured_dict)
if "torch" in DetectDefault.non_default_args:
for trainer_set in final_runoptions.behaviors.values():
trainer_set.framework = FrameworkType.PYTORCH
return final_runoptions
@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":

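For reference, the new field can also be set programmatically; a minimal sketch (illustrative only, not part of this change), using only names introduced in settings.py above:

from mlagents.trainers.settings import TrainerSettings, FrameworkType

# Opt a single behavior into the experimental PyTorch backend; the --torch CLI
# flag does the same thing for every behavior after the config is parsed.
settings = TrainerSettings(framework=FrameworkType.PYTORCH)
assert settings.framework == FrameworkType.PYTORCH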
7
ml-agents/mlagents/trainers/tests/test_ghost.py


trainer_params = dummy_config
trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy("test", mock_specs)
policy.create_tf_graph()
policy = trainer.create_policy("test", mock_specs, create_graph=True)
to_load_policy = trainer.create_policy("test", mock_specs)
to_load_policy.create_tf_graph()
to_load_policy.init_load_weights()
to_load_policy = trainer.create_policy("test", mock_specs, create_graph=True)
weights = policy.get_weights()
load_weights = to_load_policy.get_weights()

5
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_policy(self):
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def _process_trajectory(self, trajectory):

3
ml-agents/mlagents/trainers/tests/test_sac.py


0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
policy.initialize()
optimizer.policy.initialize()
return optimizer

trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update = mock.Mock()
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}
trainer.optimizer.update.return_value = {}

2
ml-agents/mlagents/trainers/tests/test_simple_rl.py


RewardSignalType,
EncoderType,
ScheduleType,
FrameworkType,
)
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (

summary_freq=500,
max_steps=3000,
threaded=False,
framework=FrameworkType.TENSORFLOW,
)
SAC_CONFIG = TrainerSettings(

19
ml-agents/mlagents/trainers/tests/torch/test_layers.py


linear_layer,
lstm_layer,
Initialization,
LSTM,
)

assert torch.all(
torch.eq(param.data[4:8], torch.ones_like(param.data[4:8]))
)
def test_lstm_class():
torch.manual_seed(0)
input_size = 12
memory_size = 64
batch_size = 8
seq_len = 16
lstm = LSTM(input_size, memory_size)
assert lstm.memory_size == memory_size
sample_input = torch.ones((batch_size, seq_len, input_size))
sample_memories = torch.ones((1, batch_size, memory_size))
out, mem = lstm(sample_input, sample_memories)
# Hidden size should be half of memory_size
assert out.shape == (batch_size, seq_len, memory_size // 2)
assert mem.shape == (1, batch_size, memory_size)

17
ml-agents/mlagents/trainers/tests/torch/test_networks.py


assert act.shape == (1, 1)
# Test forward
actions, probs, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
# This is different from above for ONNX export
assert act.shape == (
act_size[0],
1,
) # This is different from above for ONNX export
assert act.shape == (act_size[0], 1)
assert act.shape == (1, 1)
assert act.shape == tuple(act_size)
# TODO: Once export works properly, fix the shapes here.
assert mem_size == 0
assert is_cont == int(action_type == ActionType.CONTINUOUS)
assert act_size_vec == torch.tensor(act_size)

if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(
(
1,
network_settings.memory.sequence_length,
network_settings.memory.memory_size,
)
(1, network_settings.memory.sequence_length, actor.memory_size)
)
else:
sample_obs = torch.ones((1, obs_size))

6
ml-agents/mlagents/trainers/tests/torch/test_utils.py


masks = torch.tensor([False, False, False, False, False])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 0.0
# Make sure it works with 2d arrays of shape (mask_length, N)
test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0

20
ml-agents/mlagents/trainers/tf/model_serialization.py


def export_policy_model(
model_path: str,
output_filepath: str,
brain_name: str,
behavior_name: str,
graph: tf.Graph,
sess: tf.Session,
) -> None:

:param output_filepath: file path to output the model (without file suffix)
:param brain_name: brain name of the trained model
:param behavior_name: behavior name of the trained model
frozen_graph_def = _make_frozen_graph(brain_name, graph, sess)
frozen_graph_def = _make_frozen_graph(behavior_name, graph, sess)
if not os.path.exists(output_filepath):
os.makedirs(output_filepath)
# Save frozen graph

if ONNX_EXPORT_ENABLED:
if SerializationSettings.convert_to_onnx:
try:
onnx_graph = convert_frozen_to_onnx(brain_name, frozen_graph_def)
onnx_graph = convert_frozen_to_onnx(behavior_name, frozen_graph_def)
onnx_output_path = f"{output_filepath}.onnx"
with open(onnx_output_path, "wb") as f:
f.write(onnx_graph.SerializeToString())

def _make_frozen_graph(
brain_name: str, graph: tf.Graph, sess: tf.Session
behavior_name: str, graph: tf.Graph, sess: tf.Session
target_nodes = ",".join(_process_graph(brain_name, graph))
target_nodes = ",".join(_process_graph(behavior_name, graph))
graph_def = graph.as_graph_def()
output_graph_def = graph_util.convert_variables_to_constants(
sess, graph_def, target_nodes.replace(" ", "").split(",")

def convert_frozen_to_onnx(brain_name: str, frozen_graph_def: tf.GraphDef) -> Any:
def convert_frozen_to_onnx(behavior_name: str, frozen_graph_def: tf.GraphDef) -> Any:
# This is basically https://github.com/onnx/tensorflow-onnx/blob/master/tf2onnx/convert.py
inputs = _get_input_node_names(frozen_graph_def)

)
onnx_graph = optimizer.optimize_graph(g)
model_proto = onnx_graph.make_model(brain_name)
model_proto = onnx_graph.make_model(behavior_name)
return model_proto

return names
def _process_graph(brain_name: str, graph: tf.Graph) -> List[str]:
def _process_graph(behavior_name: str, graph: tf.Graph) -> List[str]:
"""
Gets the list of the output nodes present in the graph for inference
:return: list of node names

logger.info("List of nodes to export for brain :" + brain_name)
logger.info("List of nodes to export for behavior :" + behavior_name)
for n in nodes:
logger.info("\t" + n)
return nodes

17
ml-agents/mlagents/trainers/torch/encoders.py


super().__init__()
n_channels = [16, 32, 32] # channel for each stack
n_blocks = 2 # number of residual blocks
self.layers = []
layers = []
self.layers.append(
nn.Conv2d(last_channel, channel, [3, 3], [1, 1], padding=1)
)
self.layers.append(nn.MaxPool2d([3, 3], [2, 2]))
layers.append(nn.Conv2d(last_channel, channel, [3, 3], [1, 1], padding=1))
layers.append(nn.MaxPool2d([3, 3], [2, 2]))
self.layers.append(ResNetBlock(channel))
layers.append(ResNetBlock(channel))
self.layers.append(Swish())
layers.append(Swish())
self.dense = linear_layer(
n_channels[-1] * height * width,
final_hidden,

self.sequential = nn.Sequential(*layers)
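# nn.Sequential registers each conv/pool/residual layer as a submodule, so its
# parameters are tracked and moved with the model; a plain Python list would not.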
hidden = visual_obs
for layer in self.layers:
hidden = layer(hidden)
hidden = self.sequential(visual_obs)
before_out = hidden.view(batch_size, -1)
return torch.relu(self.dense(before_out))

67
ml-agents/mlagents/trainers/torch/layers.py


import torch
import abc
from typing import Tuple
from enum import Enum

forget_bias
)
return lstm
class MemoryModule(torch.nn.Module):
@abc.abstractproperty
def memory_size(self) -> int:
"""
Size of memory that is required at the start of a sequence.
"""
pass
@abc.abstractmethod
def forward(
self, input_tensor: torch.Tensor, memories: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Pass a sequence to the memory module.
:input_tensor: Tensor of shape (batch_size, seq_length, size) that represents the input.
:memories: Tensor of initial memories.
:return: Tuple of output, final memories.
"""
pass
class LSTM(MemoryModule):
"""
Memory module that implements LSTM.
"""
def __init__(
self,
input_size: int,
memory_size: int,
num_layers: int = 1,
forget_bias: float = 1.0,
kernel_init: Initialization = Initialization.XavierGlorotUniform,
bias_init: Initialization = Initialization.Zero,
):
super().__init__()
# We set hidden size to half of memory_size since the initial memory
# will be divided between the hidden state and initial cell state.
self.hidden_size = memory_size // 2
self.lstm = lstm_layer(
input_size,
self.hidden_size,
num_layers,
True,
forget_bias,
kernel_init,
bias_init,
)
@property
def memory_size(self) -> int:
return 2 * self.hidden_size
def forward(
self, input_tensor: torch.Tensor, memories: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
# We don't use torch.split here since it is not supported by Barracuda
h0 = memories[:, :, : self.hidden_size]
c0 = memories[:, :, self.hidden_size :]
hidden = (h0, c0)
lstm_out, hidden_out = self.lstm(input_tensor, hidden)
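# hidden_out is the (h_n, c_n) tuple; concatenate it back into a single memory
# tensor so callers only pass one tensor between sequences.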
output_mem = torch.cat(hidden_out, dim=-1)
return lstm_out, output_mem

115
ml-agents/mlagents/trainers/torch/networks.py


from typing import Callable, List, Dict, Tuple, Optional
import attr
import abc
import torch

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads
from mlagents.trainers.torch.layers import lstm_layer
from mlagents.trainers.torch.layers import LSTM
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
EncoderFunction = Callable[

)
if self.use_lstm:
self.lstm = lstm_layer(self.h_size, self.m_size // 2, batch_first=True)
self.lstm = LSTM(self.h_size, self.m_size)
self.lstm = None
self.lstm = None # type: ignore
def update_normalization(self, vec_inputs: List[torch.Tensor]) -> None:
for vec_input, vec_enc in zip(vec_inputs, self.vector_encoders):

for n1, n2 in zip(self.vector_encoders, other_network.vector_encoders):
n1.copy_normalization(n2)
@property
def memory_size(self) -> int:
return self.lstm.memory_size if self.use_lstm else 0
def forward(
self,
vec_inputs: List[torch.Tensor],

sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
vec_encodes = []
encodes = []
for idx, encoder in enumerate(self.vector_encoders):
vec_input = vec_inputs[idx]
if actions is not None:

vec_encodes.append(hidden)
encodes.append(hidden)
vis_encodes = []
vis_input = vis_input.permute([0, 3, 1, 2])
if not torch.onnx.is_in_onnx_export():
vis_input = vis_input.permute([0, 3, 1, 2])
vis_encodes.append(hidden)
encodes.append(hidden)
if len(vec_encodes) > 0 and len(vis_encodes) > 0:
vec_encodes_tensor = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
vis_encodes_tensor = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
encoding = torch.stack(
[vec_encodes_tensor, vis_encodes_tensor], dim=-1
).sum(dim=-1)
elif len(vec_encodes) > 0:
encoding = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
elif len(vis_encodes) > 0:
encoding = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
else:
if len(encodes) == 0:
# Constants don't work in Barracuda
encoding = encodes[0]
if len(encodes) > 1:
for _enc in encodes[1:]:
encoding += _enc
memories = torch.split(memories, self.m_size // 2, dim=-1)
memories = torch.cat(memories, dim=-1)
return encoding, memories

encoding_size = network_settings.hidden_units
self.value_heads = ValueHeads(stream_names, encoding_size, outputs_per_stream)
@property
def memory_size(self) -> int:
return self.network_body.memory_size
def forward(
self,
vec_inputs: List[torch.Tensor],

vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor, int, int, int, int]:
) -> Tuple[torch.Tensor, int, int, int, int]:
"""
Forward pass of the Actor for inference. This is required for export to ONNX, and
the inputs and outputs of this method should not be changed without a respective change

"""
pass
@abc.abstractproperty
def memory_size(self):
"""
Returns the size of the memory (same size used as input and output in the other
methods) used by this Actor.
"""
pass
class SimpleActor(nn.Module, Actor):
def __init__(

self.act_type = act_type
self.act_size = act_size
self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
self.memory_size = torch.nn.Parameter(torch.Tensor([0]))
self.is_continuous_int = torch.nn.Parameter(
torch.Tensor([int(act_type == ActionType.CONTINUOUS)])
)

self.encoding_size = network_settings.memory.memory_size // 2
else:
self.encoding_size = network_settings.hidden_units
if self.act_type == ActionType.CONTINUOUS:
self.distribution = GaussianDistribution(
self.encoding_size,

self.encoding_size, act_size
)
@property
def memory_size(self) -> int:
return self.network_body.memory_size
def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(vector_obs)

vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor, int, int, int, int]:
) -> Tuple[torch.Tensor, int, int, int, int]:
dists, _ = self.get_dists(
vec_inputs, vis_inputs, masks, memories, sequence_length
)
dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
if self.act_type == ActionType.CONTINUOUS:
action_out = sampled_actions
else:
action_out = dists[0].all_log_prob()
sampled_actions,
dists[0].pdf(sampled_actions),
action_out,
self.memory_size,
torch.Tensor([self.network_body.memory_size]),
self.is_continuous_int,
self.act_size_vector,
)

# Give the Actor only half the memories. Note we previously validate
# that memory_size must be a multiple of 4.
self.use_lstm = network_settings.memory is not None
if network_settings.memory is not None:
self.half_mem_size = network_settings.memory.memory_size // 2
new_memory_settings = attr.evolve(
network_settings.memory, memory_size=self.half_mem_size
)
use_network_settings = attr.evolve(
network_settings, memory=new_memory_settings
)
else:
use_network_settings = network_settings
self.half_mem_size = 0
use_network_settings,
network_settings,
act_type,
act_size,
conditional_sigma,

self.critic = ValueNetwork(
stream_names, observation_shapes, use_network_settings
)
self.critic = ValueNetwork(stream_names, observation_shapes, network_settings)
@property
def memory_size(self) -> int:
return self.network_body.memory_size + self.critic.memory_size
def critic_pass(
self,

actor_mem, critic_mem = None, None
if self.use_lstm:
# Use only the back half of memories for critic
actor_mem, critic_mem = torch.split(memories, self.half_mem_size, -1)
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
value_outputs, critic_mem_out = self.critic(
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
)

) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.half_mem_size, dim=-1)
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None

class GlobalSteps(nn.Module):
def __init__(self):
super().__init__()
self.global_step = torch.Tensor([0])
self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
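# A non-trainable nn.Parameter is included in the module's state_dict, so the
# step count is saved and restored with checkpoints (a bare tensor would not be).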
@property
def current_step(self):
return int(self.__global_step.item())
@current_step.setter
def current_step(self, value):
self.__global_step[:] = value
self.global_step += value
self.__global_step += value
class LearningRate(nn.Module):

4
ml-agents/mlagents/trainers/torch/utils.py


:param tensor: Tensor which needs mean computation.
:param masks: Boolean tensor of masks with same dimension as tensor.
"""
return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)
return (tensor.T * masks).sum() / torch.clamp(
(torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
)
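# The transposed form also handles 2-D inputs of shape (mask_length, N): the 1-D
# mask broadcasts across the feature dimension and the denominator counts every
# unmasked element (exercised by the 2-D case in test_utils.py above).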

99
ml-agents/mlagents/trainers/trainer/rl_trainer.py


from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.policy.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.settings import TrainerSettings, FrameworkType
from mlagents.trainers.exception import UnityTrainerException
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.saver.torch_saver import TorchSaver
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
RewardSignalResults = Dict[str, RewardSignalResult]

self._stats_reporter.add_property(
StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
)
self.framework = self.trainer_settings.framework
logger.debug(f"Using framework {self.framework.value}")
self.trainer_settings, self.artifact_path, self.load
self.framework, self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:

for agent_id in rewards:
rewards[agent_id] = 0
@staticmethod
def create_saver(
trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
saver = TFSaver(trainer_settings, model_path, load)
return saver
def _update_end_episode_stats(self, agent_id: str, optimizer: Optimizer) -> None:
for name, rewards in self.collected_rewards.items():
if name == "environment":

self.reward_buffer.appendleft(rewards.get(agent_id, 0))
rewards[agent_id] = 0
else:
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name, rewards.get(agent_id, 0)
)
if isinstance(optimizer.reward_signals[name], RewardSignal):
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name,
rewards.get(agent_id, 0),
)
else:
self.stats_reporter.add_stat(
f"Policy/{optimizer.reward_signals[name].name.capitalize()} Reward",
rewards.get(agent_id, 0),
)
rewards[agent_id] = 0
def _clear_update_buffer(self) -> None:

"""
return False
def create_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
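# Dispatch on the configured framework: PyTorch builds a TorchPolicy (requires
# the optional torch install), otherwise the TensorFlow factory is used.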
if self.framework == FrameworkType.PYTORCH and TorchPolicy is None:
raise UnityTrainerException(
"To use the experimental PyTorch backend, install the PyTorch Python package first."
)
elif self.framework == FrameworkType.PYTORCH:
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
else:
return self.create_tf_policy(
parsed_behavior_id, behavior_spec, create_graph=create_graph
)
@abc.abstractmethod
def create_torch_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> TorchPolicy:
"""
Create a Policy object that uses the PyTorch backend.
"""
pass
@abc.abstractmethod
def create_tf_policy(
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> TFPolicy:
"""
Create a Policy object that uses the TensorFlow backend.
"""
pass
@staticmethod
def create_saver(
framework: str, trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
if framework == FrameworkType.PYTORCH:
saver = TorchSaver( # type: ignore
trainer_settings, model_path, load
)
else:
saver = TFSaver( # type: ignore
trainer_settings, model_path, load
)
return saver
def _policy_mean_reward(self) -> Optional[float]:
""" Returns the mean episode reward for the current policy. """
rewards = self.cumulative_returns_since_policy_update

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
# Copy the checkpointed model files to the final output location
final_checkpoint = attr.evolve(
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)

5
ml-agents/mlagents/trainers/trainer/trainer.py


@abc.abstractmethod
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
self,
parsed_behavior_id: BehaviorIdentifiers,
behavior_spec: BehaviorSpec,
create_graph: bool = False,
) -> Policy:
"""
Creates policy

7
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils import global_values
try:
import torch
except ModuleNotFoundError:
torch = None # type: ignore
class TrainerController:
def __init__(

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
if torch is not None:
torch.manual_seed(training_seed)
@timed
def _save_models(self):

2
test_requirements.txt


# Test-only dependencies should go here, not in setup.py
pytest>4.0.0,<6.0.0
pytest-cov==2.6.1
pytest-xdist
pytest-xdist==1.34.0
# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0

85
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs


using System;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgentsExamples
{
/// <summary>
/// A simple example of an ActuatorComponent.
/// This should be added to the same GameObject as the BasicController
/// </summary>
public class BasicActuatorComponent : ActuatorComponent
{
public BasicController basicController;
ActionSpec m_ActionSpec = ActionSpec.MakeDiscrete(3);
/// <summary>
/// Creates a BasicActuator.
/// </summary>
/// <returns></returns>
public override IActuator CreateActuator()
{
return new BasicActuator(basicController);
}
public override ActionSpec ActionSpec
{
get { return m_ActionSpec; }
}
}
/// <summary>
/// Simple actuator that converts the action into a {-1, 0, 1} direction
/// </summary>
public class BasicActuator : IActuator
{
public BasicController basicController;
ActionSpec m_ActionSpec;
public BasicActuator(BasicController controller)
{
basicController = controller;
m_ActionSpec = ActionSpec.MakeDiscrete(3);
}
public ActionSpec ActionSpec
{
get { return m_ActionSpec; }
}
/// <inheritdoc/>
public String Name
{
get { return "Basic"; }
}
public void ResetData()
{
}
public void OnActionReceived(ActionBuffers actionBuffers)
{
var movement = actionBuffers.DiscreteActions[0];
var direction = 0;
switch (movement)
{
case 1:
direction = -1;
break;
case 2:
direction = 1;
break;
}
basicController.MoveDirection(direction);
}
public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
}
}
}

3
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs.meta


fileFormatVersion: 2
guid: 4ce4e199dabb494e8764b09f4c378098
timeCreated: 1597446960

94
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


from typing import Dict, Optional, Tuple, List
import torch
import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.torch.components.reward_providers import create_reward_provider
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.torch.utils import ModelUtils
class TorchOptimizer(Optimizer): # pylint: disable=W0223
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
super().__init__()
self.policy = policy
self.trainer_settings = trainer_settings
self.update_dict: Dict[str, torch.Tensor] = {}
self.value_heads: Dict[str, torch.Tensor] = {}
self.memory_in: torch.Tensor = None
self.memory_out: torch.Tensor = None
self.m_size: int = 0
self.global_step = torch.tensor(0)
self.bc_module: Optional[BCModule] = None
self.create_reward_signals(trainer_settings.reward_signals)
if trainer_settings.behavioral_cloning is not None:
self.bc_module = BCModule(
self.policy,
trainer_settings.behavioral_cloning,
policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
default_batch_size=trainer_settings.hyperparameters.batch_size,
default_num_epoch=3,
)
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
pass
def create_reward_signals(self, reward_signal_configs):
"""
Create reward signals
:param reward_signal_configs: Reward signal config.
"""
for reward_signal, settings in reward_signal_configs.items():
# Name reward signals by string in case we have duplicates later
self.reward_signals[reward_signal.value] = create_reward_provider(
reward_signal, self.policy.behavior_spec, settings
)
def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
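# Returns per-step value estimates for the trajectory plus a bootstrap value for
# next_obs; the bootstrap is zeroed on terminal steps unless the reward signal's
# ignore_done flag is set.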
vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
if self.policy.use_vis_obs:
visual_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
visual_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
visual_obs.append(visual_ob)
else:
visual_obs = []
memory = torch.zeros([1, 1, self.policy.m_size])
vec_vis_obs = SplitObservations.from_observations(next_obs)
next_vec_obs = [
ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
]
next_vis_obs = [
ModelUtils.list_to_tensor(_vis_ob).unsqueeze(0)
for _vis_ob in vec_vis_obs.visual_observations
]
value_estimates, next_memory = self.policy.actor_critic.critic_pass(
vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
)
next_value_estimate, _ = self.policy.actor_critic.critic_pass(
next_vec_obs, next_vis_obs, next_memory, sequence_length=1
)
for name, estimate in value_estimates.items():
value_estimates[name] = estimate.detach().cpu().numpy()
next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
if done:
for k in next_value_estimate:
if not self.reward_signals[k].ignore_done:
next_value_estimate[k] = 0.0
return value_estimates, next_value_estimate

281
ml-agents/mlagents/trainers/policy/torch_policy.py


from typing import Any, Dict, List, Tuple, Optional
import numpy as np
import torch
import copy
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents.trainers.policy import Policy
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents_envs.timers import timed
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.networks import (
SharedActorCritic,
SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero
class TorchPolicy(Policy):
def __init__(
self,
seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
tanh_squash: bool = False,
reparameterize: bool = False,
separate_critic: bool = True,
condition_sigma_on_obs: bool = True,
):
"""
Policy that uses a multilayer perceptron to map the observations to actions. Could
also use a CNN to encode visual input prior to the MLP. Supports discrete and
continuous action spaces, as well as recurrent networks.
:param seed: Random seed.
:param behavior_spec: Assigned BehaviorSpec object.
:param trainer_settings: Defined training parameters.
:param load: Whether a pre-trained model will be loaded or a new one created.
:param tanh_squash: Whether to use a tanh function on the continuous output,
or a clipped output.
:param reparameterize: Whether we are using the resampling trick to update the policy
in continuous output.
"""
super().__init__(
seed,
behavior_spec,
trainer_settings,
tanh_squash,
reparameterize,
condition_sigma_on_obs,
)
self.global_step = (
GlobalSteps()
) # could be much simpler if TorchPolicy is nn.Module
self.grads = None
torch.set_default_tensor_type(torch.FloatTensor)
reward_signal_configs = trainer_settings.reward_signals
reward_signal_names = [key.value for key, _ in reward_signal_configs.items()]
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
}
if separate_critic:
ac_class = SeparateActorCritic
else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
network_settings=trainer_settings.network_settings,
act_type=behavior_spec.action_type,
act_size=self.act_size,
stream_names=reward_signal_names,
conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,
)
# Save the m_size needed for export
self._export_m_size = self.m_size
# m_size needed for training is determined by network, not trainer settings
self.m_size = self.actor_critic.memory_size
self.actor_critic.to("cpu")
@property
def export_memory_size(self) -> int:
"""
Returns the memory size of the exported ONNX policy. This only includes the memory
of the Actor and not any auxiliary networks.
"""
return self._export_m_size
def _split_decision_step(
self, decision_requests: DecisionSteps
) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
mask = None
if not self.use_continuous_act:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)
)
return vec_vis_obs, mask
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""
If this policy normalizes vector observations, this will update the norm values in the graph.
:param vector_obs: The vector observations to add to the running estimate of the distribution.
"""
vector_obs = [torch.as_tensor(vector_obs)]
if self.use_vec_obs and self.normalize:
self.actor_critic.update_normalization(vector_obs)
@timed
def sample_actions(
self,
vec_obs: List[torch.Tensor],
vis_obs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
) -> Tuple[
torch.Tensor, torch.Tensor, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
"""
:param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
"""
dists, value_heads, memories = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists
)
actions = torch.stack(action_list, dim=-1)
if self.use_continuous_act:
actions = actions[:, :, 0]
else:
actions = actions[:, 0, :]
return (
actions,
all_logs if all_log_probs else log_probs,
entropies,
value_heads,
memories,
)
def evaluate_actions(
self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
actions: torch.Tensor,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
dists, value_heads, _ = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
action_list = [actions[..., i] for i in range(actions.shape[-1])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
return log_probs, entropies, value_heads
@timed
def evaluate(
self, decision_requests: DecisionSteps, global_agent_ids: List[str]
) -> Dict[str, Any]:
"""
Evaluates policy for the agent experiences provided.
:param global_agent_ids:
:param decision_requests: DecisionStep object containing inputs.
:return: Outputs from network as defined by self.inference_dict.
"""
vec_vis_obs, masks = self._split_decision_step(decision_requests)
vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
vis_obs = [
torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations
]
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
)
run_out = {}
with torch.no_grad():
action, log_probs, entropy, value_heads, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
)
run_out["action"] = action.detach().cpu().numpy()
run_out["pre_action"] = action.detach().cpu().numpy()
# Todo - make pre_action difference
run_out["log_probs"] = log_probs.detach().cpu().numpy()
run_out["entropy"] = entropy.detach().cpu().numpy()
run_out["value_heads"] = {
name: t.detach().cpu().numpy() for name, t in value_heads.items()
}
run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
run_out["learning_rate"] = 0.0
if self.use_recurrent:
run_out["memory_out"] = memories.detach().cpu().numpy().squeeze(0)
return run_out
def get_action(
self, decision_requests: DecisionSteps, worker_id: int = 0
) -> ActionInfo:
"""
Decides actions given observations information, and takes them in environment.
:param worker_id:
:param decision_requests: A dictionary of brain names and BrainInfo from environment.
:return: an ActionInfo containing action, memories, values and an object
to be passed to add experiences
"""
if len(decision_requests) == 0:
return ActionInfo.empty()
global_agent_ids = [
get_global_agent_id(worker_id, int(agent_id))
for agent_id in decision_requests.agent_id
] # For 1-D array, the iterator order is correct.
run_out = self.evaluate(
decision_requests, global_agent_ids
) # pylint: disable=assignment-from-no-return
self.save_memories(global_agent_ids, run_out.get("memory_out"))
return ActionInfo(
action=run_out.get("action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=list(decision_requests.agent_id),
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0
@property
def use_vec_obs(self):
return self.vec_obs_size > 0
def get_current_step(self):
"""
Gets current model step.
:return: current model step.
"""
return self.global_step.current_step
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.
:return: The step the model was set to.
"""
self.global_step.current_step = step
return step
def increment_step(self, n_steps):
"""
Increments model step.
"""
self.global_step.increment(n_steps)
return self.get_current_step()
def load_weights(self, values: List[np.ndarray]) -> None:
self.actor_critic.load_state_dict(values)
def init_load_weights(self) -> None:
pass
def get_weights(self) -> List[np.ndarray]:
return copy.deepcopy(self.actor_critic.state_dict())
def get_modules(self):
return {"Policy": self.actor_critic, "global_step": self.global_step}

203
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from typing import Dict, cast
import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.utils import ModelUtils
class TorchPPOOptimizer(TorchOptimizer):
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
The PPO optimizer has a value estimator and a loss function.
:param policy: A TorchPolicy object that will be updated by this PPO Optimizer.
:param trainer_params: Trainer parameters dictionary that specifies the
properties of the trainer.
"""
# Create the graph here to give more granular control of the TF graph to the Optimizer.
super().__init__(policy, trainer_settings)
params = list(self.policy.actor_critic.parameters())
self.hyperparameters: PPOSettings = cast(
PPOSettings, trainer_settings.hyperparameters
)
self.decay_learning_rate = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.decay_epsilon = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.epsilon,
0.1,
self.trainer_settings.max_steps,
)
self.decay_beta = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.beta,
1e-5,
self.trainer_settings.max_steps,
)
self.optimizer = torch.optim.Adam(
params, lr=self.trainer_settings.hyperparameters.learning_rate
)
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
}
self.stream_names = list(self.reward_signals.keys())
def ppo_value_loss(
self,
values: Dict[str, torch.Tensor],
old_values: Dict[str, torch.Tensor],
returns: Dict[str, torch.Tensor],
epsilon: float,
loss_masks: torch.Tensor,
) -> torch.Tensor:
"""
Evaluates value loss for PPO.
:param values: Value output of the current network.
:param old_values: Value stored with experiences in buffer.
:param returns: Computed returns.
:param epsilon: Clipping value for value estimate.
:param loss_mask: Mask for losses. Used with LSTM to ignore 0'ed out experiences.
"""
value_losses = []
for name, head in values.items():
old_val_tensor = old_values[name]
returns_tensor = returns[name]
clipped_value_estimate = old_val_tensor + torch.clamp(
head - old_val_tensor, -1 * epsilon, epsilon
)
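# PPO-style value clipping: the clipped estimate stays within +/- epsilon of the
# old value, and taking the max of the clipped and unclipped squared errors
# discourages large value updates.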
v_opt_a = (returns_tensor - head) ** 2
v_opt_b = (returns_tensor - clipped_value_estimate) ** 2
value_loss = ModelUtils.masked_mean(torch.max(v_opt_a, v_opt_b), loss_masks)
value_losses.append(value_loss)
value_loss = torch.mean(torch.stack(value_losses))
return value_loss
def ppo_policy_loss(
self,
advantages: torch.Tensor,
log_probs: torch.Tensor,
old_log_probs: torch.Tensor,
loss_masks: torch.Tensor,
) -> torch.Tensor:
"""
Evaluate PPO policy loss.
:param advantages: Computed advantages.
:param log_probs: Current policy probabilities
:param old_log_probs: Past policy probabilities
:param loss_masks: Mask for losses. Used with LSTM to ignore 0'ed out experiences.
"""
advantage = advantages.unsqueeze(-1)
decay_epsilon = self.hyperparameters.epsilon
r_theta = torch.exp(log_probs - old_log_probs)
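# r_theta is the new/old policy probability ratio (computed from log probs); the
# clipped surrogate below takes the min of the clipped and unclipped terms, and
# the leading -1 turns the maximization objective into a loss.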
p_opt_a = r_theta * advantage
p_opt_b = (
torch.clamp(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * advantage
)
policy_loss = -1 * ModelUtils.masked_mean(
torch.min(p_opt_a, p_opt_b), loss_masks
)
return policy_loss
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Performs update on model.
:param batch: Batch of experiences.
:param num_sequences: Number of sequences to process.
:return: Results of update.
"""
# Get decayed parameters
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
decay_eps = self.decay_epsilon.get_value(self.policy.get_current_step())
decay_bet = self.decay_beta.get_value(self.policy.get_current_step())
returns = {}
old_values = {}
for name in self.reward_signals:
old_values[name] = ModelUtils.list_to_tensor(
batch[f"{name}_value_estimates"]
)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
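# Only the memory at the start of each sequence is needed; the network recomputes
# the intermediate memories while unrolling over sequence_length steps.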
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
else:
vis_obs = []
log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
memories=memories,
seq_len=self.policy.sequence_length,
)
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks
)
policy_loss = self.ppo_policy_loss(
ModelUtils.list_to_tensor(batch["advantages"]),
log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
loss_masks,
)
loss = (
policy_loss
+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
update_stats = {
"Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Policy/Learning Rate": decay_lr,
"Policy/Epsilon": decay_eps,
"Policy/Beta": decay_bet,
}
for reward_provider in self.reward_signals.values():
update_stats.update(reward_provider.update(batch))
return update_stats
def get_modules(self):
return {"Optimizer": self.optimizer}

561
ml-agents/mlagents/trainers/sac/optimizer_torch.py


import numpy as np
from typing import Dict, List, Mapping, cast, Tuple, Optional
import torch
from torch import nn
import attr
from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import ActionType
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.networks import ValueNetwork
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
EPSILON = 1e-6 # Small value to avoid divide by zero
logger = get_logger(__name__)
class TorchSACOptimizer(TorchOptimizer):
class PolicyValueNetwork(nn.Module):
def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
network_settings: NetworkSettings,
act_type: ActionType,
act_size: List[int],
):
super().__init__()
if act_type == ActionType.CONTINUOUS:
num_value_outs = 1
num_action_ins = sum(act_size)
else:
num_value_outs = sum(act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
self.q2_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
def forward(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
return q1_out, q2_out
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)
self.tau = hyperparameters.tau
self.init_entcoef = hyperparameters.init_entcoef
self.policy = policy
self.act_size = policy.act_size
policy_network_settings = policy.network_settings
self.tau = hyperparameters.tau
self.burn_in_ratio = 0.0
# Non-exposed SAC parameters
self.discrete_target_entropy_scale = 0.2 # Roughly equal to e-greedy 0.05
self.continuous_target_entropy_scale = 1.0
self.stream_names = list(self.reward_signals.keys())
# Use to reduce "survivor bonus" when using Curiosity or GAIL.
self.gammas = [_val.gamma for _val in trainer_params.reward_signals.values()]
self.use_dones_in_backup = {
name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}
# Critics should have 1/2 of the memory of the policy
critic_memory = policy_network_settings.memory
if critic_memory is not None:
critic_memory = attr.evolve(
critic_memory, memory_size=critic_memory.memory_size // 2
)
value_network_settings = attr.evolve(
policy_network_settings, memory=critic_memory
)
self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
value_network_settings,
self.policy.behavior_spec.action_type,
self.act_size,
)
self.target_network = ValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
value_network_settings,
)
self.soft_update(self.policy.actor_critic.critic, self.target_network, 1.0)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),
requires_grad=True,
)
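# The entropy coefficient is learned in log space so it stays positive; there is
# one coefficient per discrete action branch (a single one for continuous actions).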
if self.policy.use_continuous_act:
self.target_entropy = torch.as_tensor(
-1
* self.continuous_target_entropy_scale
* np.prod(self.act_size[0]).astype(np.float32)
)
else:
self.target_entropy = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self.act_size
]
policy_params = list(self.policy.actor_critic.network_body.parameters()) + list(
self.policy.actor_critic.distribution.parameters()
)
value_params = list(self.value_network.parameters()) + list(
self.policy.actor_critic.critic.parameters()
)
logger.debug("value_vars")
for param in value_params:
logger.debug(param.shape)
logger.debug("policy_vars")
for param in policy_params:
logger.debug(param.shape)
self.decay_learning_rate = ModelUtils.DecayedValue(
hyperparameters.learning_rate_schedule,
hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.policy_optimizer = torch.optim.Adam(
policy_params, lr=hyperparameters.learning_rate
)
self.value_optimizer = torch.optim.Adam(
value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
)
def sac_q_loss(
self,
q1_out: Dict[str, torch.Tensor],
q2_out: Dict[str, torch.Tensor],
target_values: Dict[str, torch.Tensor],
dones: torch.Tensor,
rewards: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
q1_losses = []
q2_losses = []
# Multiple q losses per stream
for i, name in enumerate(q1_out.keys()):
q1_stream = q1_out[name].squeeze()
q2_stream = q2_out[name].squeeze()
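# Bellman target: r + gamma * (1 - done) * V_target(s'), computed without
# gradients; reward signals whose ignore_done flag is set keep bootstrapping
# through episode ends (use_dones_in_backup == 0).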
with torch.no_grad():
q_backup = rewards[name] + (
(1.0 - self.use_dones_in_backup[name] * dones)
* self.gammas[i]
* target_values[name]
)
_q1_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(q_backup, q1_stream), loss_masks
)
_q2_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(q_backup, q2_stream), loss_masks
)
q1_losses.append(_q1_loss)
q2_losses.append(_q2_loss)
q1_loss = torch.mean(torch.stack(q1_losses))
q2_loss = torch.mean(torch.stack(q2_losses))
return q1_loss, q2_loss
def soft_update(self, source: nn.Module, target: nn.Module, tau: float) -> None:
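# Polyak (soft) update: target <- (1 - tau) * target + tau * source. tau = 1.0
# performs a hard copy (done once at construction above); a small tau makes the
# target network track the source slowly.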
for source_param, target_param in zip(source.parameters(), target.parameters()):
target_param.data.copy_(
target_param.data * (1.0 - tau) + source_param.data * tau
)
def sac_value_loss(
self,
log_probs: torch.Tensor,
values: Dict[str, torch.Tensor],
q1p_out: Dict[str, torch.Tensor],
q2p_out: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
discrete: bool,
) -> torch.Tensor:
min_policy_qs = {}
with torch.no_grad():
_ent_coef = torch.exp(self._log_ent_coef)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q1p]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q2p]
),
dim=0,
)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:
for name in values.keys():
with torch.no_grad():
v_backup = min_policy_qs[name] - torch.sum(
_ent_coef * log_probs, dim=1
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup), loss_masks
)
value_losses.append(value_loss)
else:
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * log_probs.exp(), self.act_size
)
# We have to do entropy bonus per action branch
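# log_probs * log_probs.exp() above is pi(a) * log pi(a); summed per branch this is
# E_pi[log pi] = -H, so subtracting the bonus below adds ent_coef * H to the value target.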
branched_ent_bonus = torch.stack(
[
torch.sum(_ent_coef[i] * _lp, dim=1, keepdim=True)
for i, _lp in enumerate(branched_per_action_ent)
]
)
for name in values.keys():
with torch.no_grad():
v_backup = min_policy_qs[name] - torch.mean(
branched_ent_bonus, axis=0
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup.squeeze()),
loss_masks,
)
value_losses.append(value_loss)
value_loss = torch.mean(torch.stack(value_losses))
if torch.isinf(value_loss).any() or torch.isnan(value_loss).any():
raise UnityTrainerException("Inf or NaN found in value loss")
return value_loss
def sac_policy_loss(
self,
log_probs: torch.Tensor,
q1p_outs: Dict[str, torch.Tensor],
loss_masks: torch.Tensor,
discrete: bool,
) -> torch.Tensor:
_ent_coef = torch.exp(self._log_ent_coef)
mean_q1 = torch.mean(torch.stack(list(q1p_outs.values())), axis=0)
if not discrete:
mean_q1 = mean_q1.unsqueeze(1)
batch_policy_loss = torch.mean(_ent_coef * log_probs - mean_q1, dim=1)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
else:
action_probs = log_probs.exp()
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * action_probs, self.act_size
)
branched_q_term = ModelUtils.break_into_branches(
mean_q1 * action_probs, self.act_size
)
branched_policy_loss = torch.stack(
[
torch.sum(_ent_coef[i] * _lp - _qt, dim=1, keepdim=True)
for i, (_lp, _qt) in enumerate(
zip(branched_per_action_ent, branched_q_term)
)
]
)
batch_policy_loss = torch.squeeze(branched_policy_loss)
policy_loss = torch.mean(loss_masks * batch_policy_loss)
return policy_loss
def sac_entropy_loss(
self, log_probs: torch.Tensor, loss_masks: torch.Tensor, discrete: bool
) -> torch.Tensor:
if not discrete:
with torch.no_grad():
target_current_diff = torch.sum(log_probs + self.target_entropy, dim=1)
entropy_loss = -torch.mean(
self._log_ent_coef * loss_masks * target_current_diff
)
else:
with torch.no_grad():
branched_per_action_ent = ModelUtils.break_into_branches(
log_probs * log_probs.exp(), self.act_size
)
target_current_diff_branched = torch.stack(
[
torch.sum(_lp, axis=1, keepdim=True) + _te
for _lp, _te in zip(
branched_per_action_ent, self.target_entropy
)
],
axis=1,
)
target_current_diff = torch.squeeze(
target_current_diff_branched, axis=2
)
entropy_loss = -1 * ModelUtils.masked_mean(
torch.mean(self._log_ent_coef * target_current_diff, axis=1), loss_masks
)
return entropy_loss
def _condense_q_streams(
self, q_output: Dict[str, torch.Tensor], discrete_actions: torch.Tensor
) -> Dict[str, torch.Tensor]:
condensed_q_output = {}
onehot_actions = ModelUtils.actions_to_onehot(discrete_actions, self.act_size)
for key, item in q_output.items():
branched_q = ModelUtils.break_into_branches(item, self.act_size)
only_action_qs = torch.stack(
[
torch.sum(_act * _q, dim=1, keepdim=True)
for _act, _q in zip(onehot_actions, branched_q)
]
)
condensed_q_output[key] = torch.mean(only_action_qs, dim=0)
return condensed_q_output
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Updates model using buffer.
:param num_sequences: Number of trajectories in batch.
:param batch: Experience mini-batch.
:return: Output from update process.
"""
rewards = {}
for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
next_vec_obs = [ModelUtils.list_to_tensor(batch["next_vector_in"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
]
# LSTM shouldn't have a sequence length < 1, but guard against indexing out of range if it does.
offset = 1 if self.policy.sequence_length > 1 else 0
next_memories_list = [
ModelUtils.list_to_tensor(
batch["memory"][i][self.policy.m_size // 2 :]
) # only pass value part of memory to target network
for i in range(offset, len(batch["memory"]), self.policy.sequence_length)
]
if len(memories_list) > 0:
memories = torch.stack(memories_list).unsqueeze(0)
next_memories = torch.stack(next_memories_list).unsqueeze(0)
else:
memories = None
next_memories = None
# Q network memories are 0'ed out, since we don't have them during inference.
q_memories = (
torch.zeros_like(next_memories) if next_memories is not None else None
)
vis_obs: List[torch.Tensor] = []
next_vis_obs: List[torch.Tensor] = []
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_encoders
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
next_vis_ob = ModelUtils.list_to_tensor(
batch["next_visual_obs%d" % idx]
)
next_vis_obs.append(next_vis_ob)
# Copy normalizers from policy
self.value_network.q1_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.value_network.q2_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
_,
) = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=self.policy.sequence_length,
all_log_probs=not self.policy.use_continuous_act,
)
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
squeezed_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream, q2_stream = q1_out, q2_out
else:
with torch.no_grad():
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream = self._condense_q_streams(q1_out, actions)
q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
target_values, _ = self.target_network(
next_vec_obs,
next_vis_obs,
memories=next_memories,
sequence_length=self.policy.sequence_length,
)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
use_discrete = not self.policy.use_continuous_act
dones = ModelUtils.list_to_tensor(batch["done"])
q1_loss, q2_loss = self.sac_q_loss(
q1_stream, q2_stream, target_values, dones, rewards, masks
)
value_loss = self.sac_value_loss(
log_probs, sampled_values, q1p_out, q2p_out, masks, use_discrete
)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)
total_value_loss = q1_loss + q2_loss + value_loss
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
ModelUtils.update_learning_rate(self.policy_optimizer, decay_lr)
self.policy_optimizer.zero_grad()
policy_loss.backward()
self.policy_optimizer.step()
ModelUtils.update_learning_rate(self.value_optimizer, decay_lr)
self.value_optimizer.zero_grad()
total_value_loss.backward()
self.value_optimizer.step()
ModelUtils.update_learning_rate(self.entropy_optimizer, decay_lr)
self.entropy_optimizer.zero_grad()
entropy_loss.backward()
self.entropy_optimizer.step()
# Update target network
self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
update_stats = {
"Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Losses/Q1 Loss": q1_loss.detach().cpu().numpy(),
"Losses/Q2 Loss": q2_loss.detach().cpu().numpy(),
"Policy/Entropy Coeff": torch.exp(self._log_ent_coef)
.detach()
.cpu()
.numpy(),
"Policy/Learning Rate": decay_lr,
}
for signal in self.reward_signals.values():
signal.update(batch)
return update_stats
def update_reward_signals(
self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
return {}
def get_modules(self):
return {
"Optimizer:value_network": self.value_network,
"Optimizer:target_network": self.target_network,
"Optimizer:policy_optimizer": self.policy_optimizer,
"Optimizer:value_optimizer": self.value_optimizer,
"Optimizer:entropy_optimizer": self.entropy_optimizer,
}
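As a sanity check on the target-entropy values set in __init__ above, here is a minimal standalone sketch (not part of the trainer; the action sizes are made up for illustration): a continuous action size of 2 and discrete branches of (3, 3, 2).

import numpy as np

# Scales used above: 1.0 for continuous, 0.2 for discrete
# (roughly comparable to epsilon-greedy with epsilon = 0.05).
continuous_scale = 1.0
discrete_scale = 0.2

# Continuous case: act_size = [2] gives a target entropy of -|A| = -2.0.
continuous_target = -1 * continuous_scale * np.prod(2).astype(np.float32)

# Discrete case: one target per branch, 0.2 * log(branch size).
discrete_targets = [discrete_scale * np.log(b).astype(np.float32) for b in (3, 3, 2)]

print(continuous_target)  # -2.0
print(discrete_targets)   # [~0.22, ~0.22, ~0.14]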

118
ml-agents/mlagents/trainers/saver/torch_saver.py


import os
import shutil
import torch
from typing import Dict, Union, Optional, cast
from mlagents_envs.exception import UnityPolicyException
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.settings import TrainerSettings, SerializationSettings
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.torch.model_serialization import ModelSerializer
logger = get_logger(__name__)
class TorchSaver(BaseSaver):
"""
Saver class for PyTorch
"""
def __init__(
self, trainer_settings: TrainerSettings, model_path: str, load: bool = False
):
super().__init__()
self.model_path = model_path
self.initialize_path = trainer_settings.init_path
self._keep_checkpoints = trainer_settings.keep_checkpoints
self.load = load
self.policy: Optional[TorchPolicy] = None
self.exporter: Optional[ModelSerializer] = None
self.modules: Dict[str, torch.nn.Module] = {}
def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None:
if isinstance(module, TorchPolicy) or isinstance(module, TorchOptimizer):
self.modules.update(module.get_modules()) # type: ignore
else:
raise UnityPolicyException(
"Registering Object of unsupported type {} to Saver ".format(
type(module)
)
)
if self.policy is None and isinstance(module, TorchPolicy):
self.policy = module
self.exporter = ModelSerializer(self.policy)
def save_checkpoint(self, brain_name: str, step: int) -> str:
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
state_dict = {
name: module.state_dict() for name, module in self.modules.items()
}
torch.save(state_dict, f"{checkpoint_path}.pt")
torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
self.export(checkpoint_path, brain_name)
return checkpoint_path
def export(self, output_filepath: str, brain_name: str) -> None:
if self.exporter is not None:
self.exporter.export_policy_model(output_filepath)
def initialize_or_load(self, policy: Optional[TorchPolicy] = None) -> None:
# Initialize/Load the registered self.policy by default.
# If a policy is passed in, initialize/load that policy instead.
# This is mainly used for the ghost trainer's fixed policy.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_model(
self.initialize_path, policy, reset_global_steps=reset_steps
)
elif self.load:
self._load_model(self.model_path, policy, reset_global_steps=reset_steps)
def _load_model(
self,
load_path: str,
policy: Optional[TorchPolicy] = None,
reset_global_steps: bool = False,
) -> None:
model_path = os.path.join(load_path, "checkpoint.pt")
saved_state_dict = torch.load(model_path)
if policy is None:
modules = self.modules
policy = self.policy
else:
modules = policy.get_modules()
policy = cast(TorchPolicy, policy)
for name, mod in modules.items():
mod.load_state_dict(saved_state_dict[name])
if reset_global_steps:
policy.set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {policy.get_current_step()}.")
def copy_final_model(self, source_nn_path: str) -> None:
"""
Copy the exported model at the given source path to the final model destination.
Copies the corresponding .onnx file if ONNX conversion is enabled.
"""
final_model_name = os.path.splitext(source_nn_path)[0]
if SerializationSettings.convert_to_onnx:
try:
source_path = f"{final_model_name}.onnx"
destination_path = f"{self.model_path}.onnx"
shutil.copyfile(source_path, destination_path)
logger.info(f"Copied {source_path} to {destination_path}.")
except OSError:
pass
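For orientation, a rough usage sketch of TorchSaver, modeled on the test_saver.py tests shown later in this diff; create_policy_mock is that test suite's helper, and the output path and behavior name are made up for illustration.

import os
from mlagents.trainers.saver.torch_saver import TorchSaver
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.torch.test_policy import create_policy_mock

trainer_settings = TrainerSettings()
policy = create_policy_mock(trainer_settings)

saver = TorchSaver(trainer_settings, os.path.join("results", "run1"))
saver.register(policy)        # registers the policy's modules and sets up the ONNX exporter
saver.initialize_or_load()    # nothing to load on a fresh run (no init_path, load=False)
checkpoint_path = saver.save_checkpoint("MyBehavior", 1000)
# Writes results/run1/MyBehavior-1000.pt, results/run1/checkpoint.pt and an .onnx export.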

1001
ml-agents/mlagents/trainers/tests/torch/test.demo
The file diff is too large to display.

144
ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py


from unittest.mock import MagicMock
import pytest
import mlagents.trainers.tests.mock_brain as mb
import numpy as np
import os
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.settings import (
TrainerSettings,
BehavioralCloningSettings,
NetworkSettings,
)
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config = TrainerSettings()
trainer_config.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
bc_module = BCModule(
policy,
settings=bc_settings,
policy_learning_rate=trainer_config.hyperparameters.learning_rate,
default_batch_size=trainer_config.hyperparameters.batch_size,
default_num_epoch=3,
)
return bc_module
# Test default values
def test_bcmodule_defaults():
# See if default values match
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 3
assert bc_module.batch_size == TrainerSettings().hyperparameters.batch_size
# Assign strange values and see if it overrides properly
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
num_epoch=100,
batch_size=10000,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 100
assert bc_module.batch_size == 10000
# Test with continuous control env and vector actions
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_constant_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=0,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate == bc_module.current_lr
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_linear_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=100,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
# Should decay by 10/100 * 0.0003 = 0.00003
bc_module.policy.get_current_step = MagicMock(return_value=10)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate - 0.00003 == pytest.approx(bc_module.current_lr, abs=0.01)
# Test with RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control and visual observations
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_dc_visual_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control, visual observations and RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_dc_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
if __name__ == "__main__":
pytest.main()

177
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


import pytest
import numpy as np
from mlagents.trainers.ghost.trainer import GhostTrainer
from mlagents.trainers.ghost.controller import GhostController
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings, FrameworkType
@pytest.fixture
def dummy_config():
return TrainerSettings(
self_play=SelfPlaySettings(), framework=FrameworkType.PYTORCH
)
VECTOR_ACTION_SPACE = 1
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 513
NUM_AGENTS = 12
@pytest.mark.parametrize("use_discrete", [True, False])
def test_load_and_set(dummy_config, use_discrete):
mock_specs = mb.setup_test_behavior_specs(
use_discrete,
False,
vector_action_space=DISCRETE_ACTION_SPACE
if use_discrete
else VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
)
trainer_params = dummy_config
trainer = PPOTrainer("test", 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy("test", mock_specs)
trainer.seed = 20 # otherwise graphs are the same
to_load_policy = trainer.create_policy("test", mock_specs)
weights = policy.get_weights()
load_weights = to_load_policy.get_weights()
try:
for w, lw in zip(weights, load_weights):
np.testing.assert_array_equal(w, lw)
except AssertionError:
pass
to_load_policy.load_weights(weights)
load_weights = to_load_policy.get_weights()
for w, lw in zip(weights, load_weights):
np.testing.assert_array_equal(w, lw)
def test_process_trajectory(dummy_config):
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[2], vector_obs_space=1
)
behavior_id_team0 = "test_brain?team=0"
behavior_id_team1 = "test_brain?team=1"
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(
ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
)
# first policy encountered becomes policy trained by wrapped PPO
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
trainer.add_policy(parsed_behavior_id0, policy)
trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
trainer.subscribe_trajectory_queue(trajectory_queue0)
# Ghost trainer should ignore this queue because it is off-policy
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
trainer.add_policy(parsed_behavior_id1, policy)
trajectory_queue1 = AgentManagerQueue(behavior_id_team1)
trainer.subscribe_trajectory_queue(trajectory_queue1)
time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
action_space=[2],
)
trajectory_queue0.put(trajectory)
trainer.advance()
# Check that trainer put trajectory in update buffer
assert trainer.trainer.update_buffer.num_experiences == 15
trajectory_queue1.put(trajectory)
trainer.advance()
# Check that ghost trainer ignored off policy queue
assert trainer.trainer.update_buffer.num_experiences == 15
# Check that it emptied the queue
assert trajectory_queue1.empty()
def test_publish_queue(dummy_config):
mock_specs = mb.setup_test_behavior_specs(
True, False, vector_action_space=[1], vector_obs_space=8
)
behavior_id_team0 = "test_brain?team=0"
behavior_id_team1 = "test_brain?team=1"
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
brain_name = parsed_behavior_id0.brain_name
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(
ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
)
# First policy encountered becomes policy trained by wrapped PPO
# This queue should remain empty after swap snapshot
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
trainer.add_policy(parsed_behavior_id0, policy)
policy_queue0 = AgentManagerQueue(behavior_id_team0)
trainer.publish_policy_queue(policy_queue0)
# Ghost trainer should use this queue for ghost policy swap
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
trainer.add_policy(parsed_behavior_id1, policy)
policy_queue1 = AgentManagerQueue(behavior_id_team1)
trainer.publish_policy_queue(policy_queue1)
# check ghost trainer swap pushes to ghost queue and not trainer
assert policy_queue0.empty() and policy_queue1.empty()
trainer._swap_snapshots()
assert policy_queue0.empty() and not policy_queue1.empty()
# clear
policy_queue1.get_nowait()
mock_specs = mb.setup_test_behavior_specs(
False,
False,
vector_action_space=VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
)
buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_specs)
# Mock out reward signal eval
buffer["extrinsic_rewards"] = buffer["environment_rewards"]
buffer["extrinsic_returns"] = buffer["environment_rewards"]
buffer["extrinsic_value_estimates"] = buffer["environment_rewards"]
buffer["curiosity_rewards"] = buffer["environment_rewards"]
buffer["curiosity_returns"] = buffer["environment_rewards"]
buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
buffer["advantages"] = buffer["environment_rewards"]
trainer.trainer.update_buffer = buffer
# When the ghost trainer advances and the wrapped trainer's buffer is full,
# the wrapped trainer pushes the updated policy to the correct queue.
assert policy_queue0.empty() and policy_queue1.empty()
trainer.advance()
assert not policy_queue0.empty() and policy_queue1.empty()
if __name__ == "__main__":
pytest.main()

150
ml-agents/mlagents/trainers/tests/torch/test_policy.py


import pytest
import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 32
NUM_AGENTS = 12
EPSILON = 1e-7
def create_policy_mock(
dummy_config: TrainerSettings,
use_rnn: bool = False,
use_discrete: bool = True,
use_visual: bool = False,
seed: int = 0,
) -> TorchPolicy:
mock_spec = mb.setup_test_behavior_specs(
use_discrete,
use_visual,
vector_action_space=DISCRETE_ACTION_SPACE
if use_discrete
else VECTOR_ACTION_SPACE,
vector_obs_space=VECTOR_OBS_SPACE,
)
trainer_settings = dummy_config
trainer_settings.keep_checkpoints = 3
trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(seed, mock_spec, trainer_settings)
return policy
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_evaluate(rnn, visual, discrete):
# Test evaluate
policy = create_policy_mock(
TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
decision_step, terminal_step = mb.create_steps_from_behavior_spec(
policy.behavior_spec, num_agents=NUM_AGENTS
)
run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
if discrete:
run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
else:
assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_evaluate_actions(rnn, visual, discrete):
policy = create_policy_mock(
TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
if policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
vis_obs.append(vis_ob)
memories = [
ModelUtils.list_to_tensor(buffer["memory"][i])
for i in range(0, len(buffer["memory"]), policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
log_probs, entropy, values = policy.evaluate_actions(
vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
memories=memories,
seq_len=policy.sequence_length,
)
assert log_probs.shape == (64, policy.behavior_spec.action_size)
assert entropy.shape == (64, policy.behavior_spec.action_size)
for val in values.values():
assert val.shape == (64,)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_sample_actions(rnn, visual, discrete):
policy = create_policy_mock(
TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
vis_obs.append(vis_ob)
memories = [
ModelUtils.list_to_tensor(buffer["memory"][i])
for i in range(0, len(buffer["memory"]), policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
memories,
) = policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,
memories=memories,
seq_len=policy.sequence_length,
all_log_probs=not policy.use_continuous_act,
)
if discrete:
assert log_probs.shape == (
64,
sum(policy.behavior_spec.discrete_action_branches),
)
else:
assert log_probs.shape == (64, policy.behavior_spec.action_shape)
assert entropies.shape == (64, policy.behavior_spec.action_size)
for val in sampled_values.values():
assert val.shape == (64,)
if rnn:
assert memories.shape == (1, 1, policy.m_size)

105
ml-agents/mlagents/trainers/tests/torch/test_saver.py


import pytest
from unittest import mock
import os
import numpy as np
import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.saver.torch_saver import TorchSaver
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.torch.test_policy import create_policy_mock
def test_register(tmp_path):
trainer_params = TrainerSettings()
saver = TorchSaver(trainer_params, tmp_path)
opt = mock.Mock(spec=TorchPPOOptimizer)
opt.get_modules = mock.Mock(return_value={})
saver.register(opt)
assert saver.policy is None
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params)
opt.get_modules = mock.Mock(return_value={})
saver.register(policy)
assert saver.policy is not None
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params)
saver = TorchSaver(trainer_params, path1)
saver.register(policy)
saver.initialize_or_load(policy)
policy.set_step(2000)
mock_brain_name = "MockBrain"
saver.save_checkpoint(mock_brain_name, 2000)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
saver2 = TorchSaver(trainer_params, path1, load=True)
policy2 = create_policy_mock(trainer_params)
saver2.register(policy2)
saver2.initialize_or_load(policy2)
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.init_path = path1
saver3 = TorchSaver(trainer_params, path2)
policy3 = create_policy_mock(trainer_params)
saver3.register(policy3)
saver3.initialize_or_load(policy3)
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
# TorchPolicy.evaluate() returns log_probs instead of all_log_probs like the TF policy does,
# resulting in non-deterministic results for testing.
# So use sample_actions here instead.
def _compare_two_policies(policy1: TorchPolicy, policy2: TorchPolicy) -> None:
"""
Make sure two policies have the same output for the same input.
"""
decision_step, _ = mb.create_steps_from_behavior_spec(
policy1.behavior_spec, num_agents=1
)
vec_vis_obs, masks = policy1._split_decision_step(decision_step)
vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
vis_obs = [torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations]
memories = torch.as_tensor(
policy1.retrieve_memories(list(decision_step.agent_id))
).unsqueeze(0)
with torch.no_grad():
_, log_probs1, _, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
)
_, log_probs2, _, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
)
np.testing.assert_array_equal(log_probs1, log_probs2)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_checkpoint_conversion(tmpdir, rnn, visual, discrete):
dummy_config = TrainerSettings()
model_path = os.path.join(tmpdir, "Mock_Brain")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
trainer_params = TrainerSettings()
saver = TorchSaver(trainer_params, model_path)
saver.register(policy)
saver.save_checkpoint("Mock_Brain", 100)
assert os.path.isfile(model_path + "/Mock_Brain-100.onnx")

505
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


import math
import tempfile
import pytest
import numpy as np
import attr
from typing import Dict
from mlagents.trainers.tests.simple_test_envs import (
SimpleEnvironment,
MemoryEnvironment,
RecordEnvironment,
)
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
from mlagents.trainers.settings import (
TrainerSettings,
PPOSettings,
SACSettings,
NetworkSettings,
SelfPlaySettings,
BehavioralCloningSettings,
GAILSettings,
TrainerType,
RewardSignalType,
EncoderType,
ScheduleType,
FrameworkType,
)
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
BRAIN_NAME = "1D"
PPO_CONFIG = TrainerSettings(
trainer_type=TrainerType.PPO,
hyperparameters=PPOSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=16,
buffer_size=64,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=32),
summary_freq=500,
max_steps=3000,
threaded=False,
framework=FrameworkType.PYTORCH,
)
SAC_CONFIG = TrainerSettings(
trainer_type=TrainerType.SAC,
hyperparameters=SACSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=8,
buffer_init_steps=100,
buffer_size=5000,
tau=0.01,
init_entcoef=0.01,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=16),
summary_freq=100,
max_steps=1000,
threaded=False,
)
# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list of all final rewards for each brain individually.
# This is so that we can process all final rewards in different ways for different algorithms.
# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards_to_use, dtype=np.float32).mean()
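# Per the comment above, tests may pass their own reward processor to
# _check_environment_trains. A hypothetical variant (not used in this repo)
# that takes the median of the last N final rewards instead of the mean:
def median_reward_processor(rewards, last_n_rewards=5):
    return float(np.median(np.array(rewards[-last_n_rewards:], dtype=np.float32)))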
class DebugWriter(StatsWriter):
"""
Print to stdout so stats can be viewed in pytest
"""
def __init__(self):
self._last_reward_summary: Dict[str, float] = {}
def get_last_rewards(self):
return self._last_reward_summary
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
def _check_environment_trains(
env,
trainer_config,
reward_processor=default_reward_processor,
env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:
run_id = "id"
seed = 1337
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)
if env_manager is None:
env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
trainer_factory = TrainerFactory(
trainer_config=trainer_config,
output_path=dir,
train_model=True,
load_model=False,
seed=seed,
param_manager=env_parameter_manager,
multi_gpu=False,
)
tc = TrainerController(
trainer_factory=trainer_factory,
output_path=dir,
run_id=run_id,
param_manager=env_parameter_manager,
train=True,
training_seed=seed,
)
# Begin training
tc.start_learning(env_manager)
if (
success_threshold is not None
): # For tests where we are just checking setup and not reward
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]
assert all(not math.isnan(reward) for reward in processed_rewards)
assert all(reward > success_threshold for reward in processed_rewards)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(PPO_CONFIG)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)
config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual, use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=0,
step_size=0.2,
)
new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,
num_vector=0,
step_size=0.5,
vis_obs_size=(36, 36, 3),
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
)
new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
config = attr.evolve(
PPO_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=500,
summary_freq=100,
)
# The number of steps is pretty small for these encoders
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
new_network_settings = attr.evolve(
PPO_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, learning_rate=1.0e-3, batch_size=64, buffer_size=128
)
config = attr.evolve(
PPO_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
max_steps=5000,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(SAC_CONFIG)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(SAC_CONFIG.hyperparameters, buffer_init_steps=2000)
config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_sac(num_visual, use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=0,
step_size=0.2,
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
)
config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,
num_vector=0,
step_size=0.5,
vis_obs_size=(36, 36, 3),
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters,
batch_size=16,
learning_rate=3e-4,
buffer_init_steps=0,
)
config = attr.evolve(
SAC_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=100,
)
# The number of steps is pretty small for these encoders
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.5 if use_discrete else 0.2
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters,
batch_size=128,
learning_rate=1e-3,
buffer_init_steps=1000,
steps_per_update=2,
)
config = attr.evolve(
SAC_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=5000,
)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=4000
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=None)
processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
# Make opponent for asymmetric case
brain_name_opp = BRAIN_NAME + "Opp"
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,
save_steps=10000,
swap_steps=10000,
team_change=400,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=4000)
_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
# Make opponent for asymmetric case
brain_name_opp = BRAIN_NAME + "Opp"
env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
)
# This config should fail because the team that is not learning, once both have reached
# max step, should be executing the initial, untrained policy.
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=0.0,
save_steps=5000,
swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
_check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)
processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)
@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
env = RecordEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,
)
# If we want true demos, we could solve the env in the usual way;
# here we just call solve() to execute the optimal policy and record it.
env.solve()
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)
action_type = "Discrete" if use_discrete else "Continuous"
demo_path_name = "1DTest" + action_type + ".demo"
demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
return demo_path
return record_demo
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
config = attr.evolve(
trainer_config,
reward_signals=reward_signals,
behavioral_cloning=bc_settings,
max_steps=500,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,
use_discrete=use_discrete,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3e-4)
config = attr.evolve(
PPO_CONFIG,
reward_signals=reward_signals,
hyperparameters=hyperparams,
behavioral_cloning=bc_settings,
max_steps=1000,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,
use_discrete=use_discrete,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
)
config = attr.evolve(
SAC_CONFIG,
reward_signals=reward_signals,
hyperparameters=hyperparams,
behavioral_cloning=bc_settings,
max_steps=500,
)
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

446
ml-agents/mlagents/trainers/tests/torch/testdcvis.demo


Binary demonstration file; contents not shown.
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
@�?�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Xͮ�D����$W�@�-j T]!$6���'�Ux ��a�W@����X���R����$�mn�1ߙ��s��6�l�u�����o��ط�7��4���J�x�䛺��ʷ+��h۾���(�+߮|g�m�.z{��m߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g��m׾���������q�������F�&Z����>o@���9�n����|����|���������ۏ��8�����׵ ���򗿿�<K����x����P����`����1��݋4�����+��6��G7�E�H���Y2;N�w������OGx�&i��=��f~��ܣ8�:�� ��{!�:Nn��?=� �/� �� ���?>�%�EP���A�q7~v�?�͵�����X��ߣG��R*�*���KB�/��.]r|�x���ePA� ��"_fK�1X,�%�A�[�'y��7��y��]F��Mje"�%@���lET����X��#r���E�<��<���i��sh�<�=|��wG�9��G�b=<�آ��[j[�|*��ّY��^ ��B�"q¡�����q6���ϔ?ܘ�G�����8 �3 �BhORn���1�̕=���P0q���a ��Ģ#����/��Φ'��ω�"�γ�Lܳ ""Po�`0@�q���+Ec���d�<��\�r����Y��^9�|�o �������hoZd�g0�fx��NOSO��T��0����"v��Y:�CSL]�Y({��ɘ3��W�V��^�����AX>�@Ap6:û ˚/��p4<�@0?Mg�Y��7,����!�A ֟ձb�% Jn�yi�_Q�5��Ѻ*IM���)�C
/O^�8ϋe^�t���s���<z�p��0�H� �l��dX�
V :W�\��y�Q��7��>9Ӱ �=�����>�n��n�t"�ǿ(���{�˃��Ӵ��FH]��Wް����N�a���liݸ��w ��?z~��"���Ǚ7?��$]�K��"�謿��Xvn���I����2׵�Q޹�%s�Hc�ge�J�7���#��h~��� S8��H�s�_�� ܨ������VF�OQ��}n�?��<�L��ݧ��a���X�0�=@������bBi���˃��v}��Hd��W%dV�/F�+�1f9�§�gZ��$�����D�ҹJ�����̝���N�dȡVd!h5bo�%K��H~�������A�o�s"�7���Hi��(���i�<
�( VHLS�m�)��5.�&��K�_��i��,��� ����,������U�d�_���=I&����'��R`u�+�Z�+�s�V�Uh ��m��ל5��b�[����g��j�0ب����8�
��\��F����B��2����a��c��� ��5o��݄�3ʓI'e~�I�9VL���8��E�'\�=v�Q�$mﰁu�u�}����&ֵ��0��J��Y8&`�Y���z���<���NF0 Yg����JՙJ�\��]��B
������ ������s���6q�D���Z�i>�[�D0<���E���럞N�z��H2b-�3�?�J �2s�5?v����z�=/���1�P;��D;�.UE��l��1#����#)�v{�M�ʫi@�yrH��^��d ����`�_ψF��[���R{6�~ɛT)E�J�~��\��(�^u����몔�J��ެ��u=b/|y>����˛h� s�D�Ԩـ���G �Ul��Ni���I��@�Ƨ����~k�&Vx��5���G^��^�"ۃ����y�:a5E�dmF2K��M����Ug���-6�uI�z4K��t�F鉘��Ȗ��dv�^��-��-�>��b.�>��`V� �������jR�r<s���ω�D`V��q�Y�ʨ���*�Ì�N#ov�Y
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
@�?�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Xͮ�D����$W�@�-j T]!$6���'�Ux ��a�W@����X���R����$�mn�1ߙ��s��6�l�u�����o��ط�7��4���J�x�䛺��ʷ+��h۾���(�+߮|g�m�.z{��m߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g�m�.z{ڷm߶}g��m׾���������q�������F�&Z����>o@���9�n����|����|���������ۏ��8�����׵ ���򗿿�<K����x����P����`����1��݋4�����+��6��G7�E�H���Y2;N�w������OGx�&i��=��f~��ܣ8�:�� ��{!�:Nn��?=� �/� �� ���?>�%�EP���A�q7~v�?�͵�����X��ߣG��R*�*���KB�/��.]r|�x���ePA� ��"_fK�1X,�%�A�[�'y��7��y��]F��Mje"�%@���lET����X��#r���E�<��<���i��sh�<�=|��wG�9��G�b=<�آ��[j[�|*��ّY��^ ��B�"q¡�����q6���ϔ?ܘ�G�����8 �3 �BhORn���1�̕=���P0q���a ��Ģ#����/��Φ'��ω�"�γ�Lܳ ""Po�`0@�q���+Ec���d�<��\�r����Y��^9�|�o �������hoZd�g0�fx��NOSO��T��0����"v��Y:�CSL]�Y({��ɘ3��W�V��^�����AX>�@Ap6:û ˚/��p4<�@0?Mg�Y��7,����!�A ֟ձb�% Jn�yi�_Q�5��Ѻ*IM���)�C
/O^�8ϋe^�t���s���<z�p��0�H� �l��dX�
V :W�\��y�Q��7��>9Ӱ �=�����>�n��n�t"�ǿ(���{�˃��Ӵ��FH]��Wް����N�a���liݸ��w ��?z~��"���Ǚ7?��$]�K��"�謿��Xvn���I����2׵�Q޹�%s�Hc�ge�J�7���#��h~��� S8��H�s�_�� ܨ������VF�OQ��}n�?��<�L��ݧ��a���X�0�=@������bBi���˃��v}��Hd��W%dV�/F�+�1f9�§�gZ��$�����D�ҹJ�����̝���N�dȡVd!h5bo�%K��H~�������A�o�s"�7���Hi��(���i�<
�( VHLS�m�)��5.�&��K�_��i��,��� ����,������U�d�_���=I&����'��R`u�+�Z�+�s�V�Uh ��m��ל5��b�[����g��j�0ب����8�
��\��F����B��2����a��c��� ��5o��݄�3ʓI'e~�I�9VL���8��E�'\�=v�Q�$mﰁu�u�}����&ֵ��0��J��Y8&`�Y���z���<���NF0 Yg����JՙJ�\��]��B
������ ������s���6q�D���Z�i>�[�D0<���E���럞N�z��H2b-�3�?�J �2s�5?v����z�=/���1�P;��D;�.UE��l��1#����#)�v{�M�ʫi@�yrH��^��d ����`�_ψF��[���R{6�~ɛT)E�J�~��\��(�^u����몔�J��ެ��u=b/|y>����˛h� s�D�Ԩـ���G �Ul��Ni���I��@�Ƨ����~k�&Vx��5���G^��^�"ۃ����y�:a5E�dmF2K��M����Ug���-6�uI�z4K��t�F鉘��Ȗ��dv�^��-��-�>��b.�>��`V� �������jR�r<s���ω�D`V��q�Y�ʨ���*�Ì�N#ov�Y
���tT���i���{�~������ޔ�:x�P�a栭��������i��_IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
�?�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�X�n\E�IF��� X�����k�� ;~�=_6Y@/ $�x,#�P�����OUw�nj��-n�"�z�:U�n�뉗_|��b��Gsκ�����v�۝��ڱ��M�۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~�x����__}������◿��k��]^^�-�����]�k!�^��ln�����.�Ƿ��/�G��C�ڷ ��JL߄���z/�c��{8�i]G���/�������Ez�z�K_~���G�=��LMQ����?����O�럯7�l^���Tj-�Q~���o��o���v� m��������ꇫ����Pp�.�S�_r��ψ�����b/4���H�?y��L�2fONO(uq~�*�7R?O�)����$�
�@V���^$'�呍(ȥl���)�gg|?�S'���>�r׊l�l��
`T��[R%�8�t�"�ɐ��6ⶋ�͆��S��҇ H9�� ��tJ&�q�!% i�,9�.����,��D\�Z��P� 76�!&�ώ�m- %bI�^8ڋ1qP&$"�%��l�X@��S�q IF���`I o �_�H�vPsT��$����X�}� S~�9�*Q(a�Gia���*$����)cGM�t5@DZOsXC)m̪Jq4�!<��;v�*�|P[x����Ql�����`�2��MJ�%y1�h��[^,A��ú1%O��ވe*5��;��M:��R�$��-���@����q�Pm���ݼ�9ҧ�H�au"c���� �,H�p��8j�V������_F�fHJ�)I��-ϖ@Jd�`�>i�t�<:6�&��/�\R)*���" ��zsq��{j�}0O� ���0J��b��u��b�I ���u��# �H �dy"oh�yDj�pP�>l 8^�9�l�1��c��D"��T�#`�RT�K� $�Sci�I��.iV� �#T�PX�K#�nߧT)�0{��L�Qo)�F�B@Ks=7�H���3��l��EH�s?/~i�4'����g�m����2͡���1�B��J�dM�{>Ϳ����*�?mL(�-��88!+$B����͢W�t�ʴ�`H�WGT��q�pP� ˈF&)�pok�S>5�C
U@�:P�*y�!�D�����a��)�����C~Vd�#Ym(���2��ӝGv�L��:�r��u��'ݍ^Җ-������y��^����kz�k�u'�z�į##t$�3
�"�f�Ш7uo3����`V���V�oxY�v(���e�n;j�fq��)�<�C���>+�m�;���C�C춋��wT���7��i��A�K 삟y�ӹ�bw:���c9ԕ9��=��&\��(o{;e��͝�}�|��(�۪Ц���3�3��܅�v��m�v��T��!���Q��s�.���� ���7�˲K�ڱ�� �vQ����w{;[�iS�hgSU�YD�"��y��� �IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
@@�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y1r$7 Ժ�"m��WN�������p��p��Y�8�*�A��93������V��M��pV��/������7{m��=����ϝ�;����~���-ϝ�;����~��>/�y�����
�c��M���<����p�������~�y�w����p��m?��<�;\��������O?���d?��{��.�kV����U�����J=== t���n�Թ��^��;�Q�񼛷������;�Wd�����ߝ�緿���]������D^T����;��^�b�&�� }yyq:���ϟ�m�����1r���* ��"�5c���h�g�%��P!A���S׿)���Q���[�/s��m ���ӧ���U.Hg1�B4i@�e��s�1���n�S�u?Pq�[��z�
�h��8eX$/��t�����$��or���x���Z
Y��e0� ���� �S<#��Mb�������������8(��r���@$z2ul ��}}Խ.([$��I��('/|��l� F�!��3�{�J�>����l

}�M�LU�BK:�R��b�f����59M���?<<ʣn�_�F0?T�b���b���d*P�z�S�o��*�O�*9F�A��G��o���̈�J��Ki43�L�5�(�e��Ҹu�c��\0\�Ǟ����.��h�7�P)ʎ'��:J�0��2���˻���O�K�z�@C�`��5��8��k�#��؇G]:HO�϶����*�)!�z�+�d�r�w��u����n)`�i�Z;(�jv��a���b�Pj��C��=��,��01g��A&s�Lٶ7D.�E[r4&�p��:���j��;�N���}T���g�=\�C,$�@KC��u�s��<�%��ݙg0�nģ7���(ŷ�A���K�"Ȁe)�؇|^�6) �� ��-���Y*�{�/89ϋ��[�=O�m{(����<Y�nu�29���I��$�� R�x���}l�d-��Hb��h���j����Ԑk�:��X�� &ŀ��5`+��;ۓCٍ5�zB��?�TWA$�ů��[L/a>&�<J1s��EQҘ���N�O�t4<=|��`'�T�T1W�H��kr�e`�L���V����U� %/#�vGg8�50�l:�h-�yNr��T�B8����P s� �3v��Z�T�����W�,�s���p�F��o�F ����k�Rv�%�v�1!z^����,��/�b!ʅ��'Qә�bE$z�i��a�F07���;]�2]�Ȩ�<�#>f\�Up�q{p8ZLpX �l���%/��E5/#D���]�cD�"����n�N����pԺ��X %'���򖍅�P�������7vjB��XKW���ܔ�` 5����(�ml18��nG<z������4���1����u)-�,�QS�=ܔj�b7�'�������OW���0�
�0o#8�����1���4? �I�[8�w���cs:!��B���Hf_�*�������<|17�[N��IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?@�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�Y��U݁�T��d���GP�|!99F������]�
�}������i%���*�t�9}�1wF��}�ÏWs�|0��y�6�\�~�����p���𦇑۝ow~�h�~�7���ڱo�~�h�~�7���۱o�~�h�~�7���۱o�~�h�~�7���۱o�~n��|����M�˯���<'�fH�=l�H��c��D->}YO�V}J�����ooo�`�c�y����LN�>���3�����ɯ������~�ݫ�����ۿ��i]� �>���۩>���w��7������食-tB���~��j�v�~�:ᨉ�����~u�Z�Y����I��r��T��D�^�xA�������hwu��ޖ�^ݼ߾�#_l?����lr*1�G�r����v��ۋc*Z(�!�ai�U'��܀.>^����eL.)&^��L��������
1o1F��b�2wd �]��Mn m6$��<����&䣫$נ�ł�ZJ�z5�<�5O�emp��,���NEPBe��$� �P�eh�/��.�����
�PM����t�� �t i�)��`$�#E�n�6�e
��� ��
}��� ��������r�F���!@Ƞ���ѱ���S���lJ�����0H���E�<�!�Q�rԋ(�� �A��ßժJ�4��Gjfi�� e�iPYM>4<�b񊡒.��!<|��d�:��"�a�,���F@e��`#�cO����r,JH���z�ɰ����X�ҋ��9��~��L��*����:���Ԛj��1p�%|�^<(�Fj7��kđ:5�¡uz2�"���"��,L�0`h�R�9�;��z��)&��D�ǖ�gK(9s�v�cg@wB?�6���gq\����Dg��%���hG?�T2��� �`n0�J{1t��Dd��F ����� ��J���IR -h���H�� u�r��s�I�S\�G��@I�U�G
����8.Q4&�n�_x��� �\� L{u��q��d��R�~�כ�҆8cfL��S���nT
`)��k�`fᙇ����^V#��R?M~ )� X�� j��3��#O^���2�1 ��1�l<Ǖ,��:��|�m�Y�w�P�[�98��|�}����t�r�GФ��+3
E���T8���e�G:���`k\Sh>KCJU��:��,y�!�B��db%BX�,T
!LAZ�!?I2��$7$_x�Ą��I��-;8�`��% ~���ƒ��Fw���yɱu8�$�3�|^�" _9���_��k8J��y#~��I�1#�������m���0}0�Nwa3�/�$O+����<u��m�t
��8z_)i�<���U#݌�#�O�k�1�c췋��wR�4~�ƙR6)�d�삟yǟRy8c9���c���1��5%��D�/�mo���s}Gxh#V���lx��d�X�|F��t#�]�l��0�EKghlJ�?���4iNۅm�^x�����&U�� Ixn:������ږ�Is�e��?��σ���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
�?�?�?�
� P‡�������j�
TT� �PNG

IHDRTT� ���IDATx�XˎUuG��h2�`�0� �D>!{���� �Ċ�%!�Nƚl� ��v�=Uuo����3�o/��q�T��붓����^-�z�Tᬻ�_��oO�=�n��>�$�=�����v���� ��v���v����۾�v���v����۾�v���v����۾�����ꊴ������ߟi��m�!=��ɦF4�O;�=D�]5�d��P����)��.//��.[wj�����(���Z��Q~j��J��;���/�x��g~��������|%Ms�c��|���_����//������7�n�o~�l~����i�;��#������y������ʩ�q���<� ����B 7�����E��߾�U�=;;�V���R~��?~|N雛W4��م�JK! +Su��P[�>zċ;?g 9�~�pmq����Y; ����Q�ܑ5�v W�rS �L��n��y����z�,&�S}��Z[)X�:��tG� K@��Ũ*Ra㪨�� (�'��hN������O?%�i���h�;G�K(X� FV�paX5����<�\o�M���E��n�$,X��%@�qtcΈ�2A"1�,���v�5T��zԈ�H2z.�� �����M �d=`5G�QO"I)��hV���ǯԲJ� ���Gia��� d��|e6`��xt�� ���J} �9�:G�N1�|R[x����Qld�����&�PjD�Up� �b Jo6�)y�{�,�J����:�I�g��� Pǀ����M z�I�8i�v���@G� Uk:͠N�X䳃��� Rn�00�Z��9�;�B�@�f��/QԱ%@��H����g �NB�G�����A�K
!E��Y$�<[Bonv�cO-ÅyB�\�s�(Uۋ�����-'5yf���\����c�P���G���A ��%������Q����Z��J�$"�;�:�G ����,� H7�^t����+*��NG�N����F��~x,m sp��4�„Ȕq��B@Ks�}h��|�A�'8�W����S�cA�H3���9�A��̣���+t��=�Lsh��s ����R<Y��=���c`��J�M�'��G� �����)6����z��뺕i��>���4)7�J��H��LR0���~NE`��,����u�U��C@���2f�S
Aq9��Pd�#�6V_x�b��DŽ�t��]:H��[��R~��� LǓ�F/i˖���Cyg>�e/��bš>��x����-����$�gFAU�,�hԛ���vwb�`�����^���}2�,����Na؈���¨�cO?rB��m_�(8j`�;T:ļ]�ϼ�:D���3�mh�b�س ��;�����鸧;��PW�(R���p-R����픥>7wN�m�� .�o�R�"���Ψ�72� [����l� �S����g�'i"�i���� /�66:�d�(˜d�ce#�vQ���{�U7�����Y��"��]�>���IEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�Z�nE�I��bG�+BA)�&���y�#$x�9'`��H�8ή����P�S�U��3;��ǑfF#w�|����q���Ά�\j�X�X�Pw��q�؁q�����q�ǝ`Ʊএ�8������p���~�q�؁q����m?��8���8������~�c�?�'��I\��������* қ����M�Z$k�TPI�2Z�kQ����^���#�_�n�^q��F?�}�������u6W�����/fEq
�l�f�w�WU�{ˬ���'��~�+Z]�ݟ�}����W�z.����p���b1_,J~���>r������_7��Ԛk���̳��/���y{*�W���LJ�/�O�z��"+��r^��eyo��9���̿����/_L������ӽϋ��`� ����� .<����ѝ;�c�s�`?�r�������G�TQΦg���WR�9V�.�e���������8J���3��g2��
�k6�Nn���Z���6�Fq�j{{ �;�v�������
�{���'����Ln>ŝ����}��C QQ�9�-1���̀�G�� �R�����g�I��^�wߜ����jQ-���m���,��/N��www�ъ�����Z�
8������<���-,�G��������P[��!ѐC��m�(�s�l�A�Z�ٙ'����s^;�lD{rt
�l>�y<���+���~�n��h����ZPB ���h���X��i'�ݸ�^}�a��7n�_�Ώ�7�a��ڽFP�hd$�HLX1!������f�@сs����ӗ7�߄�O�9�x�ލ=���f��Rl���PFFL-E@���X���|2X'(=+��ك��W=�hi����(F�4K�ӈ�����)�L�*#�����l�%~��*~إՈ//E�����Q|Д$LwR�������M���:4!�i)ђ�W��XF��,E@�E(^,��Ym�}]1��%���*��N6��(��o��b�ġR����w&���d�j�35X�Q[���W`B҂
C�v����~$^�Y��J��)�;�(x�#[`k{]��C���i V�4�:���+�[�H!z��[��KaLI>,�U�La��ڄQp_n�����|�p�$��2qBeW���0��M_Ď(Lu�� \/K��A�KYe,9��ҁ$Rk��2ӵ#1�b���M[�����C��UX�� J-5HG�Q�w��l=��C�Kv�E:$�["�b��v 2q�Ł�j�tFJ�c�|h�xb�X�:j���j?�b���#����*��Z�(PT������������I+F�ߴ���� °ځ�tB9����y����&Z��I�ʅK#A��Sa�V:�(U�`(�H ^�Z�����ʀ�v��Xх'v԰v6�wS��HN$U�/��{$�u�� /p�!�uBZ�c�F�j�����z� O<��t�z�6���a`��3�I�gL���^�_��w��Ɖ�v���3"W���r�D:���8�)��a/���� �8B�vƦ�It��) :b�u� Uu��}��}`�Z�o�n�.U�j�ϼqu��/�@��6#�F��x� �K�v���aL3�qR]�I� �„p�w���Y�������#�_c#�ⵕ  |���daG{��M#\�Z���eUZ�e���j��z�S� ϱZ֚\g`��U�Ɉ��8ӋR����:���V]!d�(N#��?��˙$�%JIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"
@�
�P‡�������j�
TT��PNG

IHDRTT� ���IDATx�ZK�E�%��ټ �\,ʕK�q��=?q������ʕ�P�8�1+���;�;TuWuU�􌽎c��L��z|_���=�d�ٗ_e]���j��w�|WW�_�~�;8�~�wp�]����+�� �۾���x����}'�o�.z��۾���@��;���i�o�~�wp�������}����3����{<zx���7��$|�ɭ&���-͜����_?�c���eY?���-7�T����v ��m��/�8yx2=<�}���y�߻}�ȊrR��tyZU���j2�\�.o��p�?�e?�}����lA�b�,������<-�C��>}u�Q�T�f�yV��:���r>E?�f�,����&��t���W>}t����QSA�*ۍo�~����q {������q����?<��Gw���tAYg�}&A(g���Y�Aͬ�W͎<�91�3:�S^36j6&�
�ް�a��ɬ�Y<*�m8���<F�R�lG͇
�}�0?��������ɿ�E�\�~;ߵ]�GEYu���_�ze<�T�"��h�H��w�8����X���ȥ�ޗ�_�x|�/]���O���s\�G|��- ���7o�Mz��E�`��'h�����ܺ�:��Ǹ���E�������� \y<ܡ����4�C�(ժ��B�+���Λv9.����%;���s��Ň�G�^/����09ע(*p�޻)3a����L���� \��"��+�� ��۲|��z����m� 1�CY��ɵ��� B%�xBG�&QH��u������G�]�O�����i�V����=�n��l��q��M
��N�����`�I�����$_p������ݶctqxk� �ntT�&� +��7��b�D �
���� �q�x�%4���M�,4���P�D@{##�
#7��P `�h������=`�E7��8���f �B�I\?�3�D�8� ��A�P]
а�CZ��ڦ�
vI�M�h�n Bpa�D@�GCI��D$6��82��m��%��}!�rq�О��0��J<I�⛅5c�qO{��Z"���yE ]�C�L�CB�`�Dx{#߬�4t�P��w{S�j��4��j`Ń�,�Q��(x&$�С`h ����=~���.��G�A��)�
%
%�ԑu��>��^U�ʋ�R=��F�z꩔/ԏ�E%�n�"\��Ho ��Uy���-[��Q�RV�a}�1pQŞF�����(,��D� ��7zG�W���+8�O)�΍}qB���'� �Jȓ�������򫎣ի_âȡ�)�jnm2PV�=C�@�r �����o����uO�����]��� �[ �M�Q֠(w��ds���!:�I�A�3ρB��Bu&h:��2�!��qHwA��y�#�(t�Ӟc�j��٪PK#��<���@�� sT�EVohm� �'��{Ȥ�KW�5�Qo�·C�ȇ74�n�x<��3��/�F G�H]x�(U>� ���s��L�`i}�}\N����U~Ðz�Y)iD"I}"y �3������0�j�)���!x6V�d����&�:@ӂ��pnc���t��0!YBX���i����L���p����1~�`��<�y�,|���؇qW����W�N�g1��?=��܏q�I"o?e������}&�m�a"�I��b���H覚��l������f����3$s ��7p&H����j�P��ImWCw��"��$��Y��Y�D��'TԔ�60E>��{p��-�ݒ��K� � o^���.O�h���8;ou�<��}��Ρj0~��o�7`+N��5G_�ƴ�����A6:��Ԧ�V�Aj�.����m���N+��V�袩7�8��5[�
qF�% t^i(Dn�e'�N��՚���f�w!��^/#��wEj��k^{-v��,=�J�]nq�,�s; ��hp�9U���� L��6r�IB�l4X���Y4 �i��������
�)�/��ED$(DLIEND�B`�j
"

74
ml-agents/mlagents/trainers/torch/model_serialization.py


import os
import torch
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.settings import SerializationSettings

logger = get_logger(__name__)


class ModelSerializer:
    def __init__(self, policy):
        # ONNX only supports inputs in NCHW (channels-first) format.
        # Barracuda also expects data in NCHW, so any multi-dimensional input
        # must follow that layout or the Barracuda import will fail.
        self.policy = policy
        batch_dim = [1]
        seq_len_dim = [1]
        dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
        # Create visual inputs in NCHW order
        # (shapes are NHWC in self.policy.behavior_spec.observation_shapes).
        dummy_vis_obs = [
            torch.zeros(batch_dim + [shape[2], shape[0], shape[1]])
            for shape in self.policy.behavior_spec.observation_shapes
            if len(shape) == 3
        ]
        dummy_masks = torch.ones(batch_dim + [sum(self.policy.actor_critic.act_size)])
        dummy_memories = torch.zeros(
            batch_dim + seq_len_dim + [self.policy.export_memory_size]
        )
        self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
        self.input_names = (
            ["vector_observation"]
            + [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
            + ["action_masks", "memories"]
        )
        self.output_names = [
            "action",
            "version_number",
            "memory_size",
            "is_continuous_control",
            "action_output_shape",
        ]
        self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
        self.dynamic_axes.update({"action": {0: "batch"}})

    def export_policy_model(self, output_filepath: str) -> None:
        """
        Exports a Torch model for a Policy to .onnx format for Unity embedding.

        :param output_filepath: file path to output the model (without file suffix)
        """
        if not os.path.exists(output_filepath):
            os.makedirs(output_filepath)
        onnx_output_path = f"{output_filepath}.onnx"
        logger.info(f"Converting to {onnx_output_path}")
        torch.onnx.export(
            self.policy.actor_critic,
            self.dummy_input,
            onnx_output_path,
            opset_version=SerializationSettings.onnx_opset,
            input_names=self.input_names,
            output_names=self.output_names,
            dynamic_axes=self.dynamic_axes,
        )
        logger.info(f"Exported {onnx_output_path}")

111
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


import numpy as np
import pytest
import torch
from mlagents.trainers.torch.components.reward_providers import (
    CuriosityRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)

SEED = [42]


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_settings.strength = 0.1
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    assert curiosity_rp.strength == 0.1
    assert curiosity_rp.name == "Curiosity"


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,), (64, 66, 1)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_rp = create_reward_provider(
        RewardSignalType.CURIOSITY, behavior_spec, curiosity_settings
    )
    assert curiosity_rp.name == "Curiosity"


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    curiosity_rp.update(buffer)
    reward_old = curiosity_rp.evaluate(buffer)[0]
    for _ in range(10):
        curiosity_rp.update(buffer)
        reward_new = curiosity_rp.evaluate(buffer)[0]
        assert reward_new < reward_old
        reward_old = reward_new


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec", [BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)]
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(200):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_action(buffer)[0].detach()
    target = buffer["actions"][0]
    error = float(torch.mean((prediction - target) ** 2))
    assert error < 0.001


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,), (64, 66, 3)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(100):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_next_state(buffer)[0]
    target = curiosity_rp._network.get_next_state(buffer)[0]
    error = float(torch.mean((prediction - target) ** 2).detach())
    assert error < 0.001
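
Note (an assumed sketch, not part of this changeset): the fixtures above can be exercised outside pytest as follows, reusing only names defined in the files shown here, to watch the intrinsic curiosity reward shrink on repeated updates.

# Hedged usage sketch for CuriosityRewardProvider on a synthetic buffer.
import numpy as np
import torch
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import CuriositySettings
from mlagents.trainers.torch.components.reward_providers import CuriosityRewardProvider
from mlagents.trainers.tests.torch.test_reward_providers.utils import create_agent_buffer

np.random.seed(42)
torch.manual_seed(42)
spec = BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)
provider = CuriosityRewardProvider(spec, CuriositySettings(32, 0.01))
buffer = create_agent_buffer(spec, 5)
for step in range(10):
    provider.update(buffer)
    print(step, provider.evaluate(buffer)[0])  # expected to trend downward, as asserted above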

56
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


import pytest
from mlagents.trainers.torch.components.reward_providers import (
    ExtrinsicRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    settings.gamma = 0.2
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    assert extrinsic_rp.gamma == 0.2
    assert extrinsic_rp.name == "Extrinsic"


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    extrinsic_rp = create_reward_provider(
        RewardSignalType.EXTRINSIC, behavior_spec, settings
    )
    assert extrinsic_rp.name == "Extrinsic"


@pytest.mark.parametrize("reward", [2.0, 3.0, 4.0])
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
    buffer = create_agent_buffer(behavior_spec, 1000, reward)
    settings = RewardSignalSettings()
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    generated_rewards = extrinsic_rp.evaluate(buffer)
    assert (generated_rewards == reward).all()

138
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


from typing import Any
import numpy as np
import pytest
from unittest.mock import patch
import torch
import os
from mlagents.trainers.torch.components.reward_providers import (
    GAILRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
    DiscriminatorNetwork,
)

CONTINUOUS_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/test.demo"
)
DISCRETE_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/testdcvis.demo"
)
SEED = [42]


@pytest.mark.parametrize(
    "behavior_spec", [BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)]
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
    gail_rp = GAILRewardProvider(behavior_spec, gail_settings)
    assert gail_rp.name == "GAIL"


@pytest.mark.parametrize(
    "behavior_spec", [BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)]
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    assert gail_rp.name == "GAIL"


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(8,), (24, 26, 1)], ActionType.CONTINUOUS, 2),
        BehaviorSpec([(50,)], ActionType.DISCRETE, (2, 3, 3, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (20,)),
    ],
)
@pytest.mark.parametrize("use_actions", [False, True])
@patch(
    "mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
)
def test_reward_decreases(
    demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.05, use_vail=False, use_actions=use_actions
    )
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
    init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
    for _ in range(10):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    assert reward_expert > reward_policy  # Expert reward greater than non-expert reward
    assert (
        reward_expert > init_reward_expert
    )  # Expert reward getting better as network trains
    assert (
        reward_policy < init_reward_policy
    )  # Non-expert reward getting worse as network trains


@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3, 3, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (20,)),
    ],
)
@pytest.mark.parametrize("use_actions", [False, True])
@patch(
    "mlagents.trainers.torch.components.reward_providers.gail_reward_provider.demo_to_buffer"
)
def test_reward_decreases_vail(
    demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.005, use_vail=True, use_actions=use_actions
    )
    DiscriminatorNetwork.initial_beta = 0.0
    # we must set the initial value of beta to 0 for testing
    # If we do not, the kl-loss will dominate early and will block the estimator
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    for _ in range(100):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    assert reward_expert > reward_policy  # Expert reward greater than non-expert reward

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.trajectory import SplitObservations


def create_agent_buffer(
    behavior_spec: BehaviorSpec, number: int, reward: float = 0.0
) -> AgentBuffer:
    buffer = AgentBuffer()
    curr_observations = [
        np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
    ]
    next_observations = [
        np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
    ]
    action = behavior_spec.create_random_action(1)[0, :]
    for _ in range(number):
        curr_split_obs = SplitObservations.from_observations(curr_observations)
        next_split_obs = SplitObservations.from_observations(next_observations)
        for i, _ in enumerate(curr_split_obs.visual_observations):
            buffer["visual_obs%d" % i].append(curr_split_obs.visual_observations[i])
            buffer["next_visual_obs%d" % i].append(
                next_split_obs.visual_observations[i]
            )
        buffer["vector_obs"].append(curr_split_obs.vector_observations)
        buffer["next_vector_in"].append(next_split_obs.vector_observations)
        buffer["actions"].append(action)
        buffer["done"].append(np.zeros(1, dtype=np.float32))
        # Use the key ExtrinsicRewardProvider reads ("environment_rewards"); otherwise
        # test_reward in test_extrinsic.py would compare against an empty array.
        buffer["environment_rewards"].append(np.ones(1, dtype=np.float32) * reward)
        buffer["masks"].append(np.ones(1, dtype=np.float32))
    return buffer

0
ml-agents/mlagents/trainers/torch/components/__init__.py

0
ml-agents/mlagents/trainers/torch/components/bc/__init__.py

183
ml-agents/mlagents/trainers/torch/components/bc/module.py


from typing import Dict
import numpy as np
import torch

from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.settings import BehavioralCloningSettings, ScheduleType
from mlagents.trainers.torch.utils import ModelUtils


class BCModule:
    def __init__(
        self,
        policy: TorchPolicy,
        settings: BehavioralCloningSettings,
        policy_learning_rate: float,
        default_batch_size: int,
        default_num_epoch: int,
    ):
        """
        A BC trainer that can be used inline with RL.
        :param policy: The policy of the learning model
        :param settings: The settings for BehavioralCloning including LR strength, batch_size,
        num_epochs, samples_per_update and LR annealing steps.
        :param policy_learning_rate: The initial learning rate of the policy. Used to set an
        appropriate learning rate for the pretrainer.
        """
        self.policy = policy
        self._anneal_steps = settings.steps
        self.current_lr = policy_learning_rate * settings.strength
        learning_rate_schedule: ScheduleType = (
            ScheduleType.LINEAR if self._anneal_steps > 0 else ScheduleType.CONSTANT
        )
        self.decay_learning_rate = ModelUtils.DecayedValue(
            learning_rate_schedule, self.current_lr, 1e-10, self._anneal_steps
        )
        params = self.policy.actor_critic.parameters()
        self.optimizer = torch.optim.Adam(params, lr=self.current_lr)
        _, self.demonstration_buffer = demo_to_buffer(
            settings.demo_path, policy.sequence_length, policy.behavior_spec
        )
        self.batch_size = (
            settings.batch_size if settings.batch_size else default_batch_size
        )
        self.num_epoch = settings.num_epoch if settings.num_epoch else default_num_epoch
        self.n_sequences = max(
            min(self.batch_size, self.demonstration_buffer.num_experiences)
            // policy.sequence_length,
            1,
        )
        self.has_updated = False
        self.use_recurrent = self.policy.use_recurrent
        self.samples_per_update = settings.samples_per_update

    def update(self) -> Dict[str, np.ndarray]:
        """
        Updates the policy using the demonstration buffer.
        :return: The loss of the update.
        """
        # Don't continue training if the learning rate has reached 0, to reduce training time.
        decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
        if self.current_lr <= 0:
            return {"Losses/Pretraining Loss": 0}
        batch_losses = []
        possible_demo_batches = (
            self.demonstration_buffer.num_experiences // self.n_sequences
        )
        possible_batches = possible_demo_batches
        max_batches = self.samples_per_update // self.n_sequences
        n_epoch = self.num_epoch
        for _ in range(n_epoch):
            self.demonstration_buffer.shuffle(
                sequence_length=self.policy.sequence_length
            )
            if max_batches == 0:
                num_batches = possible_batches
            else:
                num_batches = min(possible_batches, max_batches)
            for i in range(num_batches // self.policy.sequence_length):
                demo_update_buffer = self.demonstration_buffer
                start = i * self.n_sequences * self.policy.sequence_length
                end = (i + 1) * self.n_sequences * self.policy.sequence_length
                mini_batch_demo = demo_update_buffer.make_mini_batch(start, end)
                run_out = self._update_batch(mini_batch_demo, self.n_sequences)
                loss = run_out["loss"]
                batch_losses.append(loss)

        ModelUtils.update_learning_rate(self.optimizer, decay_lr)
        self.current_lr = decay_lr
        self.has_updated = True
        update_stats = {"Losses/Pretraining Loss": np.mean(batch_losses)}
        return update_stats

    def _behavioral_cloning_loss(self, selected_actions, log_probs, expert_actions):
        if self.policy.use_continuous_act:
            bc_loss = torch.nn.functional.mse_loss(selected_actions, expert_actions)
        else:
            log_prob_branches = ModelUtils.break_into_branches(
                log_probs, self.policy.act_size
            )
            bc_loss = torch.mean(
                torch.stack(
                    [
                        torch.sum(
                            -torch.nn.functional.log_softmax(log_prob_branch, dim=1)
                            * expert_actions_branch,
                            dim=1,
                        )
                        for log_prob_branch, expert_actions_branch in zip(
                            log_prob_branches, expert_actions
                        )
                    ]
                )
            )
        return bc_loss

    def _update_batch(
        self, mini_batch_demo: Dict[str, np.ndarray], n_sequences: int
    ) -> Dict[str, float]:
        """
        Helper function for update_batch.
        """
        vec_obs = [ModelUtils.list_to_tensor(mini_batch_demo["vector_obs"])]
        act_masks = None
        if self.policy.use_continuous_act:
            expert_actions = ModelUtils.list_to_tensor(mini_batch_demo["actions"])
        else:
            raw_expert_actions = ModelUtils.list_to_tensor(
                mini_batch_demo["actions"], dtype=torch.long
            )
            expert_actions = ModelUtils.actions_to_onehot(
                raw_expert_actions, self.policy.act_size
            )
            act_masks = ModelUtils.list_to_tensor(
                np.ones(
                    (
                        self.n_sequences * self.policy.sequence_length,
                        sum(self.policy.behavior_spec.discrete_action_branches),
                    ),
                    dtype=np.float32,
                )
            )
        memories = []
        if self.policy.use_recurrent:
            memories = torch.zeros(1, self.n_sequences, self.policy.m_size)

        if self.policy.use_vis_obs:
            vis_obs = []
            for idx, _ in enumerate(
                self.policy.actor_critic.network_body.visual_encoders
            ):
                vis_ob = ModelUtils.list_to_tensor(
                    mini_batch_demo["visual_obs%d" % idx]
                )
                vis_obs.append(vis_ob)
        else:
            vis_obs = []

        selected_actions, all_log_probs, _, _, _ = self.policy.sample_actions(
            vec_obs,
            vis_obs,
            masks=act_masks,
            memories=memories,
            seq_len=self.policy.sequence_length,
            all_log_probs=True,
        )
        bc_loss = self._behavioral_cloning_loss(
            selected_actions, all_log_probs, expert_actions
        )
        self.optimizer.zero_grad()
        bc_loss.backward()
        self.optimizer.step()
        run_out = {"loss": bc_loss.detach().cpu().numpy()}
        return run_out

15
ml-agents/mlagents/trainers/torch/components/reward_providers/__init__.py


from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (  # noqa F401
    BaseRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.extrinsic_reward_provider import (  # noqa F401
    ExtrinsicRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.curiosity_reward_provider import (  # noqa F401
    CuriosityRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (  # noqa F401
    GAILRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.reward_provider_factory import (  # noqa F401
    create_reward_provider,
)

72
ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py


import numpy as np
from abc import ABC, abstractmethod
from typing import Dict

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.settings import RewardSignalSettings
from mlagents_envs.base_env import BehaviorSpec


class BaseRewardProvider(ABC):
    def __init__(self, specs: BehaviorSpec, settings: RewardSignalSettings) -> None:
        self._policy_specs = specs
        self._gamma = settings.gamma
        self._strength = settings.strength
        self._ignore_done = False

    @property
    def gamma(self) -> float:
        """
        The discount factor for the reward signal
        """
        return self._gamma

    @property
    def strength(self) -> float:
        """
        The strength multiplier of the reward provider
        """
        return self._strength

    @property
    def name(self) -> str:
        """
        The name of the reward provider. Is used for reporting and identification
        """
        class_name = self.__class__.__name__
        return class_name.replace("RewardProvider", "")

    @property
    def ignore_done(self) -> bool:
        """
        If true, when the agent is done, the rewards of the next episode must be
        used to calculate the return of the current episode.
        Is used to mitigate the positive bias in rewards with no natural end.
        """
        return self._ignore_done

    @abstractmethod
    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        """
        Evaluates the reward for the data present in the Dict mini_batch. Use this when
        evaluating a reward function drawn straight from a Buffer.
        :param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
        when drawing from the update buffer.
        :return: a np.ndarray of rewards generated by the reward provider
        """
        raise NotImplementedError(
            "The reward provider's evaluate method has not been implemented"
        )

    @abstractmethod
    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        """
        Updates the reward based on the data present in the Dict mini_batch. Use this when
        updating a reward function drawn straight from a Buffer.
        :param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
        when drawing from the update buffer.
        :return: A dictionary from string to stats values
        """
        raise NotImplementedError(
            "The reward provider's update method has not been implemented"
        )
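
To make the abstract contract concrete, here is a minimal sketch of a custom provider; `ConstantRewardProvider` is illustrative only and is not part of this change.

# Hedged sketch: the smallest BaseRewardProvider subclass -- a constant per-step bonus.
import numpy as np
from typing import Dict
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers import BaseRewardProvider


class ConstantRewardProvider(BaseRewardProvider):
    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        # One reward entry per experience in the mini batch.
        return 0.1 * np.ones(mini_batch.num_experiences, dtype=np.float32)

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        # Nothing to train for a constant signal.
        return {}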

15
ml-agents/mlagents/trainers/torch/components/reward_providers/extrinsic_reward_provider.py


import numpy as np
from typing import Dict

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)


class ExtrinsicRewardProvider(BaseRewardProvider):
    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        return np.array(mini_batch["environment_rewards"], dtype=np.float32)

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        return {}

43
ml-agents/mlagents/trainers/torch/components/reward_providers/reward_provider_factory.py


from typing import Dict, Type

from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.extrinsic_reward_provider import (
    ExtrinsicRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.curiosity_reward_provider import (
    CuriosityRewardProvider,
)
from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
    GAILRewardProvider,
)
from mlagents_envs.base_env import BehaviorSpec

NAME_TO_CLASS: Dict[RewardSignalType, Type[BaseRewardProvider]] = {
    RewardSignalType.EXTRINSIC: ExtrinsicRewardProvider,
    RewardSignalType.CURIOSITY: CuriosityRewardProvider,
    RewardSignalType.GAIL: GAILRewardProvider,
}


def create_reward_provider(
    name: RewardSignalType, specs: BehaviorSpec, settings: RewardSignalSettings
) -> BaseRewardProvider:
    """
    Creates a reward provider class based on the name and config entry provided as a dict.
    :param name: The name of the reward signal
    :param specs: The BehaviorSpecs of the policy
    :param settings: The RewardSignalSettings for that reward signal
    :return: The reward signal class instantiated
    """
    rcls = NAME_TO_CLASS.get(name)
    if not rcls:
        raise UnityTrainerException(f"Unknown reward signal type {name}")
    class_inst = rcls(specs, settings)
    return class_inst
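
The factory mirrors the tests earlier in this diff; a short sketch of the call, assuming a continuous BehaviorSpec like the ones used there:

# Hedged sketch: building a provider through the factory, mirroring test_factory above.
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.torch.components.reward_providers import create_reward_provider

spec = BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)
provider = create_reward_provider(
    RewardSignalType.EXTRINSIC, spec, RewardSignalSettings()
)
assert provider.name == "Extrinsic"  # name is derived from the class name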

225
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


import numpy as np
from typing import Dict
import torch

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)
from mlagents.trainers.settings import CuriositySettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Swish
from mlagents.trainers.settings import NetworkSettings, EncoderType


class CuriosityRewardProvider(BaseRewardProvider):
    beta = 0.2  # Forward vs Inverse loss weight
    loss_multiplier = 10.0  # Loss multiplier

    def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
        super().__init__(specs, settings)
        self._ignore_done = True
        self._network = CuriosityNetwork(specs, settings)
        self.optimizer = torch.optim.Adam(
            self._network.parameters(), lr=settings.learning_rate
        )
        self._has_updated_once = False

    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        with torch.no_grad():
            rewards = self._network.compute_reward(mini_batch).detach().cpu().numpy()
        rewards = np.minimum(rewards, 1.0 / self.strength)
        return rewards * self._has_updated_once

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        self._has_updated_once = True
        forward_loss = self._network.compute_forward_loss(mini_batch)
        inverse_loss = self._network.compute_inverse_loss(mini_batch)
        loss = self.loss_multiplier * (
            self.beta * forward_loss + (1.0 - self.beta) * inverse_loss
        )
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return {
            "Losses/Curiosity Forward Loss": forward_loss.detach().cpu().numpy(),
            "Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
        }


class CuriosityNetwork(torch.nn.Module):
    EPSILON = 1e-10

    def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
        super().__init__()
        self._policy_specs = specs
        state_encoder_settings = NetworkSettings(
            normalize=False,
            hidden_units=settings.encoding_size,
            num_layers=2,
            vis_encode_type=EncoderType.SIMPLE,
            memory=None,
        )
        self._state_encoder = NetworkBody(
            specs.observation_shapes, state_encoder_settings
        )
        self._action_flattener = ModelUtils.ActionFlattener(specs)
        self.inverse_model_action_predition = torch.nn.Sequential(
            linear_layer(2 * settings.encoding_size, 256),
            Swish(),
            linear_layer(256, self._action_flattener.flattened_size),
        )
        self.forward_model_next_state_prediction = torch.nn.Sequential(
            linear_layer(
                settings.encoding_size + self._action_flattener.flattened_size, 256
            ),
            Swish(),
            linear_layer(256, settings.encoding_size),
        )

    def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Extracts the current state embedding from a mini_batch.
        """
        n_vis = len(self._state_encoder.visual_encoders)
        hidden, _ = self._state_encoder.forward(
            vec_inputs=[
                ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)
            ],
            vis_inputs=[
                ModelUtils.list_to_tensor(
                    mini_batch["visual_obs%d" % i], dtype=torch.float
                )
                for i in range(n_vis)
            ],
        )
        return hidden

    def get_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Extracts the next state embedding from a mini_batch.
        """
        n_vis = len(self._state_encoder.visual_encoders)
        hidden, _ = self._state_encoder.forward(
            vec_inputs=[
                ModelUtils.list_to_tensor(
                    mini_batch["next_vector_in"], dtype=torch.float
                )
            ],
            vis_inputs=[
                ModelUtils.list_to_tensor(
                    mini_batch["next_visual_obs%d" % i], dtype=torch.float
                )
                for i in range(n_vis)
            ],
        )
        return hidden

    def predict_action(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        In the continuous case, returns the predicted action.
        In the discrete case, returns the logits.
        """
        inverse_model_input = torch.cat(
            (self.get_current_state(mini_batch), self.get_next_state(mini_batch)), dim=1
        )
        hidden = self.inverse_model_action_predition(inverse_model_input)
        if self._policy_specs.is_action_continuous():
            return hidden
        else:
            branches = ModelUtils.break_into_branches(
                hidden, self._policy_specs.discrete_action_branches
            )
            branches = [torch.softmax(b, dim=1) for b in branches]
            return torch.cat(branches, dim=1)

    def predict_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Uses the current state embedding and the action of the mini_batch to predict
        the next state embedding.
        """
        if self._policy_specs.is_action_continuous():
            action = ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
        else:
            action = torch.cat(
                ModelUtils.actions_to_onehot(
                    ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
                    self._policy_specs.discrete_action_branches,
                ),
                dim=1,
            )
        forward_model_input = torch.cat(
            (self.get_current_state(mini_batch), action), dim=1
        )
        return self.forward_model_next_state_prediction(forward_model_input)

    def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Computes the inverse loss for a mini_batch. Corresponds to the error on the
        action prediction (given the current and next state).
        """
        predicted_action = self.predict_action(mini_batch)
        if self._policy_specs.is_action_continuous():
            sq_difference = (
                ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
                - predicted_action
            ) ** 2
            sq_difference = torch.sum(sq_difference, dim=1)
            return torch.mean(
                ModelUtils.dynamic_partition(
                    sq_difference,
                    ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
                    2,
                )[1]
            )
        else:
            true_action = torch.cat(
                ModelUtils.actions_to_onehot(
                    ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
                    self._policy_specs.discrete_action_branches,
                ),
                dim=1,
            )
            cross_entropy = torch.sum(
                -torch.log(predicted_action + self.EPSILON) * true_action, dim=1
            )
            return torch.mean(
                ModelUtils.dynamic_partition(
                    cross_entropy,
                    ModelUtils.list_to_tensor(
                        mini_batch["masks"], dtype=torch.float
                    ),  # use masks not action_masks
                    2,
                )[1]
            )

    def compute_reward(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Calculates the curiosity reward for the mini_batch. Corresponds to the error
        between the predicted and actual next state.
        """
        predicted_next_state = self.predict_next_state(mini_batch)
        target = self.get_next_state(mini_batch)
        sq_difference = 0.5 * (target - predicted_next_state) ** 2
        sq_difference = torch.sum(sq_difference, dim=1)
        return sq_difference

    def compute_forward_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Computes the loss for the next state prediction
        """
        return torch.mean(
            ModelUtils.dynamic_partition(
                self.compute_reward(mini_batch),
                ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
                2,
            )[1]
        )
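
A short, hedged sketch of how the provider is driven (the same pattern the tests above use): update() trains the forward and inverse models, and evaluate() then reads the surprise-based intrinsic reward, clipped at 1.0 / strength and zeroed until the first update.

# Hedged sketch: exercising CuriosityRewardProvider the way the tests above do.
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import CuriositySettings
from mlagents.trainers.torch.components.reward_providers import CuriosityRewardProvider
from mlagents.trainers.tests.torch.test_reward_providers.utils import create_agent_buffer

spec = BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)
provider = CuriosityRewardProvider(spec, CuriositySettings())
buffer = create_agent_buffer(spec, 1000)
for _ in range(10):
    provider.update(buffer)  # one gradient step on the forward + inverse models
rewards = provider.evaluate(buffer)  # prediction error, clipped at 1.0 / strength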

256
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


from typing import Optional, Dict, Tuple

import numpy as np
import torch

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
    BaseRewardProvider,
)
from mlagents.trainers.settings import GAILSettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Swish, Initialization
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.demo_loader import demo_to_buffer


class GAILRewardProvider(BaseRewardProvider):
    def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
        super().__init__(specs, settings)
        self._ignore_done = True
        self._discriminator_network = DiscriminatorNetwork(specs, settings)
        _, self._demo_buffer = demo_to_buffer(
            settings.demo_path, 1, specs
        )  # This is supposed to be the sequence length but we do not have access here
        params = list(self._discriminator_network.parameters())
        self.optimizer = torch.optim.Adam(params, lr=settings.learning_rate)

    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        with torch.no_grad():
            estimates, _ = self._discriminator_network.compute_estimate(
                mini_batch, use_vail_noise=False
            )
            return (
                -torch.log(
                    1.0
                    - estimates.squeeze(dim=1)
                    * (1.0 - self._discriminator_network.EPSILON)
                )
                .detach()
                .cpu()
                .numpy()
            )

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        expert_batch = self._demo_buffer.sample_mini_batch(
            mini_batch.num_experiences, 1
        )
        loss, stats_dict = self._discriminator_network.compute_loss(
            mini_batch, expert_batch
        )
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return stats_dict


class DiscriminatorNetwork(torch.nn.Module):
    gradient_penalty_weight = 10.0
    z_size = 128
    alpha = 0.0005
    mutual_information = 0.5
    EPSILON = 1e-7
    initial_beta = 0.0

    def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
        super().__init__()
        self._policy_specs = specs
        self._use_vail = settings.use_vail
        self._settings = settings
        state_encoder_settings = NetworkSettings(
            normalize=False,
            hidden_units=settings.encoding_size,
            num_layers=2,
            vis_encode_type=EncoderType.SIMPLE,
            memory=None,
        )
        self._state_encoder = NetworkBody(
            specs.observation_shapes, state_encoder_settings
        )
        self._action_flattener = ModelUtils.ActionFlattener(specs)
        encoder_input_size = settings.encoding_size
        if settings.use_actions:
            encoder_input_size += (
                self._action_flattener.flattened_size + 1
            )  # + 1 is for done
        self.encoder = torch.nn.Sequential(
            linear_layer(encoder_input_size, settings.encoding_size),
            Swish(),
            linear_layer(settings.encoding_size, settings.encoding_size),
            Swish(),
        )
        estimator_input_size = settings.encoding_size
        if settings.use_vail:
            estimator_input_size = self.z_size
            self._z_sigma = torch.nn.Parameter(
                torch.ones((self.z_size), dtype=torch.float), requires_grad=True
            )
            self._z_mu_layer = linear_layer(
                settings.encoding_size,
                self.z_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=0.1,
            )
            self._beta = torch.nn.Parameter(
                torch.tensor(self.initial_beta, dtype=torch.float), requires_grad=False
            )
        self._estimator = torch.nn.Sequential(
            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
        )

    def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Creates the action Tensor. In continuous case, corresponds to the action. In
        the discrete case, corresponds to the concatenation of one hot action Tensors.
        """
        return self._action_flattener.forward(
            torch.as_tensor(mini_batch["actions"], dtype=torch.float)
        )

    def get_state_encoding(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Creates the observation input.
        """
        n_vis = len(self._state_encoder.visual_encoders)
        hidden, _ = self._state_encoder.forward(
            vec_inputs=[torch.as_tensor(mini_batch["vector_obs"], dtype=torch.float)],
            vis_inputs=[
                torch.as_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
                for i in range(n_vis)
            ],
        )
        return hidden

    def compute_estimate(
        self, mini_batch: AgentBuffer, use_vail_noise: bool = False
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """
        Given a mini_batch, computes the estimate (how much the discriminator believes
        the data was sampled from the demonstration data).
        :param mini_batch: The AgentBuffer of data
        :param use_vail_noise: Only used with VAIL. If True, samples the code; if
        False, returns the mean of the code.
        """
        encoder_input = self.get_state_encoding(mini_batch)
        if self._settings.use_actions:
            actions = self.get_action_input(mini_batch)
            dones = torch.as_tensor(mini_batch["done"], dtype=torch.float)
            encoder_input = torch.cat([encoder_input, actions, dones], dim=1)
        hidden = self.encoder(encoder_input)
        z_mu: Optional[torch.Tensor] = None
        if self._settings.use_vail:
            z_mu = self._z_mu_layer(hidden)
            hidden = torch.normal(z_mu, self._z_sigma * use_vail_noise)
        estimate = self._estimator(hidden)
        return estimate, z_mu

    def compute_loss(
        self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
    ) -> Tuple[torch.Tensor, Dict[str, np.ndarray]]:
        """
        Given a policy mini_batch and an expert mini_batch, computes the loss of the discriminator.
        """
        total_loss = torch.zeros(1)
        stats_dict: Dict[str, np.ndarray] = {}
        policy_estimate, policy_mu = self.compute_estimate(
            policy_batch, use_vail_noise=True
        )
        expert_estimate, expert_mu = self.compute_estimate(
            expert_batch, use_vail_noise=True
        )
        stats_dict["Policy/GAIL Policy Estimate"] = (
            policy_estimate.mean().detach().cpu().numpy()
        )
        stats_dict["Policy/GAIL Expert Estimate"] = (
            expert_estimate.mean().detach().cpu().numpy()
        )
        discriminator_loss = -(
            torch.log(expert_estimate + self.EPSILON)
            + torch.log(1.0 - policy_estimate + self.EPSILON)
        ).mean()
        stats_dict["Losses/GAIL Loss"] = discriminator_loss.detach().cpu().numpy()
        total_loss += discriminator_loss
        if self._settings.use_vail:
            # KL divergence loss (encourage latent representation to be normal)
            kl_loss = torch.mean(
                -torch.sum(
                    1
                    + (self._z_sigma ** 2).log()
                    - 0.5 * expert_mu ** 2
                    - 0.5 * policy_mu ** 2
                    - (self._z_sigma ** 2),
                    dim=1,
                )
            )
            vail_loss = self._beta * (kl_loss - self.mutual_information)
            with torch.no_grad():
                self._beta.data = torch.max(
                    self._beta + self.alpha * (kl_loss - self.mutual_information),
                    torch.tensor(0.0),
                )
            total_loss += vail_loss
            stats_dict["Policy/GAIL Beta"] = self._beta.detach().cpu().numpy()
            stats_dict["Losses/GAIL KL Loss"] = kl_loss.detach().cpu().numpy()
        if self.gradient_penalty_weight > 0.0:
            total_loss += (
                self.gradient_penalty_weight
                * self.compute_gradient_magnitude(policy_batch, expert_batch)
            )
        return total_loss, stats_dict

    def compute_gradient_magnitude(
        self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
    ) -> torch.Tensor:
        """
        Gradient penalty from https://arxiv.org/pdf/1704.00028. Adds stability,
        especially for off-policy. Computes gradients w.r.t. randomly interpolated input.
        """
        policy_obs = self.get_state_encoding(policy_batch)
        expert_obs = self.get_state_encoding(expert_batch)
        obs_epsilon = torch.rand(policy_obs.shape)
        encoder_input = obs_epsilon * policy_obs + (1 - obs_epsilon) * expert_obs
        if self._settings.use_actions:
            policy_action = self.get_action_input(policy_batch)
            # Interpolate with the expert batch's actions (previously read from policy_batch,
            # which made the action interpolation degenerate).
            expert_action = self.get_action_input(expert_batch)
            action_epsilon = torch.rand(policy_action.shape)
            policy_dones = torch.as_tensor(policy_batch["done"], dtype=torch.float)
            expert_dones = torch.as_tensor(expert_batch["done"], dtype=torch.float)
            dones_epsilon = torch.rand(policy_dones.shape)
            encoder_input = torch.cat(
                [
                    encoder_input,
                    action_epsilon * policy_action
                    + (1 - action_epsilon) * expert_action,
                    dones_epsilon * policy_dones + (1 - dones_epsilon) * expert_dones,
                ],
                dim=1,
            )
        hidden = self.encoder(encoder_input)
        if self._settings.use_vail:
            use_vail_noise = True
            z_mu = self._z_mu_layer(hidden)
            hidden = torch.normal(z_mu, self._z_sigma * use_vail_noise)
        hidden = self._estimator(hidden)
        estimate = torch.mean(torch.sum(hidden, dim=1))
        gradient = torch.autograd.grad(estimate, encoder_input)[0]
        # Norm's gradient could be NaN at 0. Use our own safe_norm
        safe_norm = (torch.sum(gradient ** 2, dim=1) + self.EPSILON).sqrt()
        gradient_mag = torch.mean((safe_norm - 1) ** 2)
        return gradient_mag
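
Finally, a hedged sketch of the GAIL provider in isolation; the demo path is a placeholder for a recorded .demo file (the tests above point at test.demo two directories up), and the variable names are illustrative.

# Hedged sketch: one discriminator update followed by a reward read-out.
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.torch.components.reward_providers import create_reward_provider
from mlagents.trainers.tests.torch.test_reward_providers.utils import create_agent_buffer

spec = BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)
settings = GAILSettings(demo_path="test.demo")  # placeholder path to a demonstration file
gail = create_reward_provider(RewardSignalType.GAIL, spec, settings)
policy_buffer = create_agent_buffer(spec, 1000)
gail.update(policy_buffer)  # samples an expert mini batch internally
rewards = gail.evaluate(policy_buffer)  # -log(1 - D(s, a)), always non-negative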