
Merge branch 'develop-policyonly' into develop-sac-targetq

/develop/sac-targetq
Ervin Teng, 4 years ago
Current commit
f9ff3efe
31 changed files with 477 additions and 417 deletions
  1. 526  Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/GridFoodCollector.unity
  2. 13  README.md
  3. 45  com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs
  4. 27  com.unity.ml-agents/CHANGELOG.md
  5. 2  com.unity.ml-agents/Documentation~/com.unity.ml-agents.md
  6. 3  com.unity.ml-agents/Documentation~/filter.yml
  7. 4  com.unity.ml-agents/Runtime/Academy.cs
  8. 10  com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs
  9. 4  com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs
  10. 54  com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
  11. 6  com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs
  12. 2  com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs
  13. 4  com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
  14. 32  com.unity.ml-agents/Runtime/Agent.cs
  15. 2  com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs
  16. 2  com.unity.ml-agents/Runtime/DiscreteActionMasker.cs
  17. 2  com.unity.ml-agents/package.json
  18. 4  docs/Installation-Anaconda-Windows.md
  19. 6  docs/Installation.md
  20. 7  docs/Learning-Environment-Executable.md
  21. 36  docs/Training-ML-Agents.md
  22. 2  docs/Training-on-Amazon-Web-Service.md
  23. 4  docs/Unity-Inference-Engine.md
  24. 10  ml-agents-envs/mlagents_envs/env_utils.py
  25. 30  ml-agents/mlagents/trainers/policy/torch_policy.py
  26. 34  ml-agents/mlagents/trainers/sac/optimizer_torch.py
  27. 4  ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
  28. 10  ml-agents/mlagents/trainers/tests/torch/test_policy.py
  29. 2  ml-agents/mlagents/trainers/torch/components/bc/module.py
  30. 6  ml-agents/mlagents/trainers/torch/networks.py
  31. 1  utils/make_readme_table.py

526  Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/GridFoodCollector.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
- m_IndirectSpecularColor: {r: 0.44971228, g: 0.49977815, b: 0.57563734, a: 1}
+ m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
--- !u!1 &273651478
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 273651479}
- component: {fileID: 273651481}
- component: {fileID: 273651480}
m_Layer: 5
m_Name: Text
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!224 &273651479
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 273651478}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1799584681}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &273651480
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 273651478}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 5f7201a12d95ffc409449d95f23cf332, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!222 &273651481
CanvasRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 273651478}
m_CullTransparentMesh: 0
--- !u!1 &378228137
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 378228141}
- component: {fileID: 378228140}
- component: {fileID: 378228139}
- component: {fileID: 378228138}
m_Layer: 5
m_Name: Canvas
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &378228138
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 378228137}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: dc42784cf147c0c48a680349fa168899, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!114 &378228139
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 378228137}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 0cd44c1031e13a943bb63640046fad76, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!223 &378228140
Canvas:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 378228137}
m_Enabled: 1
serializedVersion: 3
m_RenderMode: 0
m_Camera: {fileID: 0}
m_PlaneDistance: 100
m_PixelPerfect: 0
m_ReceivesEvents: 1
m_OverrideSorting: 0
m_OverridePixelPerfect: 0
m_SortingBucketNormalizedSize: 0
m_AdditionalShaderChannelsFlag: 0
m_SortingLayerID: 0
m_SortingOrder: 0
m_TargetDisplay: 0
--- !u!224 &378228141
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 378228137}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 0, y: 0, z: 0}
m_Children:
- {fileID: 1799584681}
- {fileID: 1086444498}
m_Father: {fileID: 0}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 0, y: 0}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0, y: 0}
--- !u!1001 &392794583
PrefabInstance:
m_ObjectHideFlags: 0

objectReference: {fileID: 0}
- target: {fileID: 1819751139121548, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_IsActive
- value: 0
+ value: 1
objectReference: {fileID: 0}
- target: {fileID: 4688212428263696, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x

objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
- --- !u!1 &499540684
+ --- !u!1 &625137506
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Component:
- - component: {fileID: 499540687}
- - component: {fileID: 499540686}
- - component: {fileID: 499540685}
- m_Layer: 0
- m_Name: EventSystem
+ - component: {fileID: 625137507}
+ - component: {fileID: 625137509}
+ - component: {fileID: 625137508}
+ m_Layer: 5
+ m_Name: Text
- --- !u!114 &499540685
- MonoBehaviour:
+ --- !u!224 &625137507
+ RectTransform:
m_GameObject: {fileID: 499540684}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 4f231c4fb786f3946a6b90b886c48677, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!114 &499540686
m_GameObject: {fileID: 625137506}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 965533424}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &625137508
- m_GameObject: {fileID: 499540684}
+ m_GameObject: {fileID: 625137506}
- m_Script: {fileID: 11500000, guid: 76c392e42b5098c458856cdf6ecaaaa1, type: 3}
+ m_Script: {fileID: 708705254, guid: f70555f144d8491a825f0804e09c671c, type: 3}
--- !u!4 &499540687
Transform:
m_Material: {fileID: 0}
m_Color: {r: 0.9338235, g: 0.9338235, b: 0.9338235, a: 0.784}
m_RaycastTarget: 1
m_OnCullStateChanged:
m_PersistentCalls:
m_Calls: []
m_FontData:
m_Font: {fileID: 10102, guid: 0000000000000000e000000000000000, type: 0}
m_FontSize: 300
m_FontStyle: 1
m_BestFit: 0
m_MinSize: 8
m_MaxSize: 300
m_Alignment: 4
m_AlignByGeometry: 0
m_RichText: 1
m_HorizontalOverflow: 0
m_VerticalOverflow: 0
m_LineSpacing: 1
m_Text: NOM
--- !u!222 &625137509
CanvasRenderer:
m_GameObject: {fileID: 499540684}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_GameObject: {fileID: 625137506}
m_CullTransparentMesh: 0
--- !u!1001 &916917435
PrefabInstance:
m_ObjectHideFlags: 0

objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 5889392e3f05b448a8a06c5def6c2dec, type: 3}
--- !u!1 &965533423
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 965533424}
- component: {fileID: 965533426}
- component: {fileID: 965533425}
m_Layer: 5
m_Name: Panel
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 0
--- !u!224 &965533424
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 965533423}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 625137507}
m_Father: {fileID: 1064449898}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &965533425
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 965533423}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: -765806418, guid: f70555f144d8491a825f0804e09c671c, type: 3}
m_Name:
m_EditorClassIdentifier:
m_Material: {fileID: 0}
m_Color: {r: 0, g: 0, b: 0, a: 0.472}
m_RaycastTarget: 1
m_OnCullStateChanged:
m_PersistentCalls:
m_Calls: []
m_Sprite: {fileID: 10907, guid: 0000000000000000f000000000000000, type: 0}
m_Type: 1
m_PreserveAspect: 0
m_FillCenter: 1
m_FillMethod: 4
m_FillAmount: 1
m_FillClockwise: 1
m_FillOrigin: 0
m_UseSpriteMesh: 0
--- !u!222 &965533426
CanvasRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 965533423}
m_CullTransparentMesh: 0
--- !u!1 &1009000883
GameObject:
m_ObjectHideFlags: 0

objectReference: {fileID: 0}
- target: {fileID: 1819751139121548, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_IsActive
- value: 0
+ value: 1
objectReference: {fileID: 0}
- target: {fileID: 4688212428263696, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x

objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
- --- !u!1 &1086444495
+ --- !u!1 &1064449894
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 1064449898}
- component: {fileID: 1064449897}
- component: {fileID: 1064449896}
- component: {fileID: 1064449895}
m_Layer: 5
m_Name: Canvas
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &1064449895
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1064449894}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 1301386320, guid: f70555f144d8491a825f0804e09c671c, type: 3}
m_Name:
m_EditorClassIdentifier:
m_IgnoreReversedGraphics: 1
m_BlockingObjects: 0
m_BlockingMask:
serializedVersion: 2
m_Bits: 4294967295
--- !u!114 &1064449896
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1064449894}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 1980459831, guid: f70555f144d8491a825f0804e09c671c, type: 3}
m_Name:
m_EditorClassIdentifier:
m_UiScaleMode: 1
m_ReferencePixelsPerUnit: 100
m_ScaleFactor: 1
m_ReferenceResolution: {x: 800, y: 600}
m_ScreenMatchMode: 0
m_MatchWidthOrHeight: 0.5
m_PhysicalUnit: 3
m_FallbackScreenDPI: 96
m_DefaultSpriteDPI: 96
m_DynamicPixelsPerUnit: 1
--- !u!223 &1064449897
Canvas:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1064449894}
m_Enabled: 1
serializedVersion: 3
m_RenderMode: 0
m_Camera: {fileID: 0}
m_PlaneDistance: 100
m_PixelPerfect: 0
m_ReceivesEvents: 1
m_OverrideSorting: 0
m_OverridePixelPerfect: 0
m_SortingBucketNormalizedSize: 0
m_AdditionalShaderChannelsFlag: 0
m_SortingLayerID: 0
m_SortingOrder: 0
m_TargetDisplay: 0
--- !u!224 &1064449898
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1064449894}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 0, y: 0, z: 0}
m_Children:
- {fileID: 965533424}
- {fileID: 1418304525}
m_Father: {fileID: 0}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 0, y: 0}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0, y: 0}
--- !u!1 &1418304524
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Component:
- component: {fileID: 1086444498}
- component: {fileID: 1086444497}
- component: {fileID: 1086444496}
- component: {fileID: 1418304525}
- component: {fileID: 1418304527}
- component: {fileID: 1418304526}
m_Layer: 5
m_Name: Text
m_TagString: Untagged

m_IsActive: 1
- --- !u!114 &1086444496
+ --- !u!224 &1418304525
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1418304524}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1064449898}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: -1000, y: -239.57645}
m_SizeDelta: {x: 160, y: 30}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &1418304526
- m_GameObject: {fileID: 1086444495}
+ m_GameObject: {fileID: 1418304524}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 708705254, guid: f70555f144d8491a825f0804e09c671c, type: 3}

- m_Color: {r: 1, g: 1, b: 1, a: 1}
+ m_Color: {r: 0.19607843, g: 0.19607843, b: 0.19607843, a: 1}
m_RaycastTarget: 1
m_OnCullStateChanged:
m_PersistentCalls:

m_HorizontalOverflow: 0
m_VerticalOverflow: 0
m_LineSpacing: 1
- m_Text:
- --- !u!222 &1086444497
+ m_Text: New Text
+ --- !u!222 &1418304527
- m_GameObject: {fileID: 1086444495}
+ m_GameObject: {fileID: 1418304524}
--- !u!224 &1086444498
RectTransform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1086444495}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 378228141}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: -1000, y: -239.57645}
m_SizeDelta: {x: 160, y: 30}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!1 &1574236047
GameObject:
m_ObjectHideFlags: 0

agents: []
listArea: []
totalScore: 0
- scoreText: {fileID: 1086444496}
+ scoreText: {fileID: 1418304526}
--- !u!4 &1574236049
Transform:
m_ObjectHideFlags: 0

m_Father: {fileID: 0}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
- --- !u!1 &1799584680
+ --- !u!1 &1956702417
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Component:
- component: {fileID: 1799584681}
- component: {fileID: 1799584683}
- component: {fileID: 1799584682}
m_Layer: 5
m_Name: Panel
- component: {fileID: 1956702420}
- component: {fileID: 1956702419}
- component: {fileID: 1956702418}
m_Layer: 0
m_Name: EventSystem
m_IsActive: 0
--- !u!224 &1799584681
RectTransform:
m_IsActive: 1
--- !u!114 &1956702418
MonoBehaviour:
m_GameObject: {fileID: 1799584680}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 273651479}
m_Father: {fileID: 378228141}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 0, y: 0}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &1799584682
m_GameObject: {fileID: 1956702417}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 1077351063, guid: f70555f144d8491a825f0804e09c671c, type: 3}
m_Name:
m_EditorClassIdentifier:
m_HorizontalAxis: Horizontal
m_VerticalAxis: Vertical
m_SubmitButton: Submit
m_CancelButton: Cancel
m_InputActionsPerSecond: 10
m_RepeatDelay: 0.5
m_ForceModuleActive: 0
--- !u!114 &1956702419
- m_GameObject: {fileID: 1799584680}
+ m_GameObject: {fileID: 1956702417}
- m_Script: {fileID: 11500000, guid: fe87c0e1cc204ed48ad3b37840f39efc, type: 3}
+ m_Script: {fileID: -619905303, guid: f70555f144d8491a825f0804e09c671c, type: 3}
--- !u!222 &1799584683
CanvasRenderer:
m_FirstSelected: {fileID: 0}
m_sendNavigationEvents: 1
m_DragThreshold: 5
--- !u!4 &1956702420
Transform:
- m_GameObject: {fileID: 1799584680}
- m_CullTransparentMesh: 0
+ m_GameObject: {fileID: 1956702417}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1001 &1985725465
PrefabInstance:
m_ObjectHideFlags: 0

objectReference: {fileID: 0}
- target: {fileID: 1819751139121548, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_IsActive
- value: 0
+ value: 1
objectReference: {fileID: 0}
- target: {fileID: 4688212428263696, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x

13  README.md


# Unity ML-Agents Toolkit
- [![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/)
+ [![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)

## Releases & Documentation
- **Our latest, stable release is `Release 6`. Click
- [here](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/Readme.md)
+ **Our latest, stable release is `Release 7`. Click
+ [here](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
- | **Release 6** | **August 12, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_6)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_6.zip)** |
- | **Release 5** | **July 31, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_5)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_5.zip)** |
+ | **Release 7** | **September 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_7)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip)** |
+ | **Release 6** | August 12, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_6) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_6.zip) |
+ | **Release 5** | July 31, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_5) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_5.zip) |
| **0.15.1** | March 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip) |
| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
## Citation

45  com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs


/// <returns>byte[] containing the compressed observation of the grid observation</returns>
public byte[] GetCompressedObservation()
{
- // Timer stack is in accessable due to its protection level
- //using (TimerStack.Instance.Scoped("GridSensor.GetCompressedObservation"))
+ using (TimerStack.Instance.Scoped("GridSensor.GetCompressedObservation"))
{
Perceive(); // Fill the perception buffer with observed data

public float[] Perceive()
{
Reset();
- // TODO: make these part of the class
- Collider[] foundColliders = null;
- Vector3 cellCenter = Vector3.zero;
- Vector3 halfCellScale = new Vector3(CellScaleX / 2f, CellScaleY, CellScaleZ / 2f);
- for (int cellIndex = 0; cellIndex < NumCells; cellIndex++)
- {
-     if (RotateToAgent)
-     {
-         cellCenter = transform.TransformPoint(CellPoints[cellIndex]);
-         foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, transform.rotation, ObserveMask);
-     }
-     else
-     {
-         cellCenter = transform.position + CellPoints[cellIndex];
-         foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, Quaternion.identity, ObserveMask);
-     }
-     if (foundColliders != null && foundColliders.Length > 0)
-     {
-         ParseColliders(foundColliders, cellIndex, cellCenter);
+ using (TimerStack.Instance.Scoped("GridSensor.Perceive"))
+ {
+     // TODO: make these part of the class
+     Collider[] foundColliders = null;
+     Vector3 cellCenter = Vector3.zero;
+     Vector3 halfCellScale = new Vector3(CellScaleX / 2f, CellScaleY, CellScaleZ / 2f);
+     for (int cellIndex = 0; cellIndex < NumCells; cellIndex++)
+     {
+         if (RotateToAgent)
+         {
+             cellCenter = transform.TransformPoint(CellPoints[cellIndex]);
+             foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, transform.rotation, ObserveMask);
+         }
+         else
+         {
+             cellCenter = transform.position + CellPoints[cellIndex];
+             foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, Quaternion.identity, ObserveMask);
+         }
+         if (foundColliders != null && foundColliders.Length > 0)
+         {
+             ParseColliders(foundColliders, cellIndex, cellCenter);
+         }
+     }
}

/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
- // Timer stack is in accessable due to its protection level
- // using (TimerStack.Instance.Scoped("GridSensor.WriteToTensor"))
+ using (TimerStack.Instance.Scoped("GridSensor.WriteToTensor"))
{
Perceive();

27  com.unity.ml-agents/CHANGELOG.md


### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- - Fixed a bug in exporting Pytorch models when using multiple discrete actions. (#4491)
+ ## [1.4.0-preview] - 2020-09-16

### Minor Changes
#### com.unity.ml-agents (C#)
- - Update Barracuda to 1.1.0-preview (#4208)
- The `IActuator` interface and `ActuatorComponent` abstract class were added.
These are analogous to `ISensor` and `SensorComponent`, but for applying actions
for an Agent. They allow you to control the action space more programmatically
than defining the actions in the Agent's Behavior Parameters. See
[BasicActuatorComponent.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs)
for an example of how to use them. (#4297, #4315)
+ - Update Barracuda to 1.1.1-preview (#4482)
- - GridSensor was added to the com.unity.ml-agents.extensions package. Thank you
+ - GridSensor was added to the `com.unity.ml-agents.extensions` package. Thank you
to Jaden Travnik from Eidos Montreal for the contribution! (#4399)
- Added `Agent.EpisodeInterrupted()`, which can be used to reset the agent when
it has reached a user-determined maximum number of steps. This behaves similarly

`framework: pytorch` to your trainer configuration (under the behavior name) to enable it.
Note that PyTorch 1.6.0 or greater should be installed to use this feature; see
- [the PyTorch website](https://pytorch.org/) for installation instructions. (#4335)
+ [the PyTorch website](https://pytorch.org/) for installation instructions and
+ [the relevant ML-Agents docs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Training-ML-Agents.md#using-pytorch-experimental) for usage. (#4335)
- ISensor.GetCompressedObservation(), this can be done by writing 3 channels at a
+ `ISensor.GetCompressedObservation()`, this can be done by writing 3 channels at a
- a compatible version of the trainer.
+ a compatible version of the trainer. (#4462)
+ - Improved the executable detection logic for environments on Windows. (#4485)
- - The package dependencies were updated to include the built-in packages that are used also. (#4384)
+ - Previously, `com.unity.ml-agents` was not declaring built-in packages as
+ dependencies in its package.json. The relevant dependencies are now listed. (#4384)
- Agents no longer try to send observations when they become disabled if the
Academy has been shut down. (#4489)
- A bug in the observation normalizer that would cause rewards to decrease
when using `--resume` was fixed. (#4463)
+ - Fixed a bug in exporting Pytorch models when using multiple discrete actions. (#4491)
## [1.3.0-preview] - 2020-08-12

2  com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
[unity inference engine]: https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html
[package manager documentation]: https://docs.unity3d.com/Manual/upm-ui-install.html
- [installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Installation.md
+ [installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Installation.md
[github repository]: https://github.com/Unity-Technologies/ml-agents
[python package]: https://github.com/Unity-Technologies/ml-agents
[execution order of event functions]: https://docs.unity3d.com/Manual/ExecutionOrder.html

3  com.unity.ml-agents/Documentation~/filter.yml


uidRegex: ^Unity.MLAgents\.Tests\.Communicator$
type: Namespace
+ - exclude:
+ uidRegex: ^Unity.MLAgents\.Tests\.Actuators$
+ type: Namespace
- exclude:
uidRegex: ^Unity.MLAgents\.Editor$
type: Namespace
- exclude:

4  com.unity.ml-agents/Runtime/Academy.cs


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
- * https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/
+ * https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/
*/
namespace Unity.MLAgents

/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{

10  com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


System.Array.Clear(Array, Offset, Length);
}
/// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
/// <inheritdoc/>
/// <inheritdoc cref="IEnumerable{T}"/>
/// <inheritdoc/>
/// <inheritdoc cref="ValueType.Equals(object)"/>
/// <inheritdoc/>
public override bool Equals(object obj)
{
if (!(obj is ActionSegment<T>))

return Equals((ActionSegment<T>)obj);
}
/// <inheritdoc cref="IEquatable{T}.Equals(T)"/>
/// <inheritdoc/>
/// <inheritdoc cref="ValueType.GetHashCode"/>
/// <inheritdoc/>
public override int GetHashCode()
{
unchecked

4  com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs


m_BranchSizes = branchSizes;
}
/// <inheritdoc cref="IDiscreteActionMask.WriteMask"/>
/// <inheritdoc/>
public void WriteMask(int branch, IEnumerable<int> actionIndices)
{
LazyInitialize();

}
}
/// <inheritdoc cref="IDiscreteActionMask.GetMask"/>
/// <inheritdoc/>
public bool[] GetMask()
{
#if DEBUG

54  com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


m_DiscreteActionMask.ResetMask();
}
/// <summary>
/// Sorts the <see cref="IActuator"/>s according to their <see cref="IActuator.Name"/> value.
/// </summary>

* IList implementation that delegates to m_Actuators List. *
*********************************************************************************/
- /// <summary>
- /// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="IList{T}.GetEnumerator"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.Add"/>
- /// </summary>
- /// <param name="item"></param>
+ /// <inheritdoc/>
public void Add(IActuator item)
{
Debug.Assert(m_ReadyForExecution == false,

}
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.Clear"/>
- /// </summary>
+ /// <inheritdoc/>
public void Clear()
{
Debug.Assert(m_ReadyForExecution == false,

}
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.Contains"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.CopyTo"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.Remove"/>
- /// </summary>
+ /// <inheritdoc/>
public bool Remove(IActuator item)
{
Debug.Assert(m_ReadyForExecution == false,

return false;
}
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.Count"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="ICollection{T}.IsReadOnly"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="IList{T}.IndexOf"/>
- /// </summary>
+ /// <inheritdoc/>
- /// <summary>
- /// <inheritdoc cref="IList{T}.Insert"/>
- /// </summary>
+ /// <inheritdoc/>
public void Insert(int index, IActuator item)
{
Debug.Assert(m_ReadyForExecution == false,

}
- /// <summary>
- /// <inheritdoc cref="IList{T}.RemoveAt"/>
- /// </summary>
+ /// <inheritdoc/>
public void RemoveAt(int index)
{
Debug.Assert(m_ReadyForExecution == false,

m_Actuators.RemoveAt(index);
}
- /// <summary>
- /// <inheritdoc cref="IList{T}.this"/>
- /// </summary>
+ /// <inheritdoc/>
public IActuator this[int index]
{
get => m_Actuators[index];

6  com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


DiscreteActions.Clear();
}
/// <inheritdoc cref="ValueType.Equals(object)"/>
/// <inheritdoc/>
public override bool Equals(object obj)
{
if (!(obj is ActionBuffers))

ab.DiscreteActions.SequenceEqual(DiscreteActions);
}
/// <inheritdoc cref="ValueType.GetHashCode"/>
/// <inheritdoc/>
public override int GetHashCode()
{
unchecked

///
/// See [Agents - Actions] for more information on masking actions.
///
- /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#actions
+ /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
void WriteDiscreteActionMask(IDiscreteActionMask actionMask);

2  com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
- /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_docs/docs/Learning-Environment-Design-Agents.md#actions
+ /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

4  com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


m_ActionReceiver.WriteDiscreteActionMask(actionMask);
}
- /// <summary>
- /// <inheritdoc cref="IActionReceiver.ActionSpec"/>
- /// </summary>
+ /// <inheritdoc/>
public ActionSpec ActionSpec { get; }
/// <inheritdoc />

32  com.unity.ml-agents/Runtime/Agent.cs


/// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html]
/// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
/// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
- /// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md
- /// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design.md
+ /// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md
+ /// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design.md
- /// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Readme.md
+ /// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Readme.md
- [HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/" +
+ [HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/" +
"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]

Academy.Instance.DecideAction -= DecideAction;
Academy.Instance.AgentAct -= AgentStep;
Academy.Instance.AgentForceReset -= _AgentReset;
NotifyAgentDone(DoneReason.Disabled);
NotifyAgentDone(DoneReason.Disabled);
m_Brain?.Dispose();
m_Initialized = false;
}

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
- /// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#rewards
- /// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
+ /// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#rewards
+ /// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// </remarks>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
- /// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#rewards
- /// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
+ /// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#rewards
+ /// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
///</remarks>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)

/// implementing a simple heuristic function can aid in debugging agent actions and interactions
/// with its environment.
///
- /// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
- /// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
+ /// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
+ /// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
/// <example>

/// For more information about observations, see [Observations and Sensors].
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
- /// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
+ /// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor)
{

/// Returns a read-only view of the observations that were generated in
/// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
/// <see cref="Heuristic(float[], int[])"/> method to avoid recomputing the observations.
/// <see cref="Heuristic(in ActionBuffers)"/> method to avoid recomputing the observations.
/// </summary>
/// <returns>A read-only view of the observations list.</returns>
public ReadOnlyCollection<float> GetObservations()

///
/// See [Agents - Actions] for more information on masking actions.
///
- /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
+ /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

/// three values in the action array to use as the force components. During
/// training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic(float[], int[])"/> function, it must use the same
/// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

///
/// For more information about implementing agent actions see [Agents - Actions].
///
- /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
+ /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.

2  com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs


/// See [Imitation Learning - Recording Demonstrations] for more information.
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
- /// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
+ /// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// </remarks>
[RequireComponent(typeof(Agent))]
[AddComponentMenu("ML Agents/Demonstration Recorder", (int)MenuGroup.Default)]

2  com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
- /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
+ /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

2  com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.1.0-preview",
"com.unity.barracuda": "1.1.1-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

4  docs/Installation-Anaconda-Windows.md


the ml-agents Conda environment by typing `activate ml-agents`)_:
```sh
- git clone --branch release_6 https://github.com/Unity-Technologies/ml-agents.git
+ git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
```
- The `--branch release_6` option will switch to the tag of the latest stable
+ The `--branch release_7` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

6  docs/Installation.md


of our tutorials / guides assume you have access to our example environments).
```sh
- git clone --branch release_6 https://github.com/Unity-Technologies/ml-agents.git
+ git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
```
- The `--branch release_6` option will switch to the tag of the latest stable
+ The `--branch release_7` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

ML-Agents Toolkit for your purposes. If you plan to contribute those changes
- back, make sure to clone the `master` branch (by omitting `--branch release_6`
+ back, make sure to clone the `master` branch (by omitting `--branch release_7`
from the command above). See our
[Contributions Guidelines](../com.unity.ml-agents/CONTRIBUTING.md) for more
information on contributing to the ML-Agents Toolkit.

7  docs/Learning-Environment-Executable.md


- You can exchange executable with other people without having to share your
entire repository.
- You can put your executable on a remote machine for faster training.
- - You can use `Headless` mode for faster training.
+ - You can use `Server Build` (`Headless`) mode for faster training (as long as the executable does not need rendering).
- You can keep using the Unity Editor for other tasks while the agents are
training.

1. Open Player Settings (menu: **Edit** > **Project Settings** > **Player**).
1. Under **Resolution and Presentation**:
- Ensure that **Run in Background** is Checked.
- - Ensure that **Display Resolution Dialog** is set to Disabled.
+ - Ensure that **Display Resolution Dialog** is set to Disabled. (Note: this
+ setting may not be available in newer versions of the editor.)
1. Open the Build Settings window (menu:**File** > **Build Settings**).
1. Choose your target platform.
- (optional) Select “Development Build” to

- `<run-identifier>` is a string used to separate the results of different
training runs
- For example, if you are training with a 3DBall executable you exported to the
+ For example, if you are training with a 3DBall executable, and you saved it to
the directory where you installed the ML-Agents Toolkit, run:
```sh

36  docs/Training-ML-Agents.md


- [Curriculum Learning](#curriculum)
- [Training with a Curriculum](#training-with-a-curriculum)
- [Training Using Concurrent Unity Instances](#training-using-concurrent-unity-instances)
+ - [Using PyTorch (Experimental)](#using-pytorch-experimental)
For a broad overview of reinforcement learning, imitation learning and all the
training scenarios, methods and options within the ML-Agents Toolkit, see

save_steps: 50000
swap_steps: 2000
team_change: 100000
+ # use TensorFlow backend
+ framework: tensorflow
```
Here is an equivalent file if we use an SAC trainer instead. Notice that the

- **Result Variation Using Concurrent Unity Instances** - If you keep all the
hyperparameters the same, but change `--num-envs=<n>`, the results and model
would likely change.
+ ### Using PyTorch (Experimental)
+ ML-Agents, by default, uses TensorFlow as its backend, but experimental support
+ for PyTorch has been added. To use PyTorch, the `torch` Python package must
+ be installed, and PyTorch must be enabled for your trainer.
+ #### Installing PyTorch
+ If you've already installed ML-Agents, follow the
+ [official PyTorch install instructions](https://pytorch.org/get-started/locally/) for
+ your platform and configuration. Note that on Windows, you may also need Microsoft's
+ [Visual C++ Redistributable](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads) if you don't have it already.
+ If you're installing or upgrading ML-Agents on Linux or Mac, you can also run
+ `pip3 install mlagents[torch]` instead of `pip3 install mlagents`
+ during [installation](Installation.md). On Windows, install ML-Agents first and then
+ separately install PyTorch.
+ #### Enabling PyTorch
+ PyTorch can be enabled in one of two ways. First, by adding `--torch` to the
+ `mlagents-learn` command. This will make all behaviors train with PyTorch.
+ Second, by changing the `framework` option for your agent behavior in the
+ configuration YAML as below. This will use PyTorch just for that behavior.
+ ```yaml
+ behaviors:
+   YourAgentBehavior:
+     framework: pytorch
+ ```
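As an illustrative aside (not part of the docs diff): the CHANGELOG hunk above notes that PyTorch 1.6.0 or greater is required for this feature, so a quick sanity check of the installed version before enabling `framework: pytorch` might look like this.

```python
# Sanity check (illustrative, not ML-Agents code): verify the PyTorch install
# meets the 1.6.0 minimum mentioned in the CHANGELOG before enabling PyTorch.
import torch

# torch.__version__ can look like "1.6.0" or "1.6.0+cu101"; compare major.minor.
major_minor = tuple(int(part) for part in torch.__version__.split(".")[:2])
assert major_minor >= (1, 6), f"PyTorch >= 1.6.0 required, found {torch.__version__}"
print(f"PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")
```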

2  docs/Training-on-Amazon-Web-Service.md


2. Clone the ML-Agents repo and install the required Python packages
```sh
- git clone --branch release_6 https://github.com/Unity-Technologies/ml-agents.git
+ git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
cd ml-agents/ml-agents/
pip3 install -e .
```

4  docs/Unity-Inference-Engine.md


loading expects certain conventions for constants and tensor names. While it is
possible to construct a model that follows these conventions, we don't provide
any additional help for this. More details can be found in
- [TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
+ [TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
- [BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
+ [BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
If you wish to run inference on an externally trained model, you should use
Barracuda directly, instead of trying to run it through ML-Agents.

10  ml-agents-envs/mlagents_envs/env_utils.py


candidates = glob.glob(os.path.join(cwd, env_path + ".exe"))
if len(candidates) == 0:
candidates = glob.glob(env_path + ".exe")
if len(candidates) == 0:
# Look for e.g. 3DBall\UnityEnvironment.exe
crash_handlers = set(
glob.glob(os.path.join(cwd, env_path, "UnityCrashHandler*.exe"))
)
candidates = [
c
for c in glob.glob(os.path.join(cwd, env_path, "*.exe"))
if c not in crash_handlers
]
if len(candidates) > 0:
launch_string = candidates[0]
return launch_string
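For context, the added block above is the improved Windows executable detection noted in the CHANGELOG (#4485). A self-contained sketch of the same pattern, using a hypothetical helper name rather than the mlagents_envs API:

```python
# Illustrative sketch of the detection pattern added above (hypothetical helper,
# not the mlagents_envs public API): resolve e.g. `3DBall` to
# `3DBall\UnityEnvironment.exe` while skipping Unity's crash handler binary.
import glob
import os
from typing import Optional


def find_windows_executable(cwd: str, env_path: str) -> Optional[str]:
    candidates = glob.glob(os.path.join(cwd, env_path + ".exe"))
    if not candidates:
        candidates = glob.glob(env_path + ".exe")
    if not candidates:
        # Fall back to any .exe inside the folder, excluding UnityCrashHandler*.exe.
        crash_handlers = set(
            glob.glob(os.path.join(cwd, env_path, "UnityCrashHandler*.exe"))
        )
        candidates = [
            exe
            for exe in glob.glob(os.path.join(cwd, env_path, "*.exe"))
            if exe not in crash_handlers
        ]
    return candidates[0] if candidates else None
```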

30  ml-agents/mlagents/trainers/policy/torch_policy.py


memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
- ) -> Tuple[
-     torch.Tensor, torch.Tensor, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
- ]:
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
- dists, value_heads, memories = self.actor_critic.get_dist_and_value(
-     vec_obs, vis_obs, masks, memories, seq_len
- )
+ if memories is None:
+     dists, memories = self.actor_critic.get_dists(
+         vec_obs, vis_obs, masks, memories, seq_len
+     )
+ else:
+     # If we're using LSTM. we need to execute the values to get the critic memories
+     dists, _, memories = self.actor_critic.get_dist_and_value(
+         vec_obs, vis_obs, masks, memories, seq_len
+     )
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists

else:
actions = actions[:, 0, :]
- return (
-     actions,
-     all_logs if all_log_probs else log_probs,
-     entropies,
-     value_heads,
-     memories,
- )
+ return (actions, all_logs if all_log_probs else log_probs, entropies, memories)
def evaluate_actions(
self,

run_out = {}
with torch.no_grad():
- action, log_probs, entropy, value_heads, memories = self.sample_actions(
+ action, log_probs, entropy, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
)
run_out["action"] = ModelUtils.to_numpy(action)

run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["value_heads"] = {
name: ModelUtils.to_numpy(t) for name, t in value_heads.items()
}
run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
run_out["learning_rate"] = 0.0
if self.use_recurrent:
run_out["memory_out"] = ModelUtils.to_numpy(memories).squeeze(0)

34  ml-agents/mlagents/trainers/sac/optimizer_torch.py


self.target_network.q2_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
- (sampled_actions, log_probs, _, _) = self._sample_actions(
+ (sampled_actions, log_probs, _, _) = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

)
sampled_values, _ = self.policy.actor_critic.critic_pass(
vec_obs, vis_obs, memories, sequence_length=self.policy.sequence_length
)
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)

q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
- (next_actions, next_log_probs, _, _) = self._sample_actions(
+ (next_actions, next_log_probs, _, _) = self.policy.sample_actions(
next_vec_obs,
next_vis_obs,
masks=act_masks,

for reward_provider in self.reward_signals.values():
modules.update(reward_provider.get_modules())
return modules
- def _sample_actions(
-     self,
-     vec_obs: List[torch.Tensor],
-     vis_obs: List[torch.Tensor],
-     masks: Optional[torch.Tensor] = None,
-     memories: Optional[torch.Tensor] = None,
-     seq_len: int = 1,
-     all_log_probs: bool = False,
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-     """
-     :param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
-     """
-     dists, memories = self.policy.actor_critic.get_dists(
-         vec_obs, vis_obs, masks, memories, seq_len
-     )
-     action_list = self.policy.actor_critic.sample_action(dists)
-     log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
-         action_list, dists
-     )
-     actions = torch.stack(action_list, dim=-1)
-     if self.policy.use_continuous_act:
-         actions = actions[:, :, 0]
-     else:
-         actions = actions[:, 0, :]
-     return (actions, all_logs if all_log_probs else log_probs, entropies, memories)
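For orientation (standard SAC, not this optimizer's exact code): the reason `next_actions` and `next_log_probs` are sampled under `torch.no_grad()` above is that they feed the bootstrapped target, which combines the minimum of the two target Q-heads with an entropy penalty and must not receive gradients.

```python
# Illustrative sketch of the standard soft Q-learning target (assumed from the
# SAC literature, not copied from this optimizer): gradients must not flow
# through the target, hence the no_grad() around next-action sampling above.
import torch


def soft_q_target(rewards, dones, q1_next, q2_next, next_log_probs,
                  gamma=0.99, alpha=0.2):
    with torch.no_grad():
        # Clipped double-Q value minus the entropy term for the sampled action.
        next_value = torch.min(q1_next, q2_next) - alpha * next_log_probs
        return rewards + gamma * (1.0 - dones) * next_value
```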

4  ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


).unsqueeze(0)
with torch.no_grad():
- _, log_probs1, _, _, _ = policy1.sample_actions(
+ _, log_probs1, _, _ = policy1.sample_actions(
- _, log_probs2, _, _, _ = policy2.sample_actions(
+ _, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
)

10  ml-agents/mlagents/trainers/tests/torch/test_policy.py


if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
- (
-     sampled_actions,
-     log_probs,
-     entropies,
-     sampled_values,
-     memories,
- ) = policy.sample_actions(
+ (sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

else:
assert log_probs.shape == (64, policy.behavior_spec.action_shape)
assert entropies.shape == (64, policy.behavior_spec.action_size)
- for val in sampled_values.values():
-     assert val.shape == (64,)
if rnn:
assert memories.shape == (1, 1, policy.m_size)

2  ml-agents/mlagents/trainers/torch/components/bc/module.py


else:
vis_obs = []
- selected_actions, all_log_probs, _, _, _ = self.policy.sample_actions(
+ selected_actions, all_log_probs, _, _ = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

6  ml-agents/mlagents/trainers/torch/networks.py


else 0
)
- self.visual_processors, self.vector_processors, encoder_input_size = ModelUtils.create_input_processors(
+ (
+     self.visual_processors,
+     self.vector_processors,
+     encoder_input_size,
+ ) = ModelUtils.create_input_processors(
observation_shapes,
self.h_size,
network_settings.vis_encode_type,

1  utils/make_readme_table.py


ReleaseInfo("release_4", "1.2.0", "0.18.0", "July 15, 2020"),
ReleaseInfo("release_5", "1.2.1", "0.18.1", "July 31, 2020"),
ReleaseInfo("release_6", "1.3.0", "0.19.0", "August 12, 2020"),
ReleaseInfo("release_7", "1.4.0", "0.20.0", "September 16, 2020"),
]
MAX_DAYS = 150 # do not print releases older than this many days
