
Merge branch 'master' into develop-splitpolicyoptimizer

/develop/nopreviousactions
Ervin Teng, 4 years ago
Current commit db249ceb
280 files changed, with 4249 insertions and 3410 deletions
  1. .circleci/config.yml (2)
  2. .github/ISSUE_TEMPLATE/config.yml (4)
  3. .github/ISSUE_TEMPLATE/discussion---general-questions.md (9)
  4. .gitignore (26)
  5. .yamato/standalone-build-test.yml (2)
  6. README.md (12)
  7. config/gail_config.yaml (16)
  8. config/sac_trainer_config.yaml (13)
  9. config/trainer_config.yaml (9)
  10. docs/Basic-Guide.md (10)
  11. docs/Installation-Windows.md (4)
  12. docs/Installation.md (10)
  13. docs/Learning-Environment-Create-New.md (10)
  14. docs/Learning-Environment-Examples.md (7)
  15. docs/Learning-Environment-Executable.md (4)
  16. docs/Migrating.md (12)
  17. docs/Reward-Signals.md (2)
  18. docs/Training-Curriculum-Learning.md (2)
  19. docs/Training-PPO.md (2)
  20. docs/Training-SAC.md (2)
  21. docs/dox-ml-agents.conf (8)
  22. docs/images/3dball_learning_brain.png (254)
  23. docs/images/mlagents-NewProject.png (221)
  24. docs/localized/KR/README.md (4)
  25. docs/localized/zh-CN/README.md (2)
  26. docs/localized/zh-CN/docs/Learning-Environment-Examples.md (2)
  27. gym-unity/gym_unity/__init__.py (2)
  28. ml-agents-envs/mlagents_envs/__init__.py (2)
  29. ml-agents-envs/mlagents_envs/environment.py (2)
  30. ml-agents/mlagents/tf_utils/__init__.py (1)
  31. ml-agents/mlagents/tf_utils/tf.py (17)
  32. ml-agents/mlagents/trainers/__init__.py (2)
  33. ml-agents/mlagents/trainers/action_info.py (4)
  34. ml-agents/mlagents/trainers/agent_processor.py (30)
  35. ml-agents/mlagents/trainers/brain_conversion_utils.py (7)
  36. ml-agents/mlagents/trainers/demo_loader.py (50)
  37. ml-agents/mlagents/trainers/env_manager.py (73)
  38. ml-agents/mlagents/trainers/ppo/trainer.py (3)
  39. ml-agents/mlagents/trainers/sac/trainer.py (3)
  40. ml-agents/mlagents/trainers/simple_env_manager.py (4)
  41. ml-agents/mlagents/trainers/stats.py (16)
  42. ml-agents/mlagents/trainers/subprocess_env_manager.py (4)
  43. ml-agents/mlagents/trainers/tests/test_agent_processor.py (2)
  44. ml-agents/mlagents/trainers/tests/test_bcmodule.py (4)
  45. ml-agents/mlagents/trainers/tests/test_demo_loader.py (37)
  46. ml-agents/mlagents/trainers/tests/test_policy.py (2)
  47. ml-agents/mlagents/trainers/tests/test_ppo.py (24)
  48. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2)
  49. ml-agents/mlagents/trainers/tests/test_sac.py (24)
  50. ml-agents/mlagents/trainers/tests/test_simple_rl.py (33)
  51. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (46)
  52. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (54)
  53. ml-agents/mlagents/trainers/tf_policy.py (43)
  54. ml-agents/mlagents/trainers/trainer.py (21)
  55. ml-agents/mlagents/trainers/trainer_controller.py (82)
  56. ml-agents/mlagents/trainers/trainer_util.py (12)
  57. notebooks/getting-started.ipynb (2)
  58. protobuf-definitions/README.md (2)
  59. protobuf-definitions/make.sh (6)
  60. protobuf-definitions/make_for_win.bat (6)
  61. test_constraints_max_tf1_version.txt (3)
  62. utils/validate_meta_files.py (2)
  63. com.unity.ml-agents/CONTRIBUTING.md (2)
  64. Project/ProjectSettings/ClusterInputManager.asset (6)
  65. Project/ProjectSettings/GraphicsSettings.asset (5)
  66. Project/ProjectSettings/InputManager.asset (295)
  67. Project/ProjectSettings/NavMeshAreas.asset (2)
  68. Project/ProjectSettings/TimeManager.asset (9)
  69. Project/ProjectSettings/EditorBuildSettings.asset (1)
  70. Project/ProjectSettings/ProjectSettings.asset (190)
  71. Project/ProjectSettings/ProjectVersion.txt (1)
  72. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x86.dll.meta (5)
  73. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x64.dll.meta (5)
  74. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/osx/native/libgrpc_csharp_ext.x64.bundle.meta (247)
  75. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/linux/native/libgrpc_csharp_ext.x64.so.meta (5)
  76. com.unity.ml-agents/Plugins/ProtoBuffer/Grpc.Core.dll.meta (5)
  77. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (1)
  78. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (2)
  79. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (2)
  80. com.unity.ml-agents/Editor/AgentEditor.cs (25)
  81. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (2)
  82. com.unity.ml-agents/Tests/Editor/Sensor/WriterAdapterTests.cs (6)
  83. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (23)
  84. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (26)
  85. com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs (197)
  86. com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs (2)
  87. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (107)
  88. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (23)
  89. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (8)
  90. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (6)
  91. Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (929)
  92. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (93)
  93. Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (988)
  94. Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn (1001)
  95. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisArea.cs (2)
  96. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (9)
  97. Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (157)
  98. Project/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (118)
  99. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (946)
  100. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (932)

2
.circleci/config.yml


chmod +x Grpc.Tools.1.14.1/tools/linux_x64/protoc
chmod +x Grpc.Tools.1.14.1/tools/linux_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.1.14.1/tools/linux_x64 ./make.sh
CS_PROTO_PATH=UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects
CS_PROTO_PATH=com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects
git diff --exit-code --quiet -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" \
|| { GIT_ERR=$?; echo "protobufs need to be regenerated, apply the patch uploaded to artifacts."; \
echo "Apply the patch with the command: git apply proto.patch"; \

4
.github/ISSUE_TEMPLATE/config.yml


blank_issues_enabled: false
contact_links:
- name: ML-Agents Unity Forum
url: https://forum.unity.com/forums/ml-agents.453/
about: Please ask Installation / Setup and Discussion / General Questions in the Unity Forum.

9
.github/ISSUE_TEMPLATE/discussion---general-questions.md


---
Describe what you'd like to discuss.
For discussions, please post in [ML-Agents Unity Forum](https://forum.unity.com/forums/ml-agents.453/) instead of
creating a Github issue. Unity forums are the predominant community for Unity users and experts. By leveraging Unity
forums for general discussions and project help, we can keep Github issues for bugs, performance issues, and feature
requests for ML-Agents.
**Note**: The ML-Agents team has limited resources for education and community discussion. We'll participate as we are able, but encourage members of the community to support one another to discuss and support one another.
**Note**: The ML-Agents team has limited resources for education and community discussion. We'll participate as we are
able, but encourage members of the community to support one another to discuss and support one another.

26
.gitignore


/UnitySDK/[Ll]ibrary/
/UnitySDK/Logs/
/UnitySDK/[Tt]emp/
/UnitySDK/[Oo]bj/
/UnitySDK/[Bb]uild/
/UnitySDK/[Bb]uilds/
/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Demonstrations*
/UnitySDK/csharp_timers.json
# Tensorflow Model Info
/models
/summaries

/UnitySDK/.vs/
# Autogenerated VS/MD/Consulo solution and project files
/UnitySDKExportedObj/
/UnitySDK.consulo/
/com.unity.ml-agentsExportedObj/
/com.unity.ml-agents.consulo/
*.csproj
*.unityproj
*.sln

*.pidb.meta
# Unity3D Generated File On Crash Reports
/UnitySDK/sysinfo.txt
/com.unity.ml-agents/sysinfo.txt
# Builds
*.apk

*.x86_64
*.x86
# Tensorflow Sharp Files
/UnitySDK/Assets/ML-Agents/Plugins/Android*
/UnitySDK/Assets/ML-Agents/Plugins/iOS*
/UnitySDK/Assets/ML-Agents/Plugins/Computer*
/UnitySDK/Assets/ML-Agents/Plugins/System.Numerics*
/UnitySDK/Assets/ML-Agents/Plugins/System.ValueTuple*
/UnitySDK/Assets/ML-Agents/VideoRecorder*
/com.unity.ml-agents/VideoRecorder*
# Generated doc folders
/docs/html

2
.yamato/standalone-build-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- ./run-standalone-build-osx.sh
- python -u -m ml-agents.tests.yamato.standalone_build_tests
triggers:
pull_requests:
- targets:

12
README.md


The ML-Agents toolkit is an open-source project and we encourage and welcome
contributions. If you wish to contribute, be sure to review our
[contribution guidelines](CONTRIBUTING.md) and
[contribution guidelines](com.unity.ml-agents/CONTRIBUTING.md) and
If you run into any problems using the ML-Agents toolkit,
[submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
make sure to include as much detail as possible.
For problems with the installation and setup of the the ML-Agents toolkit, or
discussions about how to best setup or train your agents, please create a new
thread on the [Unity ML-Agents forum](https://forum.unity.com/forums/ml-agents.453/)
and make sure to include as much detail as possible.
If you run into any other problems using the ML-Agents toolkit, or have a specific
feature requests, please [submit a GitHub issue](https://github.com/Unity-Technologies/ml-agents/issues).
For any other questions or feedback, connect directly with the ML-Agents
team at ml-agents@unity3d.com.

16
config/gail_config.yaml


max_steps: 5.0e5
num_epoch: 3
behavioral_cloning:
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
reward_signals:

strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
CrawlerStatic:
normalize: true

num_layers: 3
hidden_units: 512
behavioral_cloning:
demo_path: UnitySDK/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
strength: 0.5
steps: 5000
reward_signals:

encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
PushBlock:
max_steps: 5.0e4

strength: 1.0
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
Hallway:
use_recurrent: true

strength: 0.1
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
FoodCollector:
batch_size: 64

strength: 0.1
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
strength: 1.0
steps: 0

13
config/sac_trainer_config.yaml


gamma: 0.99
encoding_size: 128
use_actions: true
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
VisualPyramids:
time_horizon: 128

gamma: 0.99
encoding_size: 128
use_actions: true
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
3DBall:
normalize: true

time_horizon: 1000
Tennis:
buffer_size: 500000
max_steps: 4e6
max_steps: 2e7
hidden_units: 256
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
CrawlerStatic:
normalize: true

9
config/trainer_config.yaml


Tennis:
normalize: true
max_steps: 4e6
max_steps: 2e7
learning_rate_schedule: constant
hidden_units: 256
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
CrawlerStatic:
normalize: true

10
docs/Basic-Guide.md


1. Launch Unity
2. On the Projects dialog, choose the **Open** option at the top of the window.
3. Using the file dialog that opens, locate the `UnitySDK` folder
3. Using the file dialog that opens, locate the `Project` folder
within the ML-Agents toolkit project and click **Open**.
4. Go to **Edit** > **Project Settings** > **Player**
5. For **each** of the platforms you target (**PC, Mac and Linux Standalone**,

![Platform Prefab](images/platform_prefab.png)
3. In the **Project** window, drag the **3DBallLearning** Model located in
`Assets/ML-Agents/Examples/3DBall/TFModels` into the `Model` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
3. In the **Project** window, drag the **3DBall** Model located in
`Assets/ML-Agents/Examples/3DBall/TFModels` into the `Model` property under `Behavior Parameters (Script)` component in the Agent GameObject **Inspector** window.
4. You should notice that each `Agent` under each `3DBall` in the **Hierarchy** windows now contains **3DBallLearning** as `Model`. __Note__ : You can modify multiple game objects in a scene by selecting them all at
4. You should notice that each `Agent` under each `3DBall` in the **Hierarchy** windows now contains **3DBall** as `Model` on the `Behavior Parameters`. __Note__ : You can modify multiple game objects in a scene by selecting them all at
once using the search bar in the Scene Hierarchy.
8. Select the **InferenceDevice** to use for this model (CPU or GPU) on the Agent.
_Note: CPU is faster for the majority of ML-Agents toolkit generated models_

[above](#running-a-pre-trained-model).
1. Move your model file into
`UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/`.
`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
2. Open the Unity Editor, and select the **3DBall** scene as described above.
3. Select the **3DBall** prefab Agent object.
4. Drag the `<behavior_name>.nn` file from the Project window of

4
docs/Installation-Windows.md


If you don't want to use Git, you can always directly download all the files
[here](https://github.com/Unity-Technologies/ml-agents/archive/latest_release.zip).
The `UnitySDK` subdirectory contains the Unity Assets to add to your projects.
It also contains many [example environments](Learning-Environment-Examples.md)
The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement

10
docs/Installation.md


The `--branch latest_release` option will switch to the tag of the latest stable release.
Omitting that will get the `master` branch which is potentially unstable.
The `UnitySDK` subdirectory contains the Unity Assets to add to your projects.
It also contains many [example environments](Learning-Environment-Examples.md)
The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
If you intend to copy the `UnitySDK` folder in to your project, ensure that
If you intend to copy the `com.unity.ml-agents` folder in to your project, ensure that
To install the Barrcuda package in later versions of Unity, navigate to the Package
To install the Barracuda package in later versions of Unity, navigate to the Package
`Adavanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
`Advanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
is checked. Search for or select the `Barracuda` package and install the latest version.
<p align="center">

10
docs/Learning-Environment-Create-New.md


but is the default as of 2018.3.)
3. In a file system window, navigate to the folder containing your cloned
ML-Agents repository.
4. Drag the `ML-Agents` folder from `UnitySDK/Assets` to the Unity
Editor Project window. If you see console errors about Barracuda, make sure
you've installed Barracuda from the Unity Package Manager. More information
can be found in the [installation instructions](Installation.md) under
4. Open the `manifest.json` file in the `Packages` directory of your project.
Add the following line to your project's package dependencies:
```
"com.unity.ml-agents" : "file:<path_to_local_ml-agents_repo>/com.unity.ml-agents"
```
More information can be found in the [installation instructions](Installation.md) under
**Package Installation**.
Your Unity **Project** window should contain the following assets:

7
docs/Learning-Environment-Examples.md


The Unity ML-Agents toolkit contains an expanding set of example environments
which demonstrate various features of the platform. Environments are located in
`UnitySDK/Assets/ML-Agents/Examples` and summarized below. Additionally, our
`Project/Assets/ML-Agents/Examples` and summarized below. Additionally, our
[first ML Challenge](https://connect.unity.com/challenges/ml-agents-1) contains
environments created by the community.

researchers.
If you would like to contribute environments, please see our
[contribution guidelines](../CONTRIBUTING.md) page.
[contribution guidelines](../com.unity.ml-agents/CONTRIBUTING.md) page.
## Basic

* Goal: Move to the most reward state.
* Agents: The environment contains one agent.
* Agent Reward Function:
* -0.01 at each step
* +0.1 for arriving at suboptimal state.
* +1.0 for arriving at optimal state.
* Behavior Parameters:

* Visual Observations: None
* Float Properties: None
* Benchmark Mean Reward: 0.94
* Benchmark Mean Reward: 0.93
## [3DBall: 3D Balance Ball](https://youtu.be/dheeCO29-EI)

4
docs/Learning-Environment-Executable.md


1. Launch Unity.
2. On the Projects dialog, choose the **Open** option at the top of the window.
3. Using the file dialog that opens, locate the `UnitySDK` folder within the
3. Using the file dialog that opens, locate the `Project` folder within the
ML-Agents project and click **Open**.
4. In the **Project** window, navigate to the folder
`Assets/ML-Agents/Examples/3DBall/Scenes/`.

into your Agent by following the steps below:
1. Move your model file into
`UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/`.
`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
2. Open the Unity Editor, and select the **3DBall** scene as described above.
3. Select the **3DBall** prefab from the Project window and select **Agent**.
5. Drag the `<behavior_name>.nn` file from the Project window of

12
docs/Migrating.md


## Migrating from 0.13 to latest
### Important changes
* The `Decision Period` and `On Demand decision` checkbox have been removed from the Agent. On demand decision is now the default (calling `RequestDecision` on the Agent manually.)
* Agents will always request a decision after being marked as `Done()` and will no longer wait for the next call to `RequestDecision()`.
* The `agentParameters` field of the Agent has been removed. (Contained only `maxStep` information)
* `maxStep` is now a public field on the Agent. (Was moved from `agentParameters`)
* The `Info` field of the Agent has been made private. (Was only used internally and not meant to be modified outside of the Agent)
* The `GetReward()` method on the Agent has been removed. (It was being confused with `GetCumulativeReward()`)
* The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference)
* The `GetValueEstimate()` method on the Agent has been removed.
* The `UpdateValueAction()` method on the Agent has been removed.
* Calling `Done()` on the Agent will now reset it immediately and call the `AgentReset` virtual method. (This is to simplify the previous logic in which the Agent had to wait for the next `EnvironmentStep` to reset)
* If you were not using `On Demand Decision` for your Agent, you **must** add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
* If you have a class that inherits from Academy:
* If the class didn't override any of the virtual methods and didn't store any additional data, you can just remove the old script from the scene.
* If the class had additional data, create a new MonoBehaviour and store the data on this instead.

* Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format.
A tool like https://www.json2yaml.com may be useful to help with the conversion.
* If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0

2
docs/Reward-Signals.md


strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
```
Each reward signal should define at least two parameters, `strength` and `gamma`, in addition

2
docs/Training-Curriculum-Learning.md


Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example.

2
docs/Training-PPO.md


```
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```

2
docs/Training-SAC.md


```
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```

8
docs/dox-ml-agents.conf


# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
INPUT = ../UnitySDK/Assets/ML-Agents/Scripts/Academy.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Agent.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Decision.cs
INPUT = ../Project/Assets/ML-Agents/Scripts/Academy.cs \
../Project/Assets/ML-Agents/Scripts/Agent.cs \
../Project/Assets/ML-Agents/Scripts/Monitor.cs \
../Project/Assets/ML-Agents/Scripts/Decision.cs
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses

254
docs/images/3dball_learning_brain.png

Before  After
Width: 413  |  Height: 349  |  Size: 35 KiB

221
docs/images/mlagents-NewProject.png

Before  After
Width: 193  |  Height: 345  |  Size: 20 KiB

4
docs/localized/KR/README.md


## Community and Feedback
The ML-Agents toolkit is an open-source project and contributions are welcome. If you would like to contribute,
please review the [contribution guidelines](CONTRIBUTING.md) and the [code of conduct](CODE_OF_CONDUCT.md).
please review the [contribution guidelines](com/unity.ml-agents/CONTRIBUTING.md) and the [code of conduct](CODE_OF_CONDUCT.md).
If you run into any problems while using the ML-Agents toolkit, please [submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) with as many details as possible.

장현준: totok682@naver.com
민규식: kyushikmin@gmail.com
민규식: kyushikmin@gmail.com

2
docs/localized/zh-CN/README.md


ML-Agents is an open-source project, and we encourage and welcome everyone's contributions.
If you would like to contribute, please be sure to review our
[contribution guidelines](/CONTRIBUTING.md) and
[contribution guidelines](/com.unity.ml-agents/CONTRIBUTING.md) and
[code of conduct](/CODE_OF_CONDUCT.md).
You can communicate with us and the broader community through Unity Connect and GitHub:

2
docs/localized/zh-CN/docs/Learning-Environment-Examples.md


page.
If you would like to contribute environments, please see our
[contribution guidelines](/CONTRIBUTING.md) page.
[contribution guidelines](/com.unity.ml-agents/CONTRIBUTING.md) page.
## Basic

2
gym-unity/gym_unity/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

2
ml-agents-envs/mlagents_envs/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

2
ml-agents-envs/mlagents_envs/environment.py


class UnityEnvironment(BaseEnv):
SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
API_VERSION = "API-13"
API_VERSION = "API-14-dev0"
def __init__(
self,

1
ml-agents/mlagents/tf_utils/__init__.py


from mlagents.tf_utils.tf import tf as tf # noqa
from mlagents.tf_utils.tf import set_warnings_enabled # noqa
from mlagents.tf_utils.tf import generate_session_config # noqa

17
ml-agents/mlagents/tf_utils/tf.py


def set_warnings_enabled(is_enabled: bool) -> None:
"""
Enable or disable tensorflow warnings (notabley, this disables deprecation warnings.
Enable or disable tensorflow warnings (notably, this disables deprecation warnings.
def generate_session_config() -> tf.ConfigProto:
"""
Generate a ConfigProto to use for ML-Agents that doesn't consume all of the GPU memory
and allows for soft placement in the case of multi-GPU.
"""
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# For multi-GPU training, set allow_soft_placement to True to allow
# placing the operation into an alternative device automatically
# to prevent from exceptions if the device doesn't suppport the operation
# or the device does not exist
config.allow_soft_placement = True
return config
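For context, a minimal sketch of how this helper is used elsewhere in this diff (tf_policy.py and trainer.py below create their sessions with it); the graph variable here is illustrative:

```python
from mlagents.tf_utils import tf, generate_session_config

# Build a session that grows GPU memory on demand and allows soft device
# placement, instead of hand-rolling a ConfigProto at each call site.
graph = tf.Graph()
sess = tf.Session(config=generate_session_config(), graph=graph)
```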

2
ml-agents/mlagents/trainers/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

4
ml-agents/mlagents/trainers/action_info.py


value: Any
outputs: ActionInfoOutputs
agent_ids: List[AgentId]
@staticmethod
def empty() -> "ActionInfo":
return ActionInfo([], [], {}, [])
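A small sketch of what the new helper replaces; elsewhere in this diff, hand-built ActionInfo([], [], {}, []) instances in tf_policy.py and the tests become ActionInfo.empty():

```python
from mlagents.trainers.action_info import ActionInfo

no_action = ActionInfo.empty()
# Equivalent to the old hand-built ActionInfo([], [], {}, [])
assert no_action.agent_ids == []
assert no_action.outputs == {}
```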

30
ml-agents/mlagents/trainers/agent_processor.py


from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.env_manager import get_global_agent_id
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
T = TypeVar("T")

"Policy/Learning Rate", take_action_outputs["learning_rate"]
)
terminated_agents: List[str] = []
# Make unique agent_ids that are global across workers
action_global_agent_ids = [
get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids

"Environment/Episode Length",
self.episode_steps.get(global_id, 0),
)
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
terminated_agents += [global_id]
elif not curr_agent_step.done:
self.episode_steps[global_id] += 1

previous_action.agent_ids, take_action_outputs["action"]
)
for terminated_id in terminated_agents:
self._clean_agent_data(terminated_id)
def _clean_agent_data(self, global_id: str) -> None:
"""
Removes the data for an Agent.
"""
del self.experience_buffers[global_id]
del self.last_take_action_outputs[global_id]
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
del self.last_step_result[global_id]
self.policy.remove_previous_action([global_id])
self.policy.remove_memories([global_id])
def publish_trajectory_queue(
self, trajectory_queue: "AgentManagerQueue[Trajectory]"
) -> None:

:param trajectory_queue: Trajectory queue to publish to.
"""
self.trajectory_queues.append(trajectory_queue)
def end_episode(self) -> None:
"""
Ends the episode, terminating the current trajectory and stopping stats collection for that
episode. Used for forceful reset (e.g. in curriculum or generalization training.)
"""
self.experience_buffers.clear()
self.episode_rewards.clear()
self.episode_steps.clear()
class AgentManagerQueue(Generic[T]):

7
ml-agents/mlagents/trainers/brain_conversion_utils.py


return BrainParameters(
name, int(vec_size), cam_res, a_size, [], vector_action_space_type
)
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"

50
ml-agents/mlagents/trainers/demo_loader.py


import pathlib
import logging
import os
from typing import List, Tuple

return brain_params, demo_buffer
def get_demo_files(path: str) -> List[str]:
"""
Retrieves the demonstration file(s) from a path.
:param path: Path of demonstration file or directory.
:return: List of demonstration files
Raises errors if |path| is invalid.
"""
if os.path.isfile(path):
if not path.endswith(".demo"):
raise ValueError("The path provided is not a '.demo' file.")
return [path]
elif os.path.isdir(path):
paths = [
os.path.join(path, name)
for name in os.listdir(path)
if name.endswith(".demo")
]
if not paths:
raise ValueError("There are no '.demo' files in the provided directory.")
return paths
else:
raise FileNotFoundError(
f"The demonstration file or directory {path} does not exist."
)
@timed
def load_demonstration(
file_path: str

# First 32 bytes of file dedicated to meta-data.
INITIAL_POS = 33
file_paths = []
if os.path.isdir(file_path):
all_files = os.listdir(file_path)
for _file in all_files:
if _file.endswith(".demo"):
file_paths.append(os.path.join(file_path, _file))
if not all_files:
raise ValueError("There are no '.demo' files in the provided directory.")
elif os.path.isfile(file_path):
file_paths.append(file_path)
file_extension = pathlib.Path(file_path).suffix
if file_extension != ".demo":
raise ValueError(
"The file is not a '.demo' file. Please provide a file with the "
"correct extension."
)
else:
raise FileNotFoundError(
"The demonstration file or directory {} does not exist.".format(file_path)
)
file_paths = get_demo_files(file_path)
group_spec = None
brain_param_proto = None
info_action_pairs = []
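As a usage sketch, the refactor above means callers can hand either a single .demo file or a directory of .demo files to the loader (the test_demo_loader.py changes below exercise both); the demo path here is illustrative:

```python
from mlagents.trainers.demo_loader import get_demo_files, demo_to_buffer

demo_dir = "Project/Assets/ML-Agents/Examples/Pyramids/Demos"
demo_files = get_demo_files(demo_dir)          # list of .demo files in the directory
_, demo_buffer = demo_to_buffer(demo_dir, 1)   # second argument: sequence length
```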

73
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
import logging
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"
logger = logging.getLogger("mlagents.trainers")
class EnvironmentStep(NamedTuple):

class EnvManager(ABC):
def __init__(self):
self.policies: Dict[AgentGroup, Policy] = {}
self.policies: Dict[AgentGroup, TFPolicy] = {}
self.agent_managers: Dict[AgentGroup, AgentManager] = {}
self.first_step_infos: List[EnvironmentStep] = None
def set_policy(self, brain_name: AgentGroup, policy: Policy) -> None:
def set_policy(self, brain_name: AgentGroup, policy: TFPolicy) -> None:
if brain_name in self.agent_managers:
self.agent_managers[brain_name].policy = policy
def set_agent_manager(self, brain_name: AgentGroup, manager: AgentManager) -> None:
self.agent_managers[brain_name] = manager
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
def reset(self, config: Dict = None) -> List[EnvironmentStep]:
def _reset_env(self, config: Dict = None) -> List[EnvironmentStep]:
def reset(self, config: Dict = None) -> int:
for manager in self.agent_managers.values():
manager.end_episode()
# Save the first step infos, after the reset.
# They will be processed on the first advance().
self.first_step_infos = self._reset_env(config)
return len(self.first_step_infos)
@property
@abstractmethod
def external_brains(self) -> Dict[AgentGroup, BrainParameters]:

@abstractmethod
def close(self):
pass
def advance(self):
# If we had just reset, process the first EnvironmentSteps.
# Note that we do it here instead of in reset() so that on the very first reset(),
# we can create the needed AgentManagers before calling advance() and processing the EnvironmentSteps.
if self.first_step_infos is not None:
self._process_step_infos(self.first_step_infos)
self.first_step_infos = None
# Get new policies if found
for brain_name in self.external_brains:
try:
_policy = self.agent_managers[brain_name].policy_queue.get_nowait()
self.set_policy(brain_name, _policy)
except AgentManagerQueue.Empty:
pass
# Step the environment
new_step_infos = self._step()
# Add to AgentProcessor
num_step_infos = self._process_step_infos(new_step_infos)
return num_step_infos
def _process_step_infos(self, step_infos: List[EnvironmentStep]) -> int:
for step_info in step_infos:
for name_behavior_id in step_info.name_behavior_ids:
if name_behavior_id not in self.agent_managers:
logger.warning(
"Agent manager was not created for behavior id {}.".format(
name_behavior_id
)
)
continue
self.agent_managers[name_behavior_id].add_experiences(
step_info.current_all_step_result[name_behavior_id],
step_info.worker_id,
step_info.brain_name_to_action_info.get(
name_behavior_id, ActionInfo.empty()
),
)
return len(step_infos)
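A hedged sketch of the calling pattern this refactor sets up: reset() ends episodes and stashes the post-reset EnvironmentSteps, and the first advance() processes them before stepping. The loop below is illustrative, not the controller's actual code:

```python
from mlagents.trainers.env_manager import EnvManager

def run_steps(env_manager: EnvManager, num_iterations: int) -> None:
    # Ends in-progress episodes and stores the first step infos for later.
    env_manager.reset(config=None)
    for _ in range(num_iterations):
        # Processes any stored reset steps, fetches updated policies from the
        # policy queues, steps the environment(s), and feeds the AgentManagers.
        env_manager.advance()
```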

3
ml-agents/mlagents/trainers/ppo/trainer.py


self.policy.initialize_or_load()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Needed to resume loads properly
self.step = policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

3
ml-agents/mlagents/trainers/sac/trainer.py


self.policy.initialize_or_load()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Needed to resume loads properly
self.step = policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

4
ml-agents/mlagents/trainers/simple_env_manager.py


self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
self.previous_all_action_info: Dict[str, ActionInfo] = {}
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
all_action_info = self._take_step(self.previous_step)
self.previous_all_action_info = all_action_info

self.previous_step = step_info
return [step_info]
def reset(
def _reset_env(
self, config: Dict[AgentGroup, float] = None
) -> List[EnvironmentStep]: # type: ignore
if config is not None:

16
ml-agents/mlagents/trainers/stats.py


std: float
num: int
@staticmethod
def empty() -> "StatsSummary":
return StatsSummary(0.0, 0.0, 0)
class StatsWriter(abc.ABC):
"""

:param key: The type of statistic, e.g. Environment/Reward.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
"""
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
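A hedged sketch of the behavior this guards: with no recorded values, the summary is now the zeroed StatsSummary rather than the result of calling numpy on an empty list. The constructor argument and add_stat call below are assumptions about the existing StatsReporter API (its use is visible in test_ppo.py further down):

```python
from mlagents.trainers.stats import StatsReporter, StatsSummary

reporter = StatsReporter("TestCategory")  # assumed: reporter is keyed by a category name

# No values recorded yet -> the empty summary instead of np.mean([]) warnings/NaN.
assert reporter.get_stats_summaries("Environment/Cumulative Reward") == StatsSummary.empty()

reporter.add_stat("Environment/Cumulative Reward", 1.0)  # assumed existing helper
assert reporter.get_stats_summaries("Environment/Cumulative Reward").num == 1
```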

4
ml-agents/mlagents/trainers/subprocess_env_manager.py


env_worker.send("step", env_action_info)
env_worker.waiting = True
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
# Queue steps for any workers which aren't in the "waiting" state.
self._queue_steps()

step_infos = self._postprocess_steps(worker_steps)
return step_infos
def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
while any(ew.waiting for ew in self.env_workers):
if not self.step_queue.empty():
step = self.step_queue.get_nowait()

2
ml-agents/mlagents/trainers/tests/test_agent_processor.py


)
processor.publish_trajectory_queue(tqueue)
# This is like the initial state after the env reset
processor.add_experiences(mock_step, 0, ActionInfo([], [], {}, []))
processor.add_experiences(mock_step, 0, ActionInfo.empty())
for _ in range(5):
processor.add_experiences(mock_step, 0, fake_action_info)

4
ml-agents/mlagents/trainers/tests/test_bcmodule.py


use_recurrent: false
memory_size: 8
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

37
ml-agents/mlagents/trainers/tests/test_demo_loader.py


import os
import numpy as np
import pytest
import tempfile
from mlagents.trainers.demo_loader import load_demonstration, demo_to_buffer
from mlagents.trainers.demo_loader import (
load_demonstration,
demo_to_buffer,
get_demo_files,
)
def test_load_demo():

_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)
assert len(demo_buffer["actions"]) == total_expected - 1
def test_edge_cases():
path_prefix = os.path.dirname(os.path.abspath(__file__))
# nonexistent file and directory
with pytest.raises(FileNotFoundError):
get_demo_files(os.path.join(path_prefix, "nonexistent_file.demo"))
with pytest.raises(FileNotFoundError):
get_demo_files(os.path.join(path_prefix, "nonexistent_directory"))
with tempfile.TemporaryDirectory() as tmpdirname:
# empty directory
with pytest.raises(ValueError):
get_demo_files(tmpdirname)
# invalid file
invalid_fname = os.path.join(tmpdirname, "mydemo.notademo")
with open(invalid_fname, "w") as f:
f.write("I'm not a demo")
with pytest.raises(ValueError):
get_demo_files(invalid_fname)
# invalid directory
with pytest.raises(ValueError):
get_demo_files(tmpdirname)
# valid file
valid_fname = os.path.join(tmpdirname, "mydemo.demo")
with open(valid_fname, "w") as f:
f.write("I'm a demo file")
assert get_demo_files(valid_fname) == [valid_fname]
# valid directory
assert get_demo_files(tmpdirname) == [valid_fname]

2
ml-agents/mlagents/trainers/tests/test_policy.py


dummy_groupspec = AgentGroupSpec([(1,)], "continuous", 1)
no_agent_step = BatchedStepResult.empty(dummy_groupspec)
result = policy.get_action(no_agent_step)
assert result == ActionInfo([], [], {}, [])
assert result == ActionInfo.empty()
def test_take_action_returns_nones_on_missing_values():

24
ml-agents/mlagents/trainers/tests/test_ppo.py


brain_params.brain_name, 0, trainer_params, True, False, 0, "0", False
)
policy_mock = mock.Mock(spec=NNPolicy)
policy_mock.get_current_step.return_value = 0
step_count = (
5
) # 10 hacked because this function is no longer called through trainer

for agent in reward.values():
assert agent == 0
assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
def test_add_get_policy(dummy_config):
brain_params = make_brain_parameters(
discrete_action=False, visual_inputs=0, vec_obs_size=6
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)
policy = mock.Mock(spec=NNPolicy)
policy.get_current_step.return_value = 2000
trainer.add_policy(brain_params.brain_name, policy)
assert trainer.get_policy(brain_params.brain_name) == policy
# Make sure the summary steps were loaded properly
assert trainer.get_step == 2000
assert trainer.next_summary_step > 2000
# Test incorrect class of policy
policy = mock.Mock()
with pytest.raises(RuntimeError):
trainer.add_policy(brain_params, policy)
def test_normalization(dummy_config):

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

24
ml-agents/mlagents/trainers/tests/test_sac.py


import pytest
from unittest import mock
import yaml
import numpy as np

policy = trainer2.create_policy(mock_brain)
trainer2.add_policy(mock_brain.brain_name, policy)
assert trainer2.update_buffer.num_experiences == buffer_len
def test_add_get_policy(dummy_config):
brain_params = make_brain_parameters(
discrete_action=False, visual_inputs=0, vec_obs_size=6
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = mock.Mock(spec=SACPolicy)
policy.get_current_step.return_value = 2000
trainer.add_policy(brain_params.brain_name, policy)
assert trainer.get_policy(brain_params.brain_name) == policy
# Make sure the summary steps were loaded properly
assert trainer.get_step == 2000
assert trainer.next_summary_step > 2000
# Test incorrect class of policy
policy = mock.Mock()
with pytest.raises(RuntimeError):
trainer.add_policy(brain_params, policy)
def test_process_trajectory(dummy_config):

33
ml-agents/mlagents/trainers/tests/test_simple_rl.py


gamma: 0.99
"""
GHOST_CONFIG = f"""
{BRAIN_NAME}:
trainer: ppo
batch_size: 16
beta: 5.0e-3
buffer_size: 64
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 5.0e-3
max_steps: 2500
memory_size: 256
normalize: false
num_epoch: 3
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 500
use_recurrent: false
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
self_play:
save_step: 1000
"""
def _check_environment_trains(
env, config, meta_curriculum=None, success_threshold=0.99

def test_simple_sac(use_discrete):
env = Simple1DEnvironment(use_discrete=use_discrete)
_check_environment_trains(env, SAC_CONFIG)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
env = Simple1DEnvironment(use_discrete=use_discrete)
_check_environment_trains(env, GHOST_CONFIG)

46
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


EnvironmentResponse,
StepResponse,
)
from mlagents.trainers.env_manager import EnvironmentStep
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig

mock_env_factory, EngineConfig.default_config(), 1
)
params = {"test": "params"}
manager.reset(params)
manager._reset_env(params)
manager.env_workers[0].send.assert_called_with("reset", (params))
def test_reset_collects_results_from_all_envs(self):

)
params = {"test": "params"}
res = manager.reset(params)
res = manager._reset_env(params)
for i, env in enumerate(manager.env_workers):
env.send.assert_called_with("reset", (params))
env.recv.assert_called()

manager.env_workers[2].previous_step = last_steps[2]
manager.env_workers[2].waiting = True
manager._take_step = Mock(return_value=step_mock)
res = manager.step()
res = manager._step()
for i, env in enumerate(manager.env_workers):
if i < 2:
env.send.assert_called_with("step", step_mock)

manager.env_workers[0].previous_step,
manager.env_workers[1].previous_step,
]
@mock.patch("mlagents.trainers.subprocess_env_manager.SubprocessEnvManager._step")
@mock.patch(
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.external_brains",
new_callable=mock.PropertyMock,
)
def test_advance(self, external_brains_mock, step_mock):
brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
worker_id, EnvironmentResponse("step", worker_id, worker_id)
)
env_manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 3
)
external_brains_mock.return_value = [brain_name]
agent_manager_mock = mock.Mock()
env_manager.set_agent_manager(brain_name, agent_manager_mock)
step_info_dict = {brain_name: Mock()}
step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
step_mock.return_value = [step_info]
env_manager.advance()
# Test add_experiences
env_manager._step.assert_called_once()
agent_manager_mock.add_experiences.assert_called_once_with(
step_info.current_all_step_result[brain_name],
0,
step_info.brain_name_to_action_info[brain_name],
)
# Test policy queue
mock_policy = mock.Mock()
agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
env_manager.advance()
assert env_manager.policies[brain_name] == mock_policy
assert agent_manager_mock.policy == mock_policy

54
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


from unittest.mock import MagicMock, Mock, patch
from unittest.mock import MagicMock, patch
from mlagents.trainers.subprocess_env_manager import EnvironmentStep
from mlagents.trainers.sampler_class import SamplerManager

return tc, trainer_mock
def test_take_step_adds_experiences_to_trainer_and_trains(
def test_advance_adds_experiences_to_trainer_and_trains(
action_info_dict = {brain_name: MagicMock()}
brain_info_dict = {brain_name: Mock()}
old_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
new_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
trainer_mock._is_ready_update = MagicMock(return_value=True)
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)

env_mock.step.assert_called_once()
manager_mock = tc.managers[brain_name]
manager_mock.add_experiences.assert_called_once_with(
new_step_info.current_all_step_result[brain_name],
0,
new_step_info.brain_name_to_action_info[brain_name],
)
trainer_mock.advance.assert_called_once()
def test_take_step_if_not_training(trainer_controller_with_take_step_mocks):
tc, trainer_mock = trainer_controller_with_take_step_mocks
tc.train_model = False
brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
brain_info_dict = {brain_name: Mock()}
old_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
new_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
trainer_mock._is_ready_update = MagicMock(return_value=False)
env_mock = MagicMock()
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)
tc.advance(env_mock)
env_mock.reset.assert_not_called()
env_mock.step.assert_called_once()
manager_mock = tc.managers[brain_name]
manager_mock.add_experiences.assert_called_once_with(
new_step_info.current_all_step_result[brain_name],
0,
new_step_info.brain_name_to_action_info[brain_name],
)
env_mock.advance.assert_called_once()
trainer_mock.advance.assert_called_once()

43
ml-agents/mlagents/trainers/tf_policy.py


import numpy as np
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents.trainers.policy import Policy

from mlagents.trainers import tensorflow_to_barracuda as tf2bc
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.env_manager import get_global_agent_id
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.models import LearningModel

"""
self._version_number_ = 2
self.m_size = 0
# for ghost trainer save/load snapshots
self.assign_phs = []
self.assign_ops = []
self.inference_dict = {}
self.update_dict = {}
self.sequence_length = 1

self.model_path = trainer_parameters["model_path"]
self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
self.graph = tf.Graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# For multi-GPU training, set allow_soft_placement to True to allow
# placing the operation into an alternative device automatically
# to prevent from exceptions if the device doesn't suppport the operation
# or the device does not exist
config.allow_soft_placement = True
self.sess = tf.Session(
config=tf_utils.generate_session_config(), graph=self.graph
)
self.sess = tf.Session(config=config, graph=self.graph)
self.saver = None
self.optimizer = None
if self.use_recurrent:

else:
self._initialize_graph()
def get_weights(self):
with self.graph.as_default():
_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
values = [v.eval(session=self.sess) for v in _vars]
return values
def init_load_weights(self):
with self.graph.as_default():
_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
values = [v.eval(session=self.sess) for v in _vars]
for var, value in zip(_vars, values):
assign_ph = tf.placeholder(var.dtype, shape=value.shape)
self.assign_phs.append(assign_ph)
self.assign_ops.append(tf.assign(var, assign_ph))
def load_weights(self, values):
with self.graph.as_default():
feed_dict = {}
for assign_ph, value in zip(self.assign_phs, values):
feed_dict[assign_ph] = value
self.sess.run(self.assign_ops, feed_dict=feed_dict)
def evaluate(
self, batched_step_result: BatchedStepResult, global_agent_ids: List[str]
) -> Dict[str, Any]:

to be passed to add experiences
"""
if batched_step_result.n_agents() == 0:
return ActionInfo([], [], {}, [])
return ActionInfo.empty()
agents_done = [
agent
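As a sketch of what the weight accessors added above are for (ghost-trainer snapshots), a hypothetical helper that copies one policy's variables into another:

```python
from mlagents.trainers.tf_policy import TFPolicy

def copy_policy_weights(src: TFPolicy, dst: TFPolicy) -> None:
    """Snapshot src's variables and load them into dst, ghost-trainer style."""
    weights = src.get_weights()   # evaluated values of all global variables
    dst.init_load_weights()       # builds the assign placeholders/ops once
    dst.load_weights(weights)     # runs the assign ops with the snapshot
```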

21
ml-agents/mlagents/trainers/trainer.py


import abc
from mlagents.tf_utils import tf
from mlagents import tf_utils
from collections import deque

self.step: int = 0
self.training_start_time = time.time()
self.summary_freq = self.trainer_parameters["summary_freq"]
self.next_update_step = self.summary_freq
self.next_summary_step = self.summary_freq
def _check_param_keys(self):
for k in self.param_keys:

:param input_dict: A dictionary that will be displayed in a table on Tensorboard.
"""
try:
with tf.Session() as sess:
with tf.Session(config=tf_utils.generate_session_config()) as sess:
s_op = tf.summary.text(
key,
tf.convert_to_tensor(

:param n_steps: number of steps to increment the step count by
"""
self.step += n_steps
self.next_update_step = self.step + (
self.summary_freq - self.step % self.summary_freq
)
self.next_summary_step = self._get_next_summary_step()
def _get_next_summary_step(self) -> int:
"""
Get the next step count that should result in a summary write.
"""
return self.step + (self.summary_freq - self.step % self.summary_freq)
def save_model(self, name_behavior_id: str) -> None:
"""
Saves the model

write the summary. This logic ensures summaries are written on the update step and not in between.
:param step_after_process: the step count after processing the next trajectory.
"""
if step_after_process >= self.next_update_step and self.get_step != 0:
self._write_summary(self.next_update_step)
if step_after_process >= self.next_summary_step and self.get_step != 0:
self._write_summary(self.next_summary_step)
@abc.abstractmethod
def end_episode(self):

self, trajectory_queue: AgentManagerQueue[Trajectory]
) -> None:
"""
Adds a trajectory queue to the list of queues for the trainer injest Trajectories from.
Adds a trajectory queue to the list of queues for the trainer to ingest Trajectories from.
:param queue: Trajectory queue to publish to.
"""
self.trajectory_queues.append(trajectory_queue)
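To make the renamed bookkeeping concrete, a small worked example of _get_next_summary_step's arithmetic (the numbers are illustrative):

```python
# next_summary_step = step + (summary_freq - step % summary_freq)
step, summary_freq = 120, 50
next_summary_step = step + (summary_freq - step % summary_freq)
print(next_summary_step)  # 150, i.e. the next multiple of summary_freq after step
```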

82
ml-agents/mlagents/trainers/trainer_controller.py


import sys
import json
import logging
from typing import Dict, List, Optional, Set
from typing import Dict, Optional, Set
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep
from mlagents.trainers.env_manager import EnvManager
from mlagents_envs.exception import (
UnityEnvironmentException,
UnityCommunicationException,

from mlagents.trainers.trainer import Trainer
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
class TrainerController(object):

"""
self.trainers: Dict[str, Trainer] = {}
self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
self.managers: Dict[str, AgentManager] = {}
self.trainer_factory = trainer_factory
self.model_path = model_path
self.summaries_dir = summaries_dir

"permissions are set correctly.".format(model_path)
)
def _reset_env(self, env: EnvManager) -> List[EnvironmentStep]:
def _reset_env(self, env: EnvManager) -> None:
"""Resets the environment.
Returns:

self.meta_curriculum.get_config() if self.meta_curriculum else {}
)
sampled_reset_param.update(new_meta_curriculum_config)
return env.reset(config=sampled_reset_param)
env.reset(config=sampled_reset_param)
def _should_save_model(self, global_step: int) -> bool:
return (

def _create_trainer_and_manager(
self, env_manager: EnvManager, name_behavior_id: str
) -> None:
try:
brain_name, _ = name_behavior_id.split("?")
except ValueError:
brain_name = name_behavior_id
brain_name = BehaviorIdentifiers.from_name_behavior_id(
name_behavior_id
).brain_name
try:
trainer = self.trainers[brain_name]
except KeyError:

policy = trainer.create_policy(env_manager.external_brains[name_behavior_id])
trainer.add_policy(name_behavior_id, policy)
env_manager.set_policy(name_behavior_id, policy)
self.brain_name_to_identifier[brain_name].add(name_behavior_id)
agent_manager = AgentManager(
policy,
name_behavior_id,

env_manager.set_agent_manager(name_behavior_id, agent_manager)
env_manager.set_policy(name_behavior_id, policy)
self.brain_name_to_identifier[brain_name].add(name_behavior_id)
self.managers[name_behavior_id] = agent_manager
def _create_trainers_and_managers(
self, env_manager: EnvManager, behavior_ids: Set[str]

global_step = 0
last_brain_behavior_ids: Set[str] = set()
try:
initial_step = self._reset_env(env_manager)
# Create the initial set of trainers and managers
initial_brain_behaviors = set(env_manager.external_brains.keys())
self._create_trainers_and_managers(env_manager, initial_brain_behaviors)
last_brain_behavior_ids = initial_brain_behaviors
self._process_step_infos(initial_step)
# Initial reset
self._reset_env(env_manager)
while self._not_done_training():
external_brain_behavior_ids = set(env_manager.external_brains.keys())
new_behavior_ids = external_brain_behavior_ids - last_brain_behavior_ids

global_step += 1
self.reset_env_if_ready(env_manager, global_step)
if self._should_save_model(global_step):
# Save Tensorflow model
# Final save Tensorflow model
if global_step != 0 and self.train_model:
self._save_model()

def end_trainer_episodes(
self, env: EnvManager, lessons_incremented: Dict[str, bool]
) -> None:
reset_step = self._reset_env(env)
self._process_step_infos(reset_step)
self._reset_env(env)
# Reward buffers reset takes place only for curriculum learning
# else no reset.
for trainer in self.trainers.values():

if meta_curriculum_reset or generalization_reset:
self.end_trainer_episodes(env, lessons_incremented)
def _get_and_process_experiences(self, env: EnvManager) -> int:
with hierarchical_timer("env_step"):
# Get new policies if found
for brain_name in self.trainers.keys():
for name_behavior_id in self.brain_name_to_identifier[brain_name]:
try:
_policy = self.managers[
name_behavior_id
].policy_queue.get_nowait()
env.set_policy(name_behavior_id, _policy)
except AgentManagerQueue.Empty:
pass
# Step the environment
new_step_infos = env.step()
# Add to AgentProcessor
num_step_infos = self._process_step_infos(new_step_infos)
return num_step_infos
def _process_step_infos(self, step_infos: List[EnvironmentStep]) -> int:
for step_info in step_infos:
for name_behavior_id in step_info.name_behavior_ids:
if name_behavior_id not in self.managers:
self.logger.warning(
"Agent manager was not created for behavior id {}.".format(
name_behavior_id
)
)
continue
self.managers[name_behavior_id].add_experiences(
step_info.current_all_step_result[name_behavior_id],
step_info.worker_id,
step_info.brain_name_to_action_info.get(
name_behavior_id, ActionInfo([], [], {}, [])
),
)
return len(step_infos)
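The `_get_and_process_experiences` hunk above drains each behavior's policy queue without blocking before stepping the environment. A minimal, self-contained sketch of that pattern, assuming a plain `queue.Queue` in place of ml-agents' `AgentManagerQueue`:

```python
import queue
from typing import Callable, Dict


def drain_policy_updates(
    policy_queues: Dict[str, queue.Queue],
    set_policy: Callable[[str, object], None],
) -> None:
    """Non-blocking drain: if a trainer has published a fresh policy, hand it to the env workers."""
    for behavior_id, policy_queue in policy_queues.items():
        try:
            new_policy = policy_queue.get_nowait()
        except queue.Empty:
            continue  # no new policy this iteration; keep using the current one
        set_policy(behavior_id, new_policy)


# Usage sketch: queues keyed by behavior id, filled by trainer threads elsewhere.
queues = {"3DBall?team=0": queue.Queue()}
queues["3DBall?team=0"].put("policy-v2")
drain_policy_updates(queues, lambda bid, pol: print(f"set {bid} -> {pol}"))
```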
num_steps = self._get_and_process_experiences(env)
with hierarchical_timer("env_step"):
num_steps = env.advance()
# Report current lesson
if self.meta_curriculum:

12
ml-agents/mlagents/trainers/trainer_util.py


from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.ghost.trainer import GhostTrainer
logger = logging.getLogger("mlagents.trainers")

seed,
run_id,
)
)
if "self_play" in trainer_parameters:
trainer = GhostTrainer(
trainer,
brain_name,
min_lesson_length,
trainer_parameters,
train_model,
run_id,
)
return trainer
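The trainer_util.py hunk above wraps the base trainer in a `GhostTrainer` whenever the trainer config contains a `self_play` section. A schematic sketch of that dispatch, using placeholder classes rather than the real PPO/SAC/Ghost trainer constructors:

```python
# Schematic only: the real ml-agents trainers take many more constructor arguments.
class BaseTrainer:
    def __init__(self, brain_name: str, params: dict):
        self.brain_name = brain_name
        self.params = params


class PPOLikeTrainer(BaseTrainer):
    pass


class GhostLikeTrainer(BaseTrainer):
    """Wraps another trainer and manages self-play opponents (hypothetical stand-in)."""

    def __init__(self, wrapped: BaseTrainer, brain_name: str, params: dict):
        super().__init__(brain_name, params)
        self.wrapped = wrapped


def build_trainer(brain_name: str, trainer_parameters: dict) -> BaseTrainer:
    trainer = PPOLikeTrainer(brain_name, trainer_parameters)
    # Mirror of the diff above: a "self_play" section switches the trainer into a ghost wrapper.
    if "self_play" in trainer_parameters:
        trainer = GhostLikeTrainer(trainer, brain_name, trainer_parameters)
    return trainer


print(type(build_trainer("Tennis", {"self_play": {"window": 10}})).__name__)  # GhostLikeTrainer
```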

2
notebooks/getting-started.ipynb


"outputs": [],
"source": [
"engine_configuration_channel = EngineConfigurationChannel()\n",
"env = UnityEnvironment(base_port = 5006, file_name=env_name, side_channels = [engine_configuration_channel])\n",
"env = UnityEnvironment(base_port = 5004, file_name=env_name, side_channels = [engine_configuration_channel])\n",
"\n",
"#Reset the environment\n",
"env.reset()\n",

2
protobuf-definitions/README.md


2. Un-comment line 7 in `make.sh` (for Windows, use `make_for_win.bat`), and set it to the correct Grpc.Tools sub-directory.
3. Run the protobuf generation script from the terminal by navigating to `$MLAGENTS_ROOT/protobuf-definitions` and running `make.sh` (for Windows, use `make_for_win.bat`).
4. Note any errors generated that may result from setting the wrong directory in step 2.
5. In the generated `UnityToExternalGrpc.cs` file in the `$MLAGENTS_ROOT/UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects` folder, check to see if you need to add the following to the beginning of the file:
5. In the generated `UnityToExternalGrpc.cs` file in the `$MLAGENTS_ROOT/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects` folder, check to see if you need to add the following to the beginning of the file:
```csharp
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

6
protobuf-definitions/make.sh


# COMPILER=[DIRECTORY]
SRC_DIR=proto/mlagents_envs/communicator_objects
DST_DIR_C=../UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects
DST_DIR_C=../com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects
DST_DIR_P=../ml-agents-envs
PROTO_PATH=proto
PYTHON_PACKAGE=mlagents_envs/communicator_objects

# generate proto objects in python and C#
$COMPILER/protoc --proto_path=proto --csharp_out=$DST_DIR_C $SRC_DIR/*.proto
$COMPILER/protoc --proto_path=proto --csharp_opt=internal_access --csharp_out $DST_DIR_C $SRC_DIR/*.proto
$COMPILER/protoc --proto_path=proto --python_out=$DST_DIR_P --mypy_out=$DST_DIR_P $SRC_DIR/*.proto
# grpc

$COMPILER/protoc --proto_path=proto --csharp_out $DST_DIR_C --grpc_out $DST_DIR_C $SRC_DIR/$GRPC --plugin=protoc-gen-grpc=$COMPILER/grpc_csharp_plugin
$COMPILER/protoc --proto_path=proto --csharp_out=$DST_DIR_C --grpc_out=internal_access:$DST_DIR_C $SRC_DIR/$GRPC --plugin=protoc-gen-grpc=$COMPILER/grpc_csharp_plugin
python3 -m grpc_tools.protoc --proto_path=proto --python_out=$DST_DIR_P --grpc_python_out=$DST_DIR_P $SRC_DIR/$GRPC

6
protobuf-definitions/make_for_win.bat


rem set COMPILER=[DIRECTORY]
set SRC_DIR=proto\mlagents_envs\communicator_objects
set DST_DIR_C=..\UnitySDK\Assets\ML-Agents\Scripts\Grpc\CommunicatorObjects
set DST_DIR_C=..\com.unity.ml-agents\Runtime\Grpc\CommunicatorObjects
set DST_DIR_P=..\ml-agents-envs
set PROTO_PATH=proto

rem generate proto objects in python and C#
for %%i in (%SRC_DIR%\*.proto) do (
%COMPILER%\protoc --proto_path=proto --csharp_out=%DST_DIR_C% %%i
%COMPILER%\protoc --proto_path=proto --csharp_opt=internal_access --csharp_out=%DST_DIR_C% %%i
%COMPILER%\protoc --proto_path=proto --python_out=%DST_DIR_P% %%i
)

%COMPILER%\protoc --proto_path=proto --csharp_out %DST_DIR_C% --grpc_out %DST_DIR_C% %SRC_DIR%\%GRPC% --plugin=protoc-gen-grpc=%COMPILER%\grpc_csharp_plugin.exe
%COMPILER%\protoc --proto_path=proto --csharp_out %DST_DIR_C% --grpc_out=internal_access:%DST_DIR_C% %SRC_DIR%\%GRPC% --plugin=protoc-gen-grpc=%COMPILER%\grpc_csharp_plugin.exe --csharp_opt=internal_access
python -m grpc_tools.protoc --proto_path=proto --python_out=%DST_DIR_P% --grpc_python_out=%DST_DIR_P% %SRC_DIR%\%GRPC%
rem Generate the init file for the python module

3
test_constraints_max_tf1_version.txt


# For projects with upper bounds, we should periodically update this list to the latest release version
grpcio>=1.23.0
numpy>=1.17.2
tensorflow>=1.14.0,<2.0
# Temporary workaround for https://github.com/tensorflow/tensorflow/issues/36179 and https://github.com/tensorflow/tensorflow/issues/36188
tensorflow>=1.14.0,<1.15.1
h5py>=2.10.0

2
utils/validate_meta_files.py


def main():
asset_path = "UnitySDK/Assets"
asset_path = "Project/Assets"
meta_suffix = ".meta"
python_suffix = ".py"

2
com.unity.ml-agents/CONTRIBUTING.md


## Communication
First, please read through our [code of conduct](CODE_OF_CONDUCT.md), as we
First, please read through our [code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/master/CODE_OF_CONDUCT.md), as we
expect all our contributors to follow it.
Second, before starting on a project that you intend to contribute to the

6
Project/ProjectSettings/ClusterInputManager.asset


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!236 &1
ClusterInputManager:
m_ObjectHideFlags: 0
m_Inputs: []

5
Project/ProjectSettings/GraphicsSettings.asset


- {fileID: 15106, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 10753, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 10770, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16000, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16001, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 17000, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 10783, guid: 0000000000000000f000000000000000, type: 0}
m_PreloadedShaders: []
m_SpritesDefaultMaterial: {fileID: 10754, guid: 0000000000000000f000000000000000,
type: 0}

m_AlbedoSwatchInfos: []
m_LightsUseLinearIntensity: 0
m_LightsUseColorTemperature: 0
m_LogWhenShaderIsCompiled: 0

295
Project/ProjectSettings/InputManager.asset


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!13 &1
InputManager:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Axes:
- serializedVersion: 3
m_Name: Horizontal
descriptiveName:
descriptiveNegativeName:
negativeButton: left
positiveButton: right
altNegativeButton: a
altPositiveButton: d
gravity: 3
dead: 0.001
sensitivity: 3
snap: 1
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Vertical
descriptiveName:
descriptiveNegativeName:
negativeButton: down
positiveButton: up
altNegativeButton: s
altPositiveButton: w
gravity: 3
dead: 0.001
sensitivity: 3
snap: 1
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Fire1
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: left ctrl
altNegativeButton:
altPositiveButton: mouse 0
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Fire2
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: left alt
altNegativeButton:
altPositiveButton: mouse 1
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Fire3
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: left shift
altNegativeButton:
altPositiveButton: mouse 2
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Jump
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: space
altNegativeButton:
altPositiveButton:
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Mouse X
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton:
altNegativeButton:
altPositiveButton:
gravity: 0
dead: 0
sensitivity: 0.1
snap: 0
invert: 0
type: 1
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Mouse Y
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton:
altNegativeButton:
altPositiveButton:
gravity: 0
dead: 0
sensitivity: 0.1
snap: 0
invert: 0
type: 1
axis: 1
joyNum: 0
- serializedVersion: 3
m_Name: Mouse ScrollWheel
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton:
altNegativeButton:
altPositiveButton:
gravity: 0
dead: 0
sensitivity: 0.1
snap: 0
invert: 0
type: 1
axis: 2
joyNum: 0
- serializedVersion: 3
m_Name: Horizontal
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton:
altNegativeButton:
altPositiveButton:
gravity: 0
dead: 0.19
sensitivity: 1
snap: 0
invert: 0
type: 2
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Vertical
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton:
altNegativeButton:
altPositiveButton:
gravity: 0
dead: 0.19
sensitivity: 1
snap: 0
invert: 1
type: 2
axis: 1
joyNum: 0
- serializedVersion: 3
m_Name: Fire1
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: joystick button 0
altNegativeButton:
altPositiveButton:
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Fire2
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: joystick button 1
altNegativeButton:
altPositiveButton:
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Fire3
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: joystick button 2
altNegativeButton:
altPositiveButton:
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Jump
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: joystick button 3
altNegativeButton:
altPositiveButton:
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Submit
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: return
altNegativeButton:
altPositiveButton: joystick button 0
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Submit
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: enter
altNegativeButton:
altPositiveButton: space
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0
- serializedVersion: 3
m_Name: Cancel
descriptiveName:
descriptiveNegativeName:
negativeButton:
positiveButton: escape
altNegativeButton:
altPositiveButton: joystick button 1
gravity: 1000
dead: 0.001
sensitivity: 1000
snap: 0
invert: 0
type: 0
axis: 0
joyNum: 0

2
Project/ProjectSettings/NavMeshAreas.asset


manualTileSize: 0
tileSize: 256
accuratePlacement: 0
debug:
m_Flags: 0
m_SettingNames:
- Humanoid

9
Project/ProjectSettings/TimeManager.asset


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!5 &1
TimeManager:
m_ObjectHideFlags: 0
Fixed Timestep: 0.02
Maximum Allowed Timestep: 0.33333334
m_TimeScale: 1
Maximum Particle Timestep: 0.03

1
Project/ProjectSettings/EditorBuildSettings.asset


m_ObjectHideFlags: 0
serializedVersion: 2
m_Scenes: []
m_configObjects: {}

190
Project/ProjectSettings/ProjectSettings.asset


--- !u!129 &1
PlayerSettings:
m_ObjectHideFlags: 0
serializedVersion: 14
serializedVersion: 18
AndroidEnableSustainedPerformanceMode: 0
defaultScreenOrientation: 4
targetDevice: 2
useOnDemandResources: 0

defaultCursor: {fileID: 0}
cursorHotspot: {x: 0, y: 0}
m_SplashScreenBackgroundColor: {r: 0.13725491, g: 0.12156863, b: 0.1254902, a: 1}
m_ShowUnitySplashScreen: 1
m_ShowUnitySplashScreen: 0
m_ShowUnitySplashLogo: 1
m_SplashScreenOverlayOpacity: 1
m_SplashScreenAnimation: 1

m_SplashScreenLogos: []
m_VirtualRealitySplashScreen: {fileID: 0}
m_HolographicTrackingLossScreen: {fileID: 0}
defaultScreenWidth: 1024
defaultScreenHeight: 768
defaultScreenWidth: 80
defaultScreenHeight: 80
defaultScreenWidthWeb: 960
defaultScreenHeightWeb: 600
m_StereoRenderingPath: 0

iosShowActivityIndicatorOnLoading: -1
androidShowActivityIndicatorOnLoading: -1
tizenShowActivityIndicatorOnLoading: -1
iosAppInBackgroundBehavior: 0
iosUseCustomAppBackgroundBehavior: 0
iosAllowHTTPDownload: 1
allowedAutorotateToPortrait: 1
allowedAutorotateToPortraitUpsideDown: 1

use32BitDisplayBuffer: 1
preserveFramebufferAlpha: 0
disableDepthAndStencilBuffers: 0
androidStartInFullscreen: 1
androidRenderOutsideSafeArea: 0
defaultIsFullScreen: 0
defaultIsNativeResolution: 1
macRetinaSupport: 1
runInBackground: 1

visibleInBackground: 0
allowFullscreenSwitch: 1
graphicsJobMode: 0
macFullscreenMode: 2
d3d11FullscreenMode: 1
fullscreenMode: 3
n3dsDisableStereoscopicView: 0
n3dsEnableSharedListOpt: 1
n3dsEnableVSync: 0
xboxOneResolution: 0
xboxOneSResolution: 0
xboxOneXResolution: 3

xboxOnePresentImmediateThreshold: 0
videoMemoryForVertexBuffers: 0
psp2PowerMode: 0
psp2AcquireBGM: 1
wiiUTVResolution: 0
wiiUGamePadMSAA: 1
wiiUSupportsNunchuk: 0
wiiUSupportsClassicController: 0
wiiUSupportsBalanceBoard: 0
wiiUSupportsMotionPlus: 0
wiiUSupportsProController: 0
wiiUAllowScreenCapture: 1
wiiUControllerCount: 0
switchQueueCommandMemory: 1048576
switchQueueControlMemory: 16384
switchQueueComputeMemory: 262144
switchNVNShaderPoolsGranularity: 33554432
switchNVNDefaultPoolsGranularity: 16777216
switchNVNOtherPoolsGranularity: 16777216
vulkanEnableSetSRGBWrite: 0
m_SupportedAspectRatios:
4:3: 1
5:4: 1

m_HolographicPauseOnTrackingLoss: 1
xboxOneDisableKinectGpuReservation: 0
xboxOneEnable7thCore: 0
isWsaHolographicRemotingEnabled: 0
vrSettings:
cardboard:
depthFormat: 0

oculus:
sharedDepthBuffer: 0
dashSupport: 0
lowOverheadMode: 0
protectedContext: 0
v2Signing: 0
enable360StereoCapture: 0
enableFrameTimingStats: 0
useHDRDisplay: 0
m_ColorGamuts: 00000000
targetPixelDensity: 30

APKExpansionFiles: 0
keepLoadedShadersAlive: 0
StripUnusedMeshComponents: 0
VertexChannelCompressionMask:
serializedVersion: 2
m_Bits: 4294901998
VertexChannelCompressionMask: 214
iOSTargetOSVersionString: 8.0
iOSTargetOSVersionString: 9.0
tvOSSdkVersion: 0
tvOSRequireExtendedGameController: 0
tvOSTargetOSVersionString: 9.0

tvOSSmallIconLayers: []
tvOSSmallIconLayers2x: []
tvOSLargeIconLayers: []
tvOSLargeIconLayers2x: []
tvOSTopShelfImageLayers: []
tvOSTopShelfImageLayers2x: []
tvOSTopShelfImageWideLayers: []

appleDeveloperTeamID:
iOSManualSigningProvisioningProfileID:
tvOSManualSigningProvisioningProfileID:
iOSManualSigningProvisioningProfileType: 0
tvOSManualSigningProvisioningProfileType: 0
iOSRequireARKit: 0
iOSAutomaticallyDetectAndAddCapabilities: 1
appleEnableProMotion: 0
AndroidTargetDevice: 0
templatePackageId:
templateDefaultScene:
AndroidTargetArchitectures: 5
AndroidBuildApkPerCpuArchitecture: 0
AndroidTVCompatibility: 1
AndroidIsGame: 1
AndroidEnableTango: 0

androidGamepadSupportLevel: 0
resolutionDialogBanner: {fileID: 0}
m_BuildTargetIcons: []
m_BuildTargetPlatformIcons: []
m_BuildTargetBatching: []
m_BuildTargetGraphicsAPIs:
- m_BuildTarget: MacStandaloneSupport

m_EncodingQuality: 1
- m_BuildTarget: PS4
m_EncodingQuality: 1
wiiUTitleID: 0005000011000000
wiiUGroupID: 00010000
wiiUCommonSaveSize: 4096
wiiUAccountSaveSize: 2048
wiiUOlvAccessKey: 0
wiiUTinCode: 0
wiiUJoinGameId: 0
wiiUJoinGameModeMask: 0000000000000000
wiiUCommonBossSize: 0
wiiUAccountBossSize: 0
wiiUAddOnUniqueIDs: []
wiiUMainThreadStackSize: 3072
wiiULoaderThreadStackSize: 1024
wiiUSystemHeapSize: 128
wiiUTVStartupScreen: {fileID: 0}
wiiUGamePadStartupScreen: {fileID: 0}
wiiUDrcBufferDisabled: 0
wiiUProfilerLibPath:
m_BuildTargetGroupLightmapSettings: []
runPlayModeTestAsEditModeTest: 0
actionOnDotNetUnhandledException: 1
enableInternalProfiler: 0
logObjCUncaughtExceptions: 1

switchRatingsInt_9: 0
switchRatingsInt_10: 0
switchRatingsInt_11: 0
switchRatingsInt_12: 0
switchLocalCommunicationIds_0: 0x0005000C10000001
switchLocalCommunicationIds_1:
switchLocalCommunicationIds_2:

switchAllowsVideoCapturing: 1
switchAllowsRuntimeAddOnContentInstall: 0
switchDataLossConfirmation: 0
switchUserAccountLockEnabled: 0
switchSystemResourceMemory: 16777216
switchNativeFsCacheSize: 32
switchIsHoldTypeHorizontal: 0
switchSupportedNpadCount: 8
switchSocketConfigEnabled: 0
switchTcpInitialSendBufferSize: 32
switchTcpInitialReceiveBufferSize: 64

ps4pnFriends: 1
ps4pnGameCustomData: 1
playerPrefsSupport: 0
enableApplicationExit: 0
resetTempFolder: 1
restrictedAudioUsageRights: 0
ps4UseResolutionFallback: 0
ps4ReprojectionSupport: 0

ps4attribEyeToEyeDistanceSettingVR: 0
ps4IncludedModules: []
monoEnv:
psp2Splashimage: {fileID: 0}
psp2NPTrophyPackPath:
psp2NPSupportGBMorGJP: 0
psp2NPAgeRating: 12
psp2NPTitleDatPath:
psp2NPCommsID:
psp2NPCommunicationsID:
psp2NPCommsPassphrase:
psp2NPCommsSig:
psp2ParamSfxPath:
psp2ManualPath:
psp2LiveAreaGatePath:
psp2LiveAreaBackroundPath:
psp2LiveAreaPath:
psp2LiveAreaTrialPath:
psp2PatchChangeInfoPath:
psp2PatchOriginalPackage:
psp2PackagePassword: V6GXi5xr84P2R391UXaLHbavJvFZGfO4
psp2KeystoneFile:
psp2MemoryExpansionMode: 0
psp2DRMType: 0
psp2StorageType: 0
psp2MediaCapacity: 0
psp2DLCConfigPath:
psp2ThumbnailPath:
psp2BackgroundPath:
psp2SoundPath:
psp2TrophyCommId:
psp2TrophyPackagePath:
psp2PackagedResourcesPath:
psp2SaveDataQuota: 10240
psp2ParentalLevel: 1
psp2ShortTitle: Not Set
psp2ContentID: IV0000-ABCD12345_00-0123456789ABCDEF
psp2Category: 0
psp2MasterVersion: 01.00
psp2AppVersion: 01.00
psp2TVBootMode: 0
psp2EnterButtonAssignment: 2
psp2TVDisableEmu: 0
psp2AllowTwitterDialog: 1
psp2Upgradable: 0
psp2HealthWarning: 0
psp2UseLibLocation: 0
psp2InfoBarOnStartup: 0
psp2InfoBarColor: 0
psp2ScriptOptimizationLevel: 0
psmSplashimage: {fileID: 0}
splashScreenBackgroundSourceLandscape: {fileID: 0}
splashScreenBackgroundSourcePortrait: {fileID: 0}
spritePackerPolicy:

webGLTemplate: APPLICATION:Default
webGLAnalyzeBuildSize: 0
webGLUseEmbeddedResources: 0
webGLUseWasm: 0
webGLLinkerTarget: 1
webGLThreadsSupport: 0
scriptingDefineSymbols:
1:
7: UNITY_POST_PROCESSING_STACK_V2

27: UNITY_POST_PROCESSING_STACK_V2
platformArchitecture: {}
scriptingBackend: {}
il2cppCompilerConfiguration: {}
managedStrippingLevel: {}
allowUnsafeCode: 0
additionalIl2CppArgs:
scriptingRuntimeVersion: 1
apiCompatibilityLevelPerPlatform:

metroApplicationDescription: UnityEnvironment
wsaImages: {}
metroTileShortName:
metroCommandLineArgsFile:
metroSupportStreamingInstall: 0
metroLastRequiredScene: 0
metroDefaultTileSize: 1
metroTileForegroundText: 2
metroTileBackgroundColor: {r: 0.13333334, g: 0.17254902, b: 0.21568628, a: 0}

platformCapabilities: {}
metroTargetDeviceFamilies: {}
tizenProductDescription:
tizenProductURL:
tizenSigningProfileName:
tizenGPSPermissions: 0
tizenMicrophonePermissions: 0
tizenDeploymentTarget:
tizenDeploymentTargetType: -1
tizenMinOSVersion: 1
n3dsUseExtSaveData: 0
n3dsCompressStaticMem: 1
n3dsExtSaveDataNumber: 0x12345
n3dsStackSize: 131072
n3dsTargetPlatform: 2
n3dsRegion: 7
n3dsMediaSize: 0
n3dsLogoStyle: 3
n3dsTitle: GameName
n3dsProductCode:
n3dsApplicationId: 0xFF3FF
XboxOneProductId:
XboxOneUpdateKey:
XboxOneSandboxId:

XboxOneGameOsOverridePath:
XboxOnePackagingOverridePath:
XboxOneAppManifestOverridePath:
XboxOneVersion: 1.0.0.0
XboxOnePackageEncryption: 0
XboxOnePackageUpdateGranularity: 2
XboxOneDescription:

XboxOnePersistentLocalStorageSize: 0
XboxOneXTitleMemory: 8
xboxOneScriptCompiler: 0
XboxOneOverrideIdentityName:
luminIcon:
m_Name:
m_ModelFolderPath:
m_PortalFolderPath:
luminCert:
m_CertPath:
m_PrivateKeyPath:
luminIsChannelApp: 0
luminVersion:
m_VersionCode: 1
m_VersionName:
facebookAppId:
facebookCookies: 1
facebookLogging: 1
facebookStatus: 1
facebookXfbml: 0
facebookFrictionlessRequests: 1
framebufferDepthMemorylessMode: 0
legacyClampBlendShapeWeights: 1

1
Project/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2018.4.14f1

5
com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x86.dll.meta


serializedVersion: 2
iconMap: {}
executionOrder: {}
defineConstraints: []
isExplicitlyReferenced: 0
validateReferences: 1
platformData:
- first:
'': Any

second:
enabled: 1
settings:
CPU: x86_64
CPU: AnyCPU
- first:
Standalone: LinuxUniversal
second:

5
com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x64.dll.meta


serializedVersion: 2
iconMap: {}
executionOrder: {}
defineConstraints: []
isExplicitlyReferenced: 0
validateReferences: 1
platformData:
- first:
'': Any

second:
enabled: 1
settings:
CPU: x86_64
CPU: AnyCPU
- first:
Standalone: LinuxUniversal
second:

247
com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/osx/native/libgrpc_csharp_ext.x64.bundle.meta


fileFormatVersion: 2
guid: 7eeb863bd08ba4388829c23da03a714f
PluginImporter:
externalObjects: {}
defineConstraints: []
isExplicitlyReferenced: 0
validateReferences: 1
data:
first:
'': Any
second:
enabled: 0
settings:
Exclude Android: 1
Exclude Editor: 0
Exclude Linux: 1
Exclude Linux64: 1
Exclude LinuxUniversal: 1
Exclude OSXIntel: 0
Exclude OSXIntel64: 0
Exclude OSXUniversal: 0
Exclude Win: 1
Exclude Win64: 1
Exclude iOS: 1
data:
first:
'': OSXIntel
second:
enabled: 1
settings: {}
data:
first:
'': OSXIntel64
second:
enabled: 1
settings: {}
data:
first:
Android: Android
second:
enabled: 0
settings:
CPU: ARMv7
data:
first:
Any:
second:
enabled: 0
settings: {}
data:
first:
Editor: Editor
second:
enabled: 1
settings:
CPU: x86_64
DefaultValueInitialized: true
OS: OSX
data:
first:
Facebook: Win
second:
enabled: 0
settings:
CPU: AnyCPU
data:
first:
Facebook: Win64
second:
enabled: 0
settings:
CPU: AnyCPU
data:
first:
Standalone: Linux
second:
enabled: 0
settings:
CPU: x86
data:
first:
Standalone: Linux64
second:
enabled: 0
settings:
CPU: x86_64
data:
first:
Standalone: LinuxUniversal
second:
enabled: 0
settings:
CPU: None
data:
first:
Standalone: OSXIntel
second:
enabled: 1
settings:
CPU: AnyCPU
data:
first:
Standalone: OSXIntel64
second:
enabled: 1
settings:
CPU: AnyCPU
data:
first:
Standalone: OSXUniversal
second:
enabled: 1
settings:
CPU: AnyCPU
data:
first:
Standalone: Win
second:
enabled: 0
settings:
CPU: AnyCPU
data:
first:
Standalone: Win64
second:
enabled: 0
settings:
CPU: AnyCPU
data:
first:
iPhone: iOS
second:
enabled: 0
settings:
AddToEmbeddedBinaries: false
CompileFlags:
FrameworkDependencies:
- first:
'': Any
second:
enabled: 0
settings:
Exclude Android: 1
Exclude Editor: 0
Exclude Linux: 1
Exclude Linux64: 1
Exclude LinuxUniversal: 1
Exclude OSXIntel: 0
Exclude OSXIntel64: 0
Exclude OSXUniversal: 0
Exclude Win: 1
Exclude Win64: 1
Exclude iOS: 1
- first:
'': OSXIntel
second:
enabled: 1
settings: {}
- first:
'': OSXIntel64
second:
enabled: 1
settings: {}
- first:
Android: Android
second:
enabled: 0
settings:
CPU: ARMv7
- first:
Any:
second:
enabled: 0
settings: {}
- first:
Editor: Editor
second:
enabled: 1
settings:
CPU: x86_64
DefaultValueInitialized: true
OS: OSX
- first:
Facebook: Win
second:
enabled: 0
settings:
CPU: AnyCPU
- first:
Facebook: Win64
second:
enabled: 0
settings:
CPU: AnyCPU
- first:
Standalone: Linux
second:
enabled: 0
settings:
CPU: x86
- first:
Standalone: Linux64
second:
enabled: 0
settings:
CPU: AnyCPU
- first:
Standalone: LinuxUniversal
second:
enabled: 0
settings:
CPU: None
- first:
Standalone: OSXIntel
second:
enabled: 1
settings:
CPU: AnyCPU
- first:
Standalone: OSXIntel64
second:
enabled: 1
settings:
CPU: AnyCPU
- first:
Standalone: OSXUniversal
second:
enabled: 1
settings:
CPU: AnyCPU
- first:
Standalone: Win
second:
enabled: 0
settings:
CPU: AnyCPU
- first:
Standalone: Win64
second:
enabled: 0
settings:
CPU: AnyCPU
- first:
iPhone: iOS
second:
enabled: 0
settings:
AddToEmbeddedBinaries: false
CompileFlags:
FrameworkDependencies:
userData:
assetBundleName:
assetBundleVariant:

5
com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/linux/native/libgrpc_csharp_ext.x64.so.meta


serializedVersion: 2
iconMap: {}
executionOrder: {}
defineConstraints: []
isExplicitlyReferenced: 0
validateReferences: 1
platformData:
- first:
'': Any

second:
enabled: 1
settings:
CPU: x86_64
CPU: AnyCPU
- first:
Standalone: LinuxUniversal
second:

5
com.unity.ml-agents/Plugins/ProtoBuffer/Grpc.Core.dll.meta


serializedVersion: 2
iconMap: {}
executionOrder: {}
defineConstraints: []
isExplicitlyReferenced: 0
validateReferences: 1
platformData:
- first:
'': Any

second:
enabled: 1
settings:
CPU: x86_64
CPU: AnyCPU
- first:
Standalone: LinuxUniversal
second:

1
com.unity.ml-agents/Editor/DemonstrationDrawer.cs


return actionLabel.ToString();
}
/// <summary>
/// Renders Inspector UI for Brain Parameters of Demonstration.
/// </summary>

2
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


{
return GetHeightDrawVectorObservation() +
GetHeightDrawVectorAction(property);
var indent = EditorGUI.indentLevel;
EditorGUI.indentLevel = 0;
position.height = k_LineHeight;

2
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


var model = (NNModel)serializedObject.FindProperty("m_Model").objectReferenceValue;
var behaviorParameters = (BehaviorParameters)target;
SensorComponent[] sensorComponents;
if(behaviorParameters.useChildSensors)
if (behaviorParameters.useChildSensors)
{
sensorComponents = behaviorParameters.GetComponentsInChildren<SensorComponent>();
}

25
com.unity.ml-agents/Editor/AgentEditor.cs


var serializedAgent = serializedObject;
serializedAgent.Update();
var actionsPerDecision = serializedAgent.FindProperty(
"agentParameters.numberOfActionsBetweenDecisions");
"agentParameters.maxStep");
var isResetOnDone = serializedAgent.FindProperty(
"agentParameters.resetOnDone");
var isOdd = serializedAgent.FindProperty(
"agentParameters.onDemandDecision");
"maxStep");
EditorGUILayout.PropertyField(
isOdd,
new GUIContent(
"On Demand Decisions",
"If checked, you must manually request decisions."));
if (!isOdd.boolValue)
{
EditorGUILayout.PropertyField(
actionsPerDecision,
new GUIContent(
"Decision Interval",
"The agent will automatically request a decision every X" +
" steps and perform an action at every step."));
actionsPerDecision.intValue = Mathf.Max(1, actionsPerDecision.intValue);
}
serializedAgent.ApplyModifiedProperties();

2
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


{
public class SideChannelTests
{
public List<int> m_MessagesReceived = new List<int>();
public override int ChannelType() { return -1; }

6
com.unity.ml-agents/Tests/Editor/Sensor/WriterAdapterTests.cs


// AddRange
writer.SetTarget(buffer, shape, 0);
writer.AddRange(new [] {4f, 5f});
writer.AddRange(new[] {4f, 5f});
writer.AddRange(new [] {6f, 7f});
writer.AddRange(new[] {6f, 7f});
Assert.AreEqual(new[] { 4f, 6f, 7f }, buffer);
}

};
writer.SetTarget(t, 1, 1);
writer.AddRange(new [] {-1f, -2f});
writer.AddRange(new[] {-1f, -2f});
Assert.AreEqual(0f, t.data[0, 0]);
Assert.AreEqual(0f, t.data[0, 1]);
Assert.AreEqual(0f, t.data[0, 2]);

23
com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs


// Check that Update() clears the data
sensor.Update();
SensorTestHelper.CompareObservation(sensor, new[] { 0f, 0f, 0f, 0f });
}
[Test]

sensor.AddObservation(1.2f);
SensorTestHelper.CompareObservation(sensor, new []{1.2f});
SensorTestHelper.CompareObservation(sensor, new[] {1.2f});
}
[Test]

sensor.AddObservation(42);
SensorTestHelper.CompareObservation(sensor, new []{42f});
SensorTestHelper.CompareObservation(sensor, new[] {42f});
}
[Test]

sensor.AddObservation(new Vector3(1,2,3));
SensorTestHelper.CompareObservation(sensor, new []{1f, 2f, 3f});
sensor.AddObservation(new Vector3(1, 2, 3));
SensorTestHelper.CompareObservation(sensor, new[] {1f, 2f, 3f});
sensor.AddObservation(new Vector2(4,5));
sensor.AddObservation(new Vector2(4, 5));
SensorTestHelper.CompareObservation(sensor, new[] { 4f, 5f });
}

var sensor = new VectorSensor(4);
sensor.AddObservation(Quaternion.identity);
SensorTestHelper.CompareObservation(sensor, new []{0f, 0f, 0f, 1f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 0f, 1f});
}
[Test]

sensor.AddObservation(new [] {1f, 2f, 3f, 4f});
sensor.AddObservation(new[] {1f, 2f, 3f, 4f});
SensorTestHelper.CompareObservation(sensor, new[] { 1f, 2f, 3f, 4f });
}

{
var sensor = new VectorSensor(1);
sensor.AddObservation(true);
SensorTestHelper.CompareObservation(sensor, new []{1f});
SensorTestHelper.CompareObservation(sensor, new[] {1f});
}
[Test]

sensor.AddOneHotObservation(2, 4);
SensorTestHelper.CompareObservation(sensor, new []{0f, 0f, 1f, 0f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 1f, 0f});
}
[Test]

sensor.AddObservation(new [] {1f, 2f, 3f, 4f});
sensor.AddObservation(new[] {1f, 2f, 3f, 4f});
SensorTestHelper.CompareObservation(sensor, new[] { 1f, 2f});
}

{
var sensor = new VectorSensor(4);
sensor.AddObservation(new [] {1f, 2f});
sensor.AddObservation(new[] {1f, 2f});
// Make sure extra zeros are added
SensorTestHelper.CompareObservation(sensor, new[] { 1f, 2f, 0f, 0f});

26
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


ISensor wrapped = new VectorSensor(4);
ISensor sensor = new StackingSensor(wrapped, 4);
Assert.AreEqual("StackingSensor_size4_VectorSensor_size4", sensor.GetName());
Assert.AreEqual(sensor.GetObservationShape(), new [] {16});
Assert.AreEqual(sensor.GetObservationShape(), new[] {16});
}
[Test]

ISensor sensor = new StackingSensor(wrapped, 3);
wrapped.AddObservation(new [] {1f, 2f});
SensorTestHelper.CompareObservation(sensor, new [] {0f, 0f, 0f, 0f, 1f, 2f});
wrapped.AddObservation(new[] {1f, 2f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 0f, 0f, 1f, 2f});
wrapped.AddObservation(new [] {3f, 4f});
SensorTestHelper.CompareObservation(sensor, new [] {0f, 0f, 1f, 2f, 3f, 4f});
wrapped.AddObservation(new[] {3f, 4f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 1f, 2f, 3f, 4f});
wrapped.AddObservation(new [] {5f, 6f});
SensorTestHelper.CompareObservation(sensor, new [] {1f, 2f, 3f, 4f, 5f, 6f});
wrapped.AddObservation(new[] {5f, 6f});
SensorTestHelper.CompareObservation(sensor, new[] {1f, 2f, 3f, 4f, 5f, 6f});
wrapped.AddObservation(new [] {7f, 8f});
SensorTestHelper.CompareObservation(sensor, new [] {3f, 4f, 5f, 6f, 7f, 8f});
wrapped.AddObservation(new[] {7f, 8f});
SensorTestHelper.CompareObservation(sensor, new[] {3f, 4f, 5f, 6f, 7f, 8f});
wrapped.AddObservation(new [] {9f, 10f});
SensorTestHelper.CompareObservation(sensor, new [] {5f, 6f, 7f, 8f, 9f, 10f});
wrapped.AddObservation(new[] {9f, 10f});
SensorTestHelper.CompareObservation(sensor, new[] {5f, 6f, 7f, 8f, 9f, 10f});
SensorTestHelper.CompareObservation(sensor, new [] {5f, 6f, 7f, 8f, 9f, 10f});
SensorTestHelper.CompareObservation(sensor, new[] {5f, 6f, 7f, 8f, 9f, 10f});
}
}

197
com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs


using System;
using System.Collections.Generic;
using NUnit.Framework;
using UnityEngine;
using MLAgents.Sensor;

public void TestGetRayAngles()
{
var angles = RayPerceptionSensorComponentBase.GetRayAngles(3, 90f);
var expectedAngles = new [] { 90f, 60f, 120f, 30f, 150f, 0f, 180f };
var expectedAngles = new[] { 90f, 60f, 120f, 30f, 150f, 0f, 180f };
}
}
}
public class RayPerception3DTests
{
// Use built-in tags
const string k_CubeTag = "Player";
const string k_SphereTag = "Respawn";
void SetupScene()
{
/* Creates game objects in the world for testing.
* C is a cube
* S are spheres
* @ is the agent (at the origin)
* Each space or line is 5 world units, +x is right, +z is up
*
* C
* S S
* @
*
* S
*/
var cube = GameObject.CreatePrimitive(PrimitiveType.Cube);
cube.transform.position = new Vector3(0, 0, 10);
cube.tag = k_CubeTag;
var sphere1 = GameObject.CreatePrimitive(PrimitiveType.Sphere);
sphere1.transform.position = new Vector3(-5, 0, 5);
sphere1.tag = k_SphereTag;
var sphere2 = GameObject.CreatePrimitive(PrimitiveType.Sphere);
sphere2.transform.position = new Vector3(5, 0, 5);
// No tag for sphere2
var sphere3 = GameObject.CreatePrimitive(PrimitiveType.Sphere);
sphere3.transform.position = new Vector3(0, 0, -10);
sphere3.tag = k_SphereTag;
Physics.SyncTransforms();
}
[Test]
public void TestRaycasts()
{
SetupScene();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.raysPerDirection = 1;
perception.maxRayDegrees = 45;
perception.rayLength = 20;
perception.detectableTags = new List<string>();
perception.detectableTags.Add(k_CubeTag);
perception.detectableTags.Add(k_SphereTag);
var radii = new[] { 0f, .5f };
foreach (var castRadius in radii)
{
perception.sphereCastRadius = castRadius;
var sensor = perception.CreateSensor();
var expectedObs = (2 * perception.raysPerDirection + 1) * (perception.detectableTags.Count + 2);
Assert.AreEqual(sensor.GetObservationShape()[0], expectedObs);
var outputBuffer = new float[expectedObs];
WriteAdapter writer = new WriteAdapter();
writer.SetTarget(outputBuffer, sensor.GetObservationShape(), 0);
var numWritten = sensor.Write(writer);
Assert.AreEqual(numWritten, expectedObs);
// Expected hits:
// ray 0 should hit the cube at roughly halfway
// ray 1 should hit a sphere but no tag
// ray 2 should hit a sphere with the k_SphereTag tag
// The hit fraction should be the same for rays 1 and
//
Assert.AreEqual(1.0f, outputBuffer[0]); // hit cube
Assert.AreEqual(0.0f, outputBuffer[1]); // missed sphere
Assert.AreEqual(0.0f, outputBuffer[2]); // missed unknown tag
// Hit is at z=9.0 in world space, ray length is 20
Assert.That(
outputBuffer[3], Is.EqualTo((9.5f - castRadius) / perception.rayLength).Within(.0005f)
);
// Spheres are at 5,0,5 and 5,0,-5, so 5*sqrt(2) units from origin
// Minus 1.0 for the sphere radius to get the length of the hit.
var expectedHitLengthWorldSpace = 5.0f * Mathf.Sqrt(2.0f) - 0.5f - castRadius;
Assert.AreEqual(0.0f, outputBuffer[4]); // missed cube
Assert.AreEqual(0.0f, outputBuffer[5]); // missed sphere
Assert.AreEqual(0.0f, outputBuffer[6]); // hit unknown tag -> all 0
Assert.That(
outputBuffer[7], Is.EqualTo(expectedHitLengthWorldSpace / perception.rayLength).Within(.0005f)
);
Assert.AreEqual(0.0f, outputBuffer[8]); // missed cube
Assert.AreEqual(1.0f, outputBuffer[9]); // hit sphere
Assert.AreEqual(0.0f, outputBuffer[10]); // missed unknown tag
Assert.That(
outputBuffer[11], Is.EqualTo(expectedHitLengthWorldSpace / perception.rayLength).Within(.0005f)
);
}
}
[Test]
public void TestRaycastMiss()
{
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.raysPerDirection = 0;
perception.maxRayDegrees = 45;
perception.rayLength = 20;
perception.detectableTags = new List<string>();
perception.detectableTags.Add(k_CubeTag);
perception.detectableTags.Add(k_SphereTag);
var sensor = perception.CreateSensor();
var expectedObs = (2 * perception.raysPerDirection + 1) * (perception.detectableTags.Count + 2);
Assert.AreEqual(sensor.GetObservationShape()[0], expectedObs);
var outputBuffer = new float[expectedObs];
WriteAdapter writer = new WriteAdapter();
writer.SetTarget(outputBuffer, sensor.GetObservationShape(), 0);
var numWritten = sensor.Write(writer);
Assert.AreEqual(numWritten, expectedObs);
// Everything missed
Assert.AreEqual(new float[] { 0, 0, 1, 1 }, outputBuffer);
}
[Test]
public void TestRayFilter()
{
var cube = GameObject.CreatePrimitive(PrimitiveType.Cube);
cube.transform.position = new Vector3(0, 0, 10);
cube.tag = k_CubeTag;
cube.name = "cubeFar";
var cubeFiltered = GameObject.CreatePrimitive(PrimitiveType.Cube);
cubeFiltered.transform.position = new Vector3(0, 0, 5);
cubeFiltered.tag = k_CubeTag;
cubeFiltered.name = "cubeNear";
cubeFiltered.layer = 7;
Physics.SyncTransforms();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.raysPerDirection = 0;
perception.rayLength = 20;
perception.detectableTags = new List<string>();
var filterCubeLayers = new[] { false, true };
foreach (var filterCubeLayer in filterCubeLayers)
{
// Set the layer mask to either the default, or one that ignores the close cube's layer
var layerMask = Physics.DefaultRaycastLayers;
if (filterCubeLayer)
{
layerMask &= ~(1 << cubeFiltered.layer);
}
perception.rayLayerMask = layerMask;
var sensor = perception.CreateSensor();
var expectedObs = (2 * perception.raysPerDirection + 1) * (perception.detectableTags.Count + 2);
Assert.AreEqual(sensor.GetObservationShape()[0], expectedObs);
var outputBuffer = new float[expectedObs];
WriteAdapter writer = new WriteAdapter();
writer.SetTarget(outputBuffer, sensor.GetObservationShape(), 0);
var numWritten = sensor.Write(writer);
Assert.AreEqual(numWritten, expectedObs);
if (filterCubeLayer)
{
// Hit the far cube because close was filtered.
Assert.That(outputBuffer[outputBuffer.Length - 1],
Is.EqualTo((9.5f - perception.sphereCastRadius) / perception.rayLength).Within(.0005f)
);
}
else
{
// Hit the close cube because not filtered.
Assert.That(outputBuffer[outputBuffer.Length - 1],
Is.EqualTo((4.5f - perception.sphereCastRadius) / perception.rayLength).Within(.0005f)
);
}
}
}
}

2
com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs


}
}
public void Update() { }
public void Update() {}
public SensorCompressionType GetCompressionType()
{

107
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
public class TestAgent : Agent
{
public AgentInfo _Info
{
get
{
return (AgentInfo)typeof(Agent).GetField("m_Info", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
}
set
{
typeof(Agent).GetField("m_Info", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(this, value);
}
}
public int initializeAgentCalls;
public int collectObservationsCalls;
public int agentActionCalls;

{
return new float[0];
}
}
public class TestSensor : ISensor

return sensorName;
}
public void Update() { }
public void Update() {}
}
[TestFixture]

agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
Assert.AreEqual(false, agent1.IsDone());
Assert.AreEqual(false, agent2.IsDone());
Assert.AreEqual(0, agent1.agentResetCalls);
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(0, agent1.initializeAgentCalls);

var agentEnableMethod = typeof(Agent).GetMethod("OnEnableHelper",
BindingFlags.Instance | BindingFlags.NonPublic);
agentEnableMethod?.Invoke(agent2, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { });
agentEnableMethod?.Invoke(agent2, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] {});
Assert.AreEqual(false, agent1.IsDone());
Assert.AreEqual(false, agent2.IsDone());
// agent1 was not enabled when the academy started
// The agents have been initialized
Assert.AreEqual(0, agent1.agentResetCalls);

var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
// We use event based so the agent will now try to send anything to the brain
agent1.agentParameters.onDemandDecision = false;
agent1.agentParameters.numberOfActionsBetweenDecisions = 2;
var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
decisionRequester.DecisionPeriod = 2;
decisionRequester.Awake();
agent2.agentParameters.onDemandDecision = true;
agentEnableMethod?.Invoke(agent1, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] {});
var numberAgent1Reset = 0;
var numberAgent2Initialization = 0;

//Agent 2 is only initialized at step 2
if (i == 2)
{
agentEnableMethod?.Invoke(agent2, new object[] { });
agentEnableMethod?.Invoke(agent2, new object[] {});
numberAgent2Initialization += 1;
}

var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
// We use event based so the agent will now try to send anything to the brain
agent1.agentParameters.onDemandDecision = false;
agent1.agentParameters.numberOfActionsBetweenDecisions = 2;
// agent1 will take an action at every step and request a decision every 2 steps
agent2.agentParameters.onDemandDecision = true;
var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
decisionRequester.DecisionPeriod = 2;
agentEnableMethod?.Invoke(agent2, new object[] { });
agentEnableMethod?.Invoke(agent2, new object[] {});
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;

//Agent 1 is only initialized at step 2
if (i == 2)
{
agentEnableMethod?.Invoke(agent1, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] {});
numberAgent1Reset += 1;
if (!(agent2.IsDone()))
{
// If the agent was already reset before the request decision
// We should not reset again
agent2.Done();
numberAgent2Reset += 1;
agent2StepSinceReset = 0;
}
agent2.Done();
numberAgent2Reset += 1;
agent2StepSinceReset = 0;
}
// Request a decision for agent 2 regularly
if (i % 3 == 2)

// Request an action without decision regularly
agent2.RequestAction();
}
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)))
{
numberAgent1Reset += 1;
}
//Agent 1 is only initialized at step 2
if (i < 2)
{ }
aca.EnvironmentStep();
}
}

public class EditModeTestMiscellaneous
{
[SetUp]
public void SetUp()
{

var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
// We use event based so the agent will now try to send anything to the brain
agent1.agentParameters.onDemandDecision = false;
agent1.agentParameters.numberOfActionsBetweenDecisions = 3;
// agent1 will take an action at every step and request a decision every 2 steps
agent2.agentParameters.onDemandDecision = true;
// agent2 will request decisions only when RequestDecision is called
agent1.agentParameters.maxStep = 20;
var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
decisionRequester.DecisionPeriod = 2;
decisionRequester.Awake();
agent1.maxStep = 20;
agentEnableMethod?.Invoke(agent2, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { });
agentEnableMethod?.Invoke(agent2, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] {});
if (i % 20 == 0)
{
j = 0;
}
else
{
j++;
}
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
agent1.AddReward(10f);
agent1.AddReward(10f);
if ((i % 21 == 0) && (i > 0))
{
j = 0;
}
j++;
}
}
}

23
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs


}
}
static List<Agent> GetFakeAgents()
static List<TestAgent> GetFakeAgents()
{
var goA = new GameObject("goA");
var bpA = goA.AddComponent<BehaviorParameters>();

bpB.brainParameters.numStackedVectorObservations = 1;
var agentB = goB.AddComponent<TestAgent>();
var agents = new List<Agent> { agentA, agentB };
var agents = new List<TestAgent> { agentA, agentB };
agentEnableMethod?.Invoke(agent, new object[] { });
agentEnableMethod?.Invoke(agent, new object[] {});
}
agentA.collectObservationsSensor.AddObservation(new Vector3(1, 2, 3));
agentB.collectObservationsSensor.AddObservation(new Vector3(4, 5, 6));

actionMasks = new[] { true, false, false, false, false },
};
agentA.Info = infoA;
agentB.Info = infoB;
agentA._Info = infoA;
agentB._Info = infoB;
return agents;
}

var agent1 = agentInfos[1];
var inputs = new List<AgentInfoSensorsPair>
{
new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
new AgentInfoSensorsPair {agentInfo = agent0._Info, sensors = agent0.sensors},
new AgentInfoSensorsPair {agentInfo = agent1._Info, sensors = agent1.sensors},
};
generator.Generate(inputTensor, batchSize, inputs);
Assert.IsNotNull(inputTensor.data);

var agent1 = agentInfos[1];
var inputs = new List<AgentInfoSensorsPair>
{
new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
new AgentInfoSensorsPair {agentInfo = agent0._Info, sensors = agent0.sensors},
new AgentInfoSensorsPair {agentInfo = agent1._Info, sensors = agent1.sensors},
};
generator.Generate(inputTensor, batchSize, inputs);
Assert.IsNotNull(inputTensor.data);

var agent1 = agentInfos[1];
var inputs = new List<AgentInfoSensorsPair>
{
new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
new AgentInfoSensorsPair {agentInfo = agent0._Info, sensors = agent0.sensors},
new AgentInfoSensorsPair {agentInfo = agent1._Info, sensors = agent1.sensors},
};
generator.Generate(inputTensor, batchSize, inputs);

8
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


var action1 = new AgentAction();
var callbacks = new List<AgentIdActionPair>()
{
new AgentIdActionPair{agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair{agentId = 1, action = (a) => action1 = a}
new AgentIdActionPair {agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair {agentId = 1, action = (a) => action1 = a}
};
applier.Apply(inputTensor, callbacks);

var action1 = new AgentAction();
var callbacks = new List<AgentIdActionPair>()
{
new AgentIdActionPair{agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair{agentId = 1, action = (a) => action1 = a}
new AgentIdActionPair {agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair {agentId = 1, action = (a) => action1 = a}
};
applier.Apply(inputTensor, callbacks);

6
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


reward = 1f,
actionMasks = new[] { false, true },
done = true,
id = 5,
episodeId = 5,
maxStepReached = true,
storedVectorActions = new[] { 0f, 1f },
};

var agentSendInfo = typeof(Agent).GetMethod("SendInfo",
BindingFlags.Instance | BindingFlags.NonPublic);
agentEnableMethod?.Invoke(agent1, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] {});
agentSendInfo?.Invoke(agent1, new object[] { });
agentSendInfo?.Invoke(agent1, new object[] {});
demoRecorder.Close();

929
Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab
File diff is too large to display
View file

93
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


public Transform forearmR;
public Transform handR;
JointDriveController m_JdController;
bool m_IsNewDecisionStep;
int m_CurrentDecisionStep;
Rigidbody m_HipsRb;
Rigidbody m_ChestRb;

public override void AgentAction(float[] vectorAction)
{
m_DirToTarget = target.position - m_JdController.bodyPartsDict[hips].rb.position;
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
// Apply action to all relevant body parts.
if (m_IsNewDecisionStep)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
bpDict[chest].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[spine].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[chest].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[spine].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[thighL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[thighR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[shinL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[footL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[thighL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[thighR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[shinL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[footL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[armL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[forearmL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[head].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[forearmL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[head].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
//update joint strength settings
bpDict[chest].SetJointStrength(vectorAction[++i]);
bpDict[spine].SetJointStrength(vectorAction[++i]);
bpDict[head].SetJointStrength(vectorAction[++i]);
bpDict[thighL].SetJointStrength(vectorAction[++i]);
bpDict[shinL].SetJointStrength(vectorAction[++i]);
bpDict[footL].SetJointStrength(vectorAction[++i]);
bpDict[thighR].SetJointStrength(vectorAction[++i]);
bpDict[shinR].SetJointStrength(vectorAction[++i]);
bpDict[footR].SetJointStrength(vectorAction[++i]);
bpDict[armL].SetJointStrength(vectorAction[++i]);
bpDict[forearmL].SetJointStrength(vectorAction[++i]);
bpDict[armR].SetJointStrength(vectorAction[++i]);
bpDict[forearmR].SetJointStrength(vectorAction[++i]);
}
//update joint strength settings
bpDict[chest].SetJointStrength(vectorAction[++i]);
bpDict[spine].SetJointStrength(vectorAction[++i]);
bpDict[head].SetJointStrength(vectorAction[++i]);
bpDict[thighL].SetJointStrength(vectorAction[++i]);
bpDict[shinL].SetJointStrength(vectorAction[++i]);
bpDict[footL].SetJointStrength(vectorAction[++i]);
bpDict[thighR].SetJointStrength(vectorAction[++i]);
bpDict[shinR].SetJointStrength(vectorAction[++i]);
bpDict[footR].SetJointStrength(vectorAction[++i]);
bpDict[armL].SetJointStrength(vectorAction[++i]);
bpDict[forearmL].SetJointStrength(vectorAction[++i]);
bpDict[armR].SetJointStrength(vectorAction[++i]);
bpDict[forearmR].SetJointStrength(vectorAction[++i]);
}
IncrementDecisionTimer();
void FixedUpdate()
{
m_DirToTarget = target.position - m_JdController.bodyPartsDict[hips].rb.position;
AddReward(
+0.03f * Vector3.Dot(m_DirToTarget.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+ 0.01f * Vector3.Dot(m_DirToTarget.normalized, hips.forward)

}
/// <summary>
/// Only change the joint settings based on decision frequency.
/// </summary>
public void IncrementDecisionTimer()
{
if (m_CurrentDecisionStep == agentParameters.numberOfActionsBetweenDecisions ||
agentParameters.numberOfActionsBetweenDecisions == 1)
{
m_CurrentDecisionStep = 1;
m_IsNewDecisionStep = true;
}
else
{
m_CurrentDecisionStep++;
m_IsNewDecisionStep = false;
}
}
/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()

{
bodyPart.Reset(bodyPart);
}
m_IsNewDecisionStep = true;
m_CurrentDecisionStep = 1;
SetResetParameters();
}

988
Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab
File diff is too large to display
View file

1001
Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn
File diff is too large to display
View file

2
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisArea.cs


ball.transform.position = new Vector3(ballOut, 6f, 0f) + transform.position;
}
m_BallRb.velocity = new Vector3(0f, 0f, 0f);
ball.transform.localScale = new Vector3(1, 1, 1);
ball.transform.localScale = new Vector3(.5f, .5f, .5f);
ball.GetComponent<HitWall>().lastAgentHit = -1;
}

9
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


AddVectorObs(ball.transform.position.y - myArea.transform.position.y);
AddVectorObs(m_InvertMult * m_BallRb.velocity.x);
AddVectorObs(m_BallRb.velocity.y);
AddVectorObs(m_InvertMult * gameObject.transform.rotation.z);
}
public override void AgentAction(float[] vectorAction)

var rotate = Mathf.Clamp(vectorAction[2], -1f, 1f) * m_InvertMult;
if (moveY > 0.5 && transform.position.y - transform.parent.transform.position.y < -1.5f)
{

m_AgentRb.velocity = new Vector3(moveX * 30f, m_AgentRb.velocity.y, 0f);
m_AgentRb.transform.rotation = Quaternion.Euler(0f, -180f, 55f * rotate + m_InvertMult * 90f);
if (invertX && transform.position.x - transform.parent.transform.position.x < -m_InvertMult ||
!invertX && transform.position.x - transform.parent.transform.position.x > -m_InvertMult)

{
m_InvertMult = invertX ? -1f : 1f;
transform.position = new Vector3(-m_InvertMult * Random.Range(6f, 8f), -1.5f, -3.5f) + transform.parent.transform.position;
transform.position = new Vector3(-m_InvertMult * Random.Range(6f, 8f), -1.5f, -1.8f) + transform.parent.transform.position;
m_AgentRb.velocity = new Vector3(0f, 0f, 0f);
SetResetParameters();

public void SetBall()
{
scale = m_ResetParams.GetPropertyWithDefault("scale", 1);
scale = m_ResetParams.GetPropertyWithDefault("scale", .5f);
ball.transform.localScale = new Vector3(scale, scale, scale);
}

157
Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


{
public GameObject areaObject;
public int lastAgentHit;
public bool net;
public enum FloorHit
{
Service,
FloorHitUnset,
FloorAHit,
FloorBHit
}
public FloorHit lastFloorHit;
// Use this for initialization
// Use this for initialization
void Start()
{
m_Area = areaObject.GetComponent<TennisArea>();

void OnTriggerExit(Collider other)
void Reset()
if (other.name == "over")
{
if (lastAgentHit == 0)
{
m_AgentA.AddReward(0.1f);
}
else
{
m_AgentB.AddReward(0.1f);
}
lastAgentHit = 0;
}
m_AgentA.Done();
m_AgentB.Done();
m_Area.MatchReset();
lastFloorHit = FloorHit.Service;
net = false;
}
void AgentAWins()
{
m_AgentA.SetReward(1);
m_AgentB.SetReward(-1);
m_AgentA.score += 1;
Reset();
}
void AgentBWins()
{
m_AgentA.SetReward(-1);
m_AgentB.SetReward(1);
m_AgentB.score += 1;
Reset();
}
void OnCollisionEnter(Collision collision)

if (collision.gameObject.name == "wallA")
{
if (lastAgentHit == 0)
// Agent A hits into wall or agent B hit a winner
if (lastAgentHit == 0 || lastFloorHit == FloorHit.FloorAHit)
m_AgentA.AddReward(-0.01f);
m_AgentB.SetReward(0);
m_AgentB.score += 1;
AgentBWins();
// Agent B hits long
m_AgentA.SetReward(0);
m_AgentB.AddReward(-0.01f);
m_AgentA.score += 1;
AgentAWins();
if (lastAgentHit == 0)
// Agent B hits into wall or agent A hit a winner
if (lastAgentHit == 1 || lastFloorHit == FloorHit.FloorBHit)
m_AgentA.AddReward(-0.01f);
m_AgentB.SetReward(0);
m_AgentB.score += 1;
AgentAWins();
// Agent A hits long
m_AgentA.SetReward(0);
m_AgentB.AddReward(-0.01f);
m_AgentA.score += 1;
AgentBWins();
if (lastAgentHit == 0 || lastAgentHit == -1)
// Agent A hits into floor, double bounce or service
if (lastAgentHit == 0 || lastFloorHit == FloorHit.FloorAHit || lastFloorHit == FloorHit.Service)
m_AgentA.AddReward(-0.01f);
m_AgentB.SetReward(0);
m_AgentB.score += 1;
AgentBWins();
m_AgentA.AddReward(-0.01f);
m_AgentB.SetReward(0);
m_AgentB.score += 1;
lastFloorHit = FloorHit.FloorAHit;
//successful serve
if (!net)
{
net = true;
}
if (lastAgentHit == 1 || lastAgentHit == -1)
// Agent B hits into floor, double bounce or service
if (lastAgentHit == 1 || lastFloorHit == FloorHit.FloorBHit || lastFloorHit == FloorHit.Service)
m_AgentA.SetReward(0);
m_AgentB.AddReward(-0.01f);
m_AgentA.score += 1;
AgentAWins();
m_AgentA.SetReward(0);
m_AgentB.AddReward(-0.01f);
m_AgentA.score += 1;
lastFloorHit = FloorHit.FloorBHit;
//successful serve
if (!net)
{
net = true;
}
else if (collision.gameObject.name == "net")
else if (collision.gameObject.name == "net" && !net)
m_AgentA.AddReward(-0.01f);
m_AgentB.SetReward(0);
m_AgentB.score += 1;
AgentBWins();
else
else if (lastAgentHit == 1)
m_AgentA.SetReward(0);
m_AgentB.AddReward(-0.01f);
m_AgentA.score += 1;
AgentAWins();
m_AgentA.Done();
m_AgentB.Done();
m_Area.MatchReset();
else if (collision.gameObject.name == "AgentA")
{
// Agent A double hit
if (lastAgentHit == 0)
{
AgentBWins();
}
else
{
//agent can return serve in the air
if (lastFloorHit != FloorHit.Service && !net)
{
net = true;
}
if (collision.gameObject.CompareTag("agent"))
lastAgentHit = 0;
lastFloorHit = FloorHit.FloorHitUnset;
}
}
else if (collision.gameObject.name == "AgentB")
lastAgentHit = collision.gameObject.name == "AgentA" ? 0 : 1;
// Agent B double hit
if (lastAgentHit == 1)
{
AgentAWins();
}
else
{
if (lastFloorHit != FloorHit.Service && !net)
{
net = true;
}
lastAgentHit = 1;
lastFloorHit = FloorHit.FloorHitUnset;
}
}
}
}

118
Project/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity


objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
--- !u!1 &1022397856
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 1022397857}
- component: {fileID: 1022397858}
m_Layer: 0
m_Name: TennisSettings
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &1022397857
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1022397856}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: -2.5590992, y: 4.387929, z: 6.622064}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1022397858
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1022397856}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: ec51f47c5ed0478080c449c74fd9c154, type: 3}
m_Name:
m_EditorClassIdentifier:
gravityMultiplier: 1
monitorVerticalOffset: 0
fixedDeltaTime: 0.02
maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
--- !u!1001 &1065879750
PrefabInstance:
m_ObjectHideFlags: 0

- target: {fileID: 1541947554534326, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_Name
value: TennisArea (3)
objectReference: {fileID: 0}
- target: {fileID: 1541947554534326, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_IsActive
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4172342666475122, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_LocalPosition.x

objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 5889392e3f05b448a8a06c5def6c2dec, type: 3}
--- !u!1 &1585008373
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 1585008375}
- component: {fileID: 1585008374}
m_Layer: 0
m_Name: TennisSettings
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &1585008374
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1585008373}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: ec51f47c5ed0478080c449c74fd9c154, type: 3}
m_Name:
m_EditorClassIdentifier:
gravityMultiplier: 1
monitorVerticalOffset: 0
fixedDeltaTime: 0.02
maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
--- !u!4 &1585008375
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1585008373}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1001 &1667694556
PrefabInstance:
m_ObjectHideFlags: 0

m_Modifications:
- target: {fileID: 1541947554534326, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_Name
value: TennisArea
objectReference: {fileID: 0}
- target: {fileID: 4172342666475122, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_LocalPosition.x
value: 0

- target: {fileID: 4172342666475122, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 4172342666475122, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4172342666475122, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4172342666475122, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 812997c7bc2544b6f927ff684c03450f, type: 3}

946
Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab
File diff is too large to display
View file

932
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
File diff is too large to display
View file

Some files were not shown because too many files changed in this diff