
Merge branch 'master' into develop-splitpolicyoptimizer

/develop/nopreviousactions
Ervin Teng, 5 years ago
Current commit: db249ceb
280 files changed, with 4,249 insertions and 3,410 deletions
Changed files (first 100 of 280 shown; change count in parentheses):
1. .circleci/config.yml (2)
2. .github/ISSUE_TEMPLATE/config.yml (4)
3. .github/ISSUE_TEMPLATE/discussion---general-questions.md (9)
4. .gitignore (26)
5. .yamato/standalone-build-test.yml (2)
6. README.md (12)
7. config/gail_config.yaml (16)
8. config/sac_trainer_config.yaml (13)
9. config/trainer_config.yaml (9)
10. docs/Basic-Guide.md (10)
11. docs/Installation-Windows.md (4)
12. docs/Installation.md (10)
13. docs/Learning-Environment-Create-New.md (10)
14. docs/Learning-Environment-Examples.md (7)
15. docs/Learning-Environment-Executable.md (4)
16. docs/Migrating.md (12)
17. docs/Reward-Signals.md (2)
18. docs/Training-Curriculum-Learning.md (2)
19. docs/Training-PPO.md (2)
20. docs/Training-SAC.md (2)
21. docs/dox-ml-agents.conf (8)
22. docs/images/3dball_learning_brain.png (254)
23. docs/images/mlagents-NewProject.png (221)
24. docs/localized/KR/README.md (4)
25. docs/localized/zh-CN/README.md (2)
26. docs/localized/zh-CN/docs/Learning-Environment-Examples.md (2)
27. gym-unity/gym_unity/__init__.py (2)
28. ml-agents-envs/mlagents_envs/__init__.py (2)
29. ml-agents-envs/mlagents_envs/environment.py (2)
30. ml-agents/mlagents/tf_utils/__init__.py (1)
31. ml-agents/mlagents/tf_utils/tf.py (17)
32. ml-agents/mlagents/trainers/__init__.py (2)
33. ml-agents/mlagents/trainers/action_info.py (4)
34. ml-agents/mlagents/trainers/agent_processor.py (30)
35. ml-agents/mlagents/trainers/brain_conversion_utils.py (7)
36. ml-agents/mlagents/trainers/demo_loader.py (50)
37. ml-agents/mlagents/trainers/env_manager.py (73)
38. ml-agents/mlagents/trainers/ppo/trainer.py (3)
39. ml-agents/mlagents/trainers/sac/trainer.py (3)
40. ml-agents/mlagents/trainers/simple_env_manager.py (4)
41. ml-agents/mlagents/trainers/stats.py (16)
42. ml-agents/mlagents/trainers/subprocess_env_manager.py (4)
43. ml-agents/mlagents/trainers/tests/test_agent_processor.py (2)
44. ml-agents/mlagents/trainers/tests/test_bcmodule.py (4)
45. ml-agents/mlagents/trainers/tests/test_demo_loader.py (37)
46. ml-agents/mlagents/trainers/tests/test_policy.py (2)
47. ml-agents/mlagents/trainers/tests/test_ppo.py (24)
48. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2)
49. ml-agents/mlagents/trainers/tests/test_sac.py (24)
50. ml-agents/mlagents/trainers/tests/test_simple_rl.py (33)
51. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (46)
52. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (54)
53. ml-agents/mlagents/trainers/tf_policy.py (43)
54. ml-agents/mlagents/trainers/trainer.py (21)
55. ml-agents/mlagents/trainers/trainer_controller.py (82)
56. ml-agents/mlagents/trainers/trainer_util.py (12)
57. notebooks/getting-started.ipynb (2)
58. protobuf-definitions/README.md (2)
59. protobuf-definitions/make.sh (6)
60. protobuf-definitions/make_for_win.bat (6)
61. test_constraints_max_tf1_version.txt (3)
62. utils/validate_meta_files.py (2)
63. com.unity.ml-agents/CONTRIBUTING.md (2)
64. Project/ProjectSettings/ClusterInputManager.asset (6)
65. Project/ProjectSettings/GraphicsSettings.asset (5)
66. Project/ProjectSettings/InputManager.asset (295)
67. Project/ProjectSettings/NavMeshAreas.asset (2)
68. Project/ProjectSettings/TimeManager.asset (9)
69. Project/ProjectSettings/EditorBuildSettings.asset (1)
70. Project/ProjectSettings/ProjectSettings.asset (190)
71. Project/ProjectSettings/ProjectVersion.txt (1)
72. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x86.dll.meta (5)
73. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x64.dll.meta (5)
74. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/osx/native/libgrpc_csharp_ext.x64.bundle.meta (247)
75. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/linux/native/libgrpc_csharp_ext.x64.so.meta (5)
76. com.unity.ml-agents/Plugins/ProtoBuffer/Grpc.Core.dll.meta (5)
77. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (1)
78. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (2)
79. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (2)
80. com.unity.ml-agents/Editor/AgentEditor.cs (25)
81. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (2)
82. com.unity.ml-agents/Tests/Editor/Sensor/WriterAdapterTests.cs (6)
83. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (23)
84. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (26)
85. com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs (197)
86. com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs (2)
87. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (107)
88. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (23)
89. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (8)
90. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (6)
91. Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (929)
92. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (93)
93. Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (988)
94. Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn (1001)
95. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisArea.cs (2)
96. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (9)
97. Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (157)
98. Project/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (118)
99. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (946)
100. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (932)

2
.circleci/config.yml


chmod +x Grpc.Tools.1.14.1/tools/linux_x64/protoc
chmod +x Grpc.Tools.1.14.1/tools/linux_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.1.14.1/tools/linux_x64 ./make.sh
CS_PROTO_PATH=UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects
CS_PROTO_PATH=com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects
git diff --exit-code --quiet -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" \
|| { GIT_ERR=$?; echo "protobufs need to be regenerated, apply the patch uploaded to artifacts."; \
echo "Apply the patch with the command: git apply proto.patch"; \

4
.github/ISSUE_TEMPLATE/config.yml


blank_issues_enabled: false
contact_links:
- name: ML-Agents Unity Forum
url: https://forum.unity.com/forums/ml-agents.453/
about: Please ask Installation / Setup and Discussion / General Questions in the Unity Forum.

9
.github/ISSUE_TEMPLATE/discussion---general-questions.md


---
Describe what you'd like to discuss.
For discussions, please post in [ML-Agents Unity Forum](https://forum.unity.com/forums/ml-agents.453/) instead of
creating a Github issue. Unity forums are the predominant community for Unity users and experts. By leveraging Unity
forums for general discussions and project help, we can keep Github issues for bugs, performance issues, and feature
requests for ML-Agents.
**Note**: The ML-Agents team has limited resources for education and community discussion. We'll participate as we are able, but encourage members of the community to support one another to discuss and support one another.
**Note**: The ML-Agents team has limited resources for education and community discussion. We'll participate as we are
able, but encourage members of the community to support one another to discuss and support one another.

26
.gitignore


/UnitySDK/[Ll]ibrary/
/UnitySDK/Logs/
/UnitySDK/[Tt]emp/
/UnitySDK/[Oo]bj/
/UnitySDK/[Bb]uild/
/UnitySDK/[Bb]uilds/
/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Demonstrations*
/UnitySDK/csharp_timers.json
# Tensorflow Model Info
/models
/summaries

/UnitySDK/.vs/
# Autogenerated VS/MD/Consulo solution and project files
/UnitySDKExportedObj/
/UnitySDK.consulo/
/com.unity.ml-agentsExportedObj/
/com.unity.ml-agents.consulo/
*.csproj
*.unityproj
*.sln

*.pidb.meta
# Unity3D Generated File On Crash Reports
/UnitySDK/sysinfo.txt
/com.unity.ml-agents/sysinfo.txt
# Builds
*.apk

*.x86_64
*.x86
# Tensorflow Sharp Files
/UnitySDK/Assets/ML-Agents/Plugins/Android*
/UnitySDK/Assets/ML-Agents/Plugins/iOS*
/UnitySDK/Assets/ML-Agents/Plugins/Computer*
/UnitySDK/Assets/ML-Agents/Plugins/System.Numerics*
/UnitySDK/Assets/ML-Agents/Plugins/System.ValueTuple*
/UnitySDK/Assets/ML-Agents/VideoRecorder*
/com.unity.ml-agents/VideoRecorder*
# Generated doc folders
/docs/html

2
.yamato/standalone-build-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- ./run-standalone-build-osx.sh
- python -u -m ml-agents.tests.yamato.standalone_build_tests
triggers:
pull_requests:
- targets:

12
README.md


The ML-Agents toolkit is an open-source project and we encourage and welcome
contributions. If you wish to contribute, be sure to review our
[contribution guidelines](CONTRIBUTING.md) and
[contribution guidelines](com.unity.ml-agents/CONTRIBUTING.md) and
If you run into any problems using the ML-Agents toolkit,
[submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
make sure to include as much detail as possible.
For problems with the installation and setup of the ML-Agents toolkit, or
discussions about how to best setup or train your agents, please create a new
thread on the [Unity ML-Agents forum](https://forum.unity.com/forums/ml-agents.453/)
and make sure to include as much detail as possible.
If you run into any other problems using the ML-Agents toolkit, or have a specific
feature requests, please [submit a GitHub issue](https://github.com/Unity-Technologies/ml-agents/issues).
For any other questions or feedback, connect directly with the ML-Agents
team at ml-agents@unity3d.com.

16
config/gail_config.yaml


max_steps: 5.0e5
num_epoch: 3
behavioral_cloning:
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
reward_signals:

strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
CrawlerStatic:
normalize: true

num_layers: 3
hidden_units: 512
behavioral_cloning:
demo_path: UnitySDK/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
strength: 0.5
steps: 5000
reward_signals:

encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
PushBlock:
max_steps: 5.0e4

strength: 1.0
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
Hallway:
use_recurrent: true

strength: 0.1
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
FoodCollector:
batch_size: 64

strength: 0.1
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
strength: 1.0
steps: 0

13
config/sac_trainer_config.yaml


gamma: 0.99
encoding_size: 128
use_actions: true
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
VisualPyramids:
time_horizon: 128

gamma: 0.99
encoding_size: 128
use_actions: true
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
3DBall:
normalize: true

time_horizon: 1000
Tennis:
buffer_size: 500000
max_steps: 4e6
max_steps: 2e7
hidden_units: 256
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
CrawlerStatic:
normalize: true

9
config/trainer_config.yaml


Tennis:
normalize: true
max_steps: 4e6
max_steps: 2e7
learning_rate_schedule: constant
hidden_units: 256
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
CrawlerStatic:
normalize: true

10
docs/Basic-Guide.md


1. Launch Unity
2. On the Projects dialog, choose the **Open** option at the top of the window.
3. Using the file dialog that opens, locate the `UnitySDK` folder
3. Using the file dialog that opens, locate the `Project` folder
within the ML-Agents toolkit project and click **Open**.
4. Go to **Edit** > **Project Settings** > **Player**
5. For **each** of the platforms you target (**PC, Mac and Linux Standalone**,

![Platform Prefab](images/platform_prefab.png)
3. In the **Project** window, drag the **3DBallLearning** Model located in
`Assets/ML-Agents/Examples/3DBall/TFModels` into the `Model` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
3. In the **Project** window, drag the **3DBall** Model located in
`Assets/ML-Agents/Examples/3DBall/TFModels` into the `Model` property under `Behavior Parameters (Script)` component in the Agent GameObject **Inspector** window.
4. You should notice that each `Agent` under each `3DBall` in the **Hierarchy** windows now contains **3DBallLearning** as `Model`. __Note__ : You can modify multiple game objects in a scene by selecting them all at
4. You should notice that each `Agent` under each `3DBall` in the **Hierarchy** windows now contains **3DBall** as `Model` on the `Behavior Parameters`. __Note__ : You can modify multiple game objects in a scene by selecting them all at
once using the search bar in the Scene Hierarchy.
8. Select the **InferenceDevice** to use for this model (CPU or GPU) on the Agent.
_Note: CPU is faster for the majority of ML-Agents toolkit generated models_

[above](#running-a-pre-trained-model).
1. Move your model file into
`UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/`.
`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
2. Open the Unity Editor, and select the **3DBall** scene as described above.
3. Select the **3DBall** prefab Agent object.
4. Drag the `<behavior_name>.nn` file from the Project window of

4
docs/Installation-Windows.md


If you don't want to use Git, you can always directly download all the files
[here](https://github.com/Unity-Technologies/ml-agents/archive/latest_release.zip).
The `UnitySDK` subdirectory contains the Unity Assets to add to your projects.
It also contains many [example environments](Learning-Environment-Examples.md)
The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement

10
docs/Installation.md


The `--branch latest_release` option will switch to the tag of the latest stable release.
Omitting that will get the `master` branch which is potentially unstable.
The `UnitySDK` subdirectory contains the Unity Assets to add to your projects.
It also contains many [example environments](Learning-Environment-Examples.md)
The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
If you intend to copy the `UnitySDK` folder in to your project, ensure that
If you intend to copy the `com.unity.ml-agents` folder in to your project, ensure that
To install the Barrcuda package in later versions of Unity, navigate to the Package
To install the Barracuda package in later versions of Unity, navigate to the Package
`Adavanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
`Advanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
is checked. Search for or select the `Barracuda` package and install the latest version.
<p align="center">

10
docs/Learning-Environment-Create-New.md


but is the default as of 2018.3.)
3. In a file system window, navigate to the folder containing your cloned
ML-Agents repository.
4. Drag the `ML-Agents` folder from `UnitySDK/Assets` to the Unity
Editor Project window. If you see console errors about Barracuda, make sure
you've installed Barracuda from the Unity Package Manager. More information
can be found in the [installation instructions](Installation.md) under
4. Open the `manifest.json` file in the `Packages` directory of your project.
Add the following line to your project's package dependencies:
```
"com.unity.ml-agents" : "file:<path_to_local_ml-agents_repo>/com.unity.ml-agents"
```
More information can be found in the [installation instructions](Installation.md) under
**Package Installation**.
Your Unity **Project** window should contain the following assets:

7
docs/Learning-Environment-Examples.md


The Unity ML-Agents toolkit contains an expanding set of example environments
which demonstrate various features of the platform. Environments are located in
`UnitySDK/Assets/ML-Agents/Examples` and summarized below. Additionally, our
`Project/Assets/ML-Agents/Examples` and summarized below. Additionally, our
[first ML Challenge](https://connect.unity.com/challenges/ml-agents-1) contains
environments created by the community.

researchers.
If you would like to contribute environments, please see our
[contribution guidelines](../CONTRIBUTING.md) page.
[contribution guidelines](../com.unity.ml-agents/CONTRIBUTING.md) page.
## Basic

* Goal: Move to the most reward state.
* Agents: The environment contains one agent.
* Agent Reward Function:
* -0.01 at each step
* +0.1 for arriving at suboptimal state.
* +1.0 for arriving at optimal state.
* Behavior Parameters:

* Visual Observations: None
* Float Properties: None
* Benchmark Mean Reward: 0.94
* Benchmark Mean Reward: 0.93
## [3DBall: 3D Balance Ball](https://youtu.be/dheeCO29-EI)

4
docs/Learning-Environment-Executable.md


1. Launch Unity.
2. On the Projects dialog, choose the **Open** option at the top of the window.
3. Using the file dialog that opens, locate the `UnitySDK` folder within the
3. Using the file dialog that opens, locate the `Project` folder within the
ML-Agents project and click **Open**.
4. In the **Project** window, navigate to the folder
`Assets/ML-Agents/Examples/3DBall/Scenes/`.

into your Agent by following the steps below:
1. Move your model file into
`UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/`.
`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
2. Open the Unity Editor, and select the **3DBall** scene as described above.
3. Select the **3DBall** prefab from the Project window and select **Agent**.
5. Drag the `<behavior_name>.nn` file from the Project window of

12
docs/Migrating.md


## Migrating from 0.13 to latest
### Important changes
* The `Decision Period` and `On Demand decision` checkbox have been removed from the Agent. On demand decision is now the default (calling `RequestDecision` on the Agent manually.)
* Agents will always request a decision after being marked as `Done()` and will no longer wait for the next call to `RequestDecision()`.
* The `agentParameters` field of the Agent has been removed. (Contained only `maxStep` information)
* `maxStep` is now a public field on the Agent. (Was moved from `agentParameters`)
* The `Info` field of the Agent has been made private. (Was only used internally and not meant to be modified outside of the Agent)
* The `GetReward()` method on the Agent has been removed. (It was being confused with `GetCumulativeReward()`)
* The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference)
* The `GetValueEstimate()` method on the Agent has been removed.
* The `UpdateValueAction()` method on the Agent has been removed.
* Calling `Done()` on the Agent will now reset it immediately and call the `AgentReset` virtual method. (This is to simplify the previous logic in which the Agent had to wait for the next `EnvironmentStep` to reset)
* If you were not using `On Demand Decision` for your Agent, you **must** add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
* If you have a class that inherits from Academy:
* If the class didn't override any of the virtual methods and didn't store any additional data, you can just remove the old script from the scene.
* If the class had additional data, create a new MonoBehaviour and store the data on this instead.

* Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format.
A tool like https://www.json2yaml.com may be useful to help with the conversion.
* If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0

2
docs/Reward-Signals.md


strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
```
Each reward signal should define at least two parameters, `strength` and `gamma`, in addition

2
docs/Training-Curriculum-Learning.md


Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example.

2
docs/Training-PPO.md


```
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```

2
docs/Training-SAC.md


```
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```

8
docs/dox-ml-agents.conf


# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
INPUT = ../UnitySDK/Assets/ML-Agents/Scripts/Academy.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Agent.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Decision.cs
INPUT = ../Project/Assets/ML-Agents/Scripts/Academy.cs \
../Project/Assets/ML-Agents/Scripts/Agent.cs \
../Project/Assets/ML-Agents/Scripts/Monitor.cs \
../Project/Assets/ML-Agents/Scripts/Decision.cs
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses

254
docs/images/3dball_learning_brain.png

Before / After
Width: 413 | Height: 349 | Size: 35 KiB

221
docs/images/mlagents-NewProject.png

Before / After
Width: 193 | Height: 345 | Size: 20 KiB

4
docs/localized/KR/README.md


## 커뮤니티 그리고 피드백
ML-Agents toolkit은 오픈소스 프로젝트이며 컨트리뷰션을 환영합니다. 만약 컨트리뷰션을 원하시는 경우
[컨트리뷰션 가이드라인](CONTRIBUTING.md)과 [행동 규칙](CODE_OF_CONDUCT.md)을 검토해주십시오.
[컨트리뷰션 가이드라인](com/unity.ml-agents/CONTRIBUTING.md)과 [행동 규칙](CODE_OF_CONDUCT.md)을 검토해주십시오.
만약 ML-Agents toolkit을 사용하며 문제가 생긴다면, 가능한 많은 세부 사항을 포함하여 [이슈 제출](https://github.com/Unity-Technologies/ml-agents/issues)을 해주십시오.

장현준: totok682@naver.com
민규식: kyushikmin@gmail.com
민규식: kyushikmin@gmail.com

2
docs/localized/zh-CN/README.md


ML-Agents 是一个开源项目,我们鼓励并欢迎大家贡献自己的力量。
如果您想做出贡献,请务必查看我们的
[贡献准则](/CONTRIBUTING.md)和
[贡献准则](/com.unity.ml-agents/CONTRIBUTING.md)和
[行为准则](/CODE_OF_CONDUCT.md)。
您可以通过 Unity Connect 和 GitHub 与我们以及更广泛的社区进行交流:

2
docs/localized/zh-CN/docs/Learning-Environment-Examples.md


页面。
如果您想提交自己的环境,请参阅我们的
[贡献指南](/CONTRIBUTING.md)页面。
[贡献指南](/com.unity.ml-agents/CONTRIBUTING.md)页面。
## Basic

2
gym-unity/gym_unity/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

2
ml-agents-envs/mlagents_envs/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

2
ml-agents-envs/mlagents_envs/environment.py


class UnityEnvironment(BaseEnv):
SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
API_VERSION = "API-13"
API_VERSION = "API-14-dev0"
def __init__(
self,

1
ml-agents/mlagents/tf_utils/__init__.py


from mlagents.tf_utils.tf import tf as tf # noqa
from mlagents.tf_utils.tf import set_warnings_enabled # noqa
from mlagents.tf_utils.tf import generate_session_config # noqa

17
ml-agents/mlagents/tf_utils/tf.py


def set_warnings_enabled(is_enabled: bool) -> None:
"""
Enable or disable tensorflow warnings (notabley, this disables deprecation warnings.
Enable or disable tensorflow warnings (notably, this disables deprecation warnings.
def generate_session_config() -> tf.ConfigProto:
"""
Generate a ConfigProto to use for ML-Agents that doesn't consume all of the GPU memory
and allows for soft placement in the case of multi-GPU.
"""
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# For multi-GPU training, set allow_soft_placement to True to allow
# placing the operation into an alternative device automatically
# to prevent exceptions if the device doesn't support the operation
# or the device does not exist
config.allow_soft_placement = True
return config
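For reference, a hedged sketch of how the config produced above would typically be consumed; the graph and session variables below are illustrative and not part of this commit, and only the `mlagents.tf_utils` exports shown in this diff are assumed.

```python
# Minimal sketch: pass the ConfigProto from generate_session_config to a
# TF1-style session. allow_growth and allow_soft_placement are already set.
from mlagents.tf_utils import tf, generate_session_config

graph = tf.Graph()
with graph.as_default():
    sess = tf.Session(config=generate_session_config(), graph=graph)
```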

2
ml-agents/mlagents/trainers/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

4
ml-agents/mlagents/trainers/action_info.py


value: Any
outputs: ActionInfoOutputs
agent_ids: List[AgentId]
@staticmethod
def empty() -> "ActionInfo":
return ActionInfo([], [], {}, [])
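A minimal illustration of the new `ActionInfo.empty()` helper: because `ActionInfo` is a NamedTuple, the convenience constructor compares equal to the hand-built empty value it replaces in the tests further down.

```python
from mlagents.trainers.action_info import ActionInfo

# The convenience constructor mirrors the literal previously used in tests.
assert ActionInfo.empty() == ActionInfo([], [], {}, [])
```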

30
ml-agents/mlagents/trainers/agent_processor.py


from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.env_manager import get_global_agent_id
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
T = TypeVar("T")

"Policy/Learning Rate", take_action_outputs["learning_rate"]
)
terminated_agents: List[str] = []
# Make unique agent_ids that are global across workers
action_global_agent_ids = [
get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids

"Environment/Episode Length",
self.episode_steps.get(global_id, 0),
)
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
terminated_agents += [global_id]
elif not curr_agent_step.done:
self.episode_steps[global_id] += 1

previous_action.agent_ids, take_action_outputs["action"]
)
for terminated_id in terminated_agents:
self._clean_agent_data(terminated_id)
def _clean_agent_data(self, global_id: str) -> None:
"""
Removes the data for an Agent.
"""
del self.experience_buffers[global_id]
del self.last_take_action_outputs[global_id]
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
del self.last_step_result[global_id]
self.policy.remove_previous_action([global_id])
self.policy.remove_memories([global_id])
def publish_trajectory_queue(
self, trajectory_queue: "AgentManagerQueue[Trajectory]"
) -> None:

:param trajectory_queue: Trajectory queue to publish to.
"""
self.trajectory_queues.append(trajectory_queue)
def end_episode(self) -> None:
"""
Ends the episode, terminating the current trajectory and stopping stats collection for that
episode. Used for forceful reset (e.g. in curriculum or generalization training.)
"""
self.experience_buffers.clear()
self.episode_rewards.clear()
self.episode_steps.clear()
class AgentManagerQueue(Generic[T]):

7
ml-agents/mlagents/trainers/brain_conversion_utils.py


return BrainParameters(
name, int(vec_size), cam_res, a_size, [], vector_action_space_type
)
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"
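As a small usage sketch of the relocated `get_global_agent_id` helper (the concrete worker and agent ids below are made up for illustration):

```python
from mlagents.trainers.brain_conversion_utils import get_global_agent_id

# The worker id is baked into the key, so agents with the same local id in
# different environment workers never collide.
assert get_global_agent_id(0, 7) != get_global_agent_id(1, 7)
print(get_global_agent_id(1, 7))  # -> "$1-7"
```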

50
ml-agents/mlagents/trainers/demo_loader.py


import pathlib
import logging
import os
from typing import List, Tuple

return brain_params, demo_buffer
def get_demo_files(path: str) -> List[str]:
"""
Retrieves the demonstration file(s) from a path.
:param path: Path of demonstration file or directory.
:return: List of demonstration files
Raises errors if |path| is invalid.
"""
if os.path.isfile(path):
if not path.endswith(".demo"):
raise ValueError("The path provided is not a '.demo' file.")
return [path]
elif os.path.isdir(path):
paths = [
os.path.join(path, name)
for name in os.listdir(path)
if name.endswith(".demo")
]
if not paths:
raise ValueError("There are no '.demo' files in the provided directory.")
return paths
else:
raise FileNotFoundError(
f"The demonstration file or directory {path} does not exist."
)
@timed
def load_demonstration(
file_path: str

# First 32 bytes of file dedicated to meta-data.
INITIAL_POS = 33
file_paths = []
if os.path.isdir(file_path):
all_files = os.listdir(file_path)
for _file in all_files:
if _file.endswith(".demo"):
file_paths.append(os.path.join(file_path, _file))
if not all_files:
raise ValueError("There are no '.demo' files in the provided directory.")
elif os.path.isfile(file_path):
file_paths.append(file_path)
file_extension = pathlib.Path(file_path).suffix
if file_extension != ".demo":
raise ValueError(
"The file is not a '.demo' file. Please provide a file with the "
"correct extension."
)
else:
raise FileNotFoundError(
"The demonstration file or directory {} does not exist.".format(file_path)
)
file_paths = get_demo_files(file_path)
group_spec = None
brain_param_proto = None
info_action_pairs = []
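A short, hypothetical usage sketch for the `get_demo_files` helper added above; the paths are invented for the example and the error behavior follows the code shown in this hunk.

```python
from mlagents.trainers.demo_loader import get_demo_files

# A single .demo file comes back as a one-element list.
files = get_demo_files("demos/ExpertPyramid.demo")

# A directory returns every .demo file it contains. An empty directory or a
# non-.demo file raises ValueError; a missing path raises FileNotFoundError.
all_demos = get_demo_files("demos/")
```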

73
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
import logging
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"
logger = logging.getLogger("mlagents.trainers")
class EnvironmentStep(NamedTuple):

class EnvManager(ABC):
def __init__(self):
self.policies: Dict[AgentGroup, Policy] = {}
self.policies: Dict[AgentGroup, TFPolicy] = {}
self.agent_managers: Dict[AgentGroup, AgentManager] = {}
self.first_step_infos: List[EnvironmentStep] = None
def set_policy(self, brain_name: AgentGroup, policy: Policy) -> None:
def set_policy(self, brain_name: AgentGroup, policy: TFPolicy) -> None:
if brain_name in self.agent_managers:
self.agent_managers[brain_name].policy = policy
def set_agent_manager(self, brain_name: AgentGroup, manager: AgentManager) -> None:
self.agent_managers[brain_name] = manager
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
def reset(self, config: Dict = None) -> List[EnvironmentStep]:
def _reset_env(self, config: Dict = None) -> List[EnvironmentStep]:
def reset(self, config: Dict = None) -> int:
for manager in self.agent_managers.values():
manager.end_episode()
# Save the first step infos, after the reset.
# They will be processed on the first advance().
self.first_step_infos = self._reset_env(config)
return len(self.first_step_infos)
@property
@abstractmethod
def external_brains(self) -> Dict[AgentGroup, BrainParameters]:

@abstractmethod
def close(self):
pass
def advance(self):
# If we had just reset, process the first EnvironmentSteps.
# Note that we do it here instead of in reset() so that on the very first reset(),
# we can create the needed AgentManagers before calling advance() and processing the EnvironmentSteps.
if self.first_step_infos is not None:
self._process_step_infos(self.first_step_infos)
self.first_step_infos = None
# Get new policies if found
for brain_name in self.external_brains:
try:
_policy = self.agent_managers[brain_name].policy_queue.get_nowait()
self.set_policy(brain_name, _policy)
except AgentManagerQueue.Empty:
pass
# Step the environment
new_step_infos = self._step()
# Add to AgentProcessor
num_step_infos = self._process_step_infos(new_step_infos)
return num_step_infos
def _process_step_infos(self, step_infos: List[EnvironmentStep]) -> int:
for step_info in step_infos:
for name_behavior_id in step_info.name_behavior_ids:
if name_behavior_id not in self.agent_managers:
logger.warning(
"Agent manager was not created for behavior id {}.".format(
name_behavior_id
)
)
continue
self.agent_managers[name_behavior_id].add_experiences(
step_info.current_all_step_result[name_behavior_id],
step_info.worker_id,
step_info.brain_name_to_action_info.get(
name_behavior_id, ActionInfo.empty()
),
)
return len(step_infos)
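A partial sketch of how a custom environment manager lines up with the refactored interface: callers now go through `reset()` and `advance()`, while subclasses override the underscored hooks. The class name and the omitted members are assumptions for illustration, not part of this commit.

```python
from typing import Dict, List
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep

class CustomEnvManager(EnvManager):
    """Partial sketch; external_brains, close, etc. are omitted here."""

    def _reset_env(self, config: Dict = None) -> List[EnvironmentStep]:
        # Reset the wrapped environment(s) and return their first steps.
        ...

    def _step(self) -> List[EnvironmentStep]:
        # Advance the wrapped environment(s) by one step.
        ...

# Training code calls manager.reset(config) once, then manager.advance() in a
# loop; the first EnvironmentSteps are processed on the first advance().
```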

3
ml-agents/mlagents/trainers/ppo/trainer.py


self.policy.initialize_or_load()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Needed to resume loads properly
self.step = policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

3
ml-agents/mlagents/trainers/sac/trainer.py


self.policy.initialize_or_load()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Needed to resume loads properly
self.step = policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

4
ml-agents/mlagents/trainers/simple_env_manager.py


self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
self.previous_all_action_info: Dict[str, ActionInfo] = {}
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
all_action_info = self._take_step(self.previous_step)
self.previous_all_action_info = all_action_info

self.previous_step = step_info
return [step_info]
def reset(
def _reset_env(
self, config: Dict[AgentGroup, float] = None
) -> List[EnvironmentStep]: # type: ignore
if config is not None:

16
ml-agents/mlagents/trainers/stats.py


std: float
num: int
@staticmethod
def empty() -> "StatsSummary":
return StatsSummary(0.0, 0.0, 0)
class StatsWriter(abc.ABC):
"""

:param key: The type of statistic, e.g. Environment/Reward.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
"""
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
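A minimal sketch of the new empty-summary behavior, assuming the `StatsReporter` constructor and `add_stat` method from this module; the category string below is hypothetical.

```python
from mlagents.trainers.stats import StatsReporter, StatsSummary

reporter = StatsReporter("example_category")  # hypothetical category name

# No values recorded yet, so the guard above avoids np.mean([]) and returns
# the empty summary instead.
assert reporter.get_stats_summaries("Environment/Reward") == StatsSummary.empty()

reporter.add_stat("Environment/Reward", 1.0)
assert reporter.get_stats_summaries("Environment/Reward").num == 1
```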

4
ml-agents/mlagents/trainers/subprocess_env_manager.py


env_worker.send("step", env_action_info)
env_worker.waiting = True
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
# Queue steps for any workers which aren't in the "waiting" state.
self._queue_steps()

step_infos = self._postprocess_steps(worker_steps)
return step_infos
def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
while any(ew.waiting for ew in self.env_workers):
if not self.step_queue.empty():
step = self.step_queue.get_nowait()

2
ml-agents/mlagents/trainers/tests/test_agent_processor.py


)
processor.publish_trajectory_queue(tqueue)
# This is like the initial state after the env reset
processor.add_experiences(mock_step, 0, ActionInfo([], [], {}, []))
processor.add_experiences(mock_step, 0, ActionInfo.empty())
for _ in range(5):
processor.add_experiences(mock_step, 0, fake_action_info)

4
ml-agents/mlagents/trainers/tests/test_bcmodule.py


use_recurrent: false
memory_size: 8
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

37
ml-agents/mlagents/trainers/tests/test_demo_loader.py


import os
import numpy as np
import pytest
import tempfile
from mlagents.trainers.demo_loader import load_demonstration, demo_to_buffer
from mlagents.trainers.demo_loader import (
load_demonstration,
demo_to_buffer,
get_demo_files,
)
def test_load_demo():

_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)
assert len(demo_buffer["actions"]) == total_expected - 1
def test_edge_cases():
path_prefix = os.path.dirname(os.path.abspath(__file__))
# nonexistent file and directory
with pytest.raises(FileNotFoundError):
get_demo_files(os.path.join(path_prefix, "nonexistent_file.demo"))
with pytest.raises(FileNotFoundError):
get_demo_files(os.path.join(path_prefix, "nonexistent_directory"))
with tempfile.TemporaryDirectory() as tmpdirname:
# empty directory
with pytest.raises(ValueError):
get_demo_files(tmpdirname)
# invalid file
invalid_fname = os.path.join(tmpdirname, "mydemo.notademo")
with open(invalid_fname, "w") as f:
f.write("I'm not a demo")
with pytest.raises(ValueError):
get_demo_files(invalid_fname)
# invalid directory
with pytest.raises(ValueError):
get_demo_files(tmpdirname)
# valid file
valid_fname = os.path.join(tmpdirname, "mydemo.demo")
with open(valid_fname, "w") as f:
f.write("I'm a demo file")
assert get_demo_files(valid_fname) == [valid_fname]
# valid directory
assert get_demo_files(tmpdirname) == [valid_fname]

2
ml-agents/mlagents/trainers/tests/test_policy.py


dummy_groupspec = AgentGroupSpec([(1,)], "continuous", 1)
no_agent_step = BatchedStepResult.empty(dummy_groupspec)
result = policy.get_action(no_agent_step)
assert result == ActionInfo([], [], {}, [])
assert result == ActionInfo.empty()
def test_take_action_returns_nones_on_missing_values():

24
ml-agents/mlagents/trainers/tests/test_ppo.py


brain_params.brain_name, 0, trainer_params, True, False, 0, "0", False
)
policy_mock = mock.Mock(spec=NNPolicy)
policy_mock.get_current_step.return_value = 0
step_count = (
5
) # 10 hacked because this function is no longer called through trainer

for agent in reward.values():
assert agent == 0
assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
def test_add_get_policy(dummy_config):
brain_params = make_brain_parameters(
discrete_action=False, visual_inputs=0, vec_obs_size=6
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)
policy = mock.Mock(spec=NNPolicy)
policy.get_current_step.return_value = 2000
trainer.add_policy(brain_params.brain_name, policy)
assert trainer.get_policy(brain_params.brain_name) == policy
# Make sure the summary steps were loaded properly
assert trainer.get_step == 2000
assert trainer.next_summary_step > 2000
# Test incorrect class of policy
policy = mock.Mock()
with pytest.raises(RuntimeError):
trainer.add_policy(brain_params, policy)
def test_normalization(dummy_config):

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

24
ml-agents/mlagents/trainers/tests/test_sac.py


import pytest
from unittest import mock
import yaml
import numpy as np

policy = trainer2.create_policy(mock_brain)
trainer2.add_policy(mock_brain.brain_name, policy)
assert trainer2.update_buffer.num_experiences == buffer_len
def test_add_get_policy(dummy_config):
brain_params = make_brain_parameters(
discrete_action=False, visual_inputs=0, vec_obs_size=6
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = mock.Mock(spec=SACPolicy)
policy.get_current_step.return_value = 2000
trainer.add_policy(brain_params.brain_name, policy)
assert trainer.get_policy(brain_params.brain_name) == policy
# Make sure the summary steps were loaded properly
assert trainer.get_step == 2000
assert trainer.next_summary_step > 2000
# Test incorrect class of policy
policy = mock.Mock()
with pytest.raises(RuntimeError):
trainer.add_policy(brain_params, policy)
def test_process_trajectory(dummy_config):

33
ml-agents/mlagents/trainers/tests/test_simple_rl.py


gamma: 0.99
"""
GHOST_CONFIG = f"""
{BRAIN_NAME}:
trainer: ppo
batch_size: 16
beta: 5.0e-3
buffer_size: 64
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 5.0e-3
max_steps: 2500
memory_size: 256
normalize: false
num_epoch: 3
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 500
use_recurrent: false
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
self_play:
save_step: 1000
"""
def _check_environment_trains(
env, config, meta_curriculum=None, success_threshold=0.99

def test_simple_sac(use_discrete):
env = Simple1DEnvironment(use_discrete=use_discrete)
_check_environment_trains(env, SAC_CONFIG)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
env = Simple1DEnvironment(use_discrete=use_discrete)
_check_environment_trains(env, GHOST_CONFIG)

46
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


EnvironmentResponse,
StepResponse,
)
from mlagents.trainers.env_manager import EnvironmentStep
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig

mock_env_factory, EngineConfig.default_config(), 1
)
params = {"test": "params"}
manager.reset(params)
manager._reset_env(params)
manager.env_workers[0].send.assert_called_with("reset", (params))
def test_reset_collects_results_from_all_envs(self):

)
params = {"test": "params"}
res = manager.reset(params)
res = manager._reset_env(params)
for i, env in enumerate(manager.env_workers):
env.send.assert_called_with("reset", (params))
env.recv.assert_called()

manager.env_workers[2].previous_step = last_steps[2]
manager.env_workers[2].waiting = True
manager._take_step = Mock(return_value=step_mock)
res = manager.step()
res = manager._step()
for i, env in enumerate(manager.env_workers):
if i < 2:
env.send.assert_called_with("step", step_mock)

manager.env_workers[0].previous_step,
manager.env_workers[1].previous_step,
]
@mock.patch("mlagents.trainers.subprocess_env_manager.SubprocessEnvManager._step")
@mock.patch(
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.external_brains",
new_callable=mock.PropertyMock,
)
def test_advance(self, external_brains_mock, step_mock):
brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
worker_id, EnvironmentResponse("step", worker_id, worker_id)
)
env_manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 3
)
external_brains_mock.return_value = [brain_name]
agent_manager_mock = mock.Mock()
env_manager.set_agent_manager(brain_name, agent_manager_mock)
step_info_dict = {brain_name: Mock()}
step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
step_mock.return_value = [step_info]
env_manager.advance()
# Test add_experiences
env_manager._step.assert_called_once()
agent_manager_mock.add_experiences.assert_called_once_with(
step_info.current_all_step_result[brain_name],
0,
step_info.brain_name_to_action_info[brain_name],
)
# Test policy queue
mock_policy = mock.Mock()
agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
env_manager.advance()
assert env_manager.policies[brain_name] == mock_policy
assert agent_manager_mock.policy == mock_policy

54
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


from unittest.mock import MagicMock, Mock, patch
from unittest.mock import MagicMock, patch
from mlagents.trainers.subprocess_env_manager import EnvironmentStep
from mlagents.trainers.sampler_class import SamplerManager

return tc, trainer_mock
def test_take_step_adds_experiences_to_trainer_and_trains(
def test_advance_adds_experiences_to_trainer_and_trains(
action_info_dict = {brain_name: MagicMock()}
brain_info_dict = {brain_name: Mock()}
old_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
new_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
trainer_mock._is_ready_update = MagicMock(return_value=True)
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)

env_mock.step.assert_called_once()
manager_mock = tc.managers[brain_name]
manager_mock.add_experiences.assert_called_once_with(
new_step_info.current_all_step_result[brain_name],
0,
new_step_info.brain_name_to_action_info[brain_name],
)
trainer_mock.advance.assert_called_once()
def test_take_step_if_not_training(trainer_controller_with_take_step_mocks):
tc, trainer_mock = trainer_controller_with_take_step_mocks
tc.train_model = False
brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
brain_info_dict = {brain_name: Mock()}
old_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
new_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
trainer_mock._is_ready_update = MagicMock(return_value=False)
env_mock = MagicMock()
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)
tc.advance(env_mock)
env_mock.reset.assert_not_called()
env_mock.step.assert_called_once()
manager_mock = tc.managers[brain_name]
manager_mock.add_experiences.assert_called_once_with(
new_step_info.current_all_step_result[brain_name],
0,
new_step_info.brain_name_to_action_info[brain_name],
)
env_mock.advance.assert_called_once()
trainer_mock.advance.assert_called_once()

43
ml-agents/mlagents/trainers/tf_policy.py


import numpy as np
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents.trainers.policy import Policy

from mlagents.trainers import tensorflow_to_barracuda as tf2bc
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.env_manager import get_global_agent_id
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.models import LearningModel