
Merge branch 'develop-attention-refactor' into develop-centralizedcritic-mm

/develop/centralizedcritic
Ervin Teng, 4 years ago
Current commit: aba633b2
56 changed files with 520 additions and 1060 deletions
  1. .gitignore (2 changes)
  2. .pre-commit-config.yaml (12 changes)
  3. .yamato/com.unity.ml-agents-pack.yml (4 changes)
  4. .yamato/com.unity.ml-agents-performance.yml (15 changes)
  5. .yamato/com.unity.ml-agents-test.yml (4 changes)
  6. .yamato/compressed-sensor-test.yml (19 changes)
  7. .yamato/gym-interface-test.yml (19 changes)
  8. .yamato/protobuf-generation-test.yml (29 changes)
  9. .yamato/python-ll-api-test.yml (27 changes)
  10. .yamato/standalone-build-test.yml (30 changes)
  11. .yamato/training-int-tests.yml (29 changes)
  12. Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (8 changes)
  13. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (5 changes)
  14. README.md (2 changes)
  15. com.unity.ml-agents/CHANGELOG.md (4 changes)
  16. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (3 changes)
  17. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (2 changes)
  18. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (2 changes)
  19. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (4 changes)
  20. com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)
  21. docs/FAQ.md (45 changes)
  22. docs/Learning-Environment-Design-Agents.md (34 changes)
  23. ml-agents-envs/mlagents_envs/env_utils.py (3 changes)
  24. ml-agents-envs/mlagents_envs/environment.py (2 changes)
  25. ml-agents-envs/mlagents_envs/registry/binary_utils.py (2 changes)
  26. ml-agents-envs/mlagents_envs/rpc_utils.py (15 changes)
  27. ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (5 changes)
  28. ml-agents/mlagents/torch_utils/torch.py (2 changes)
  29. ml-agents/mlagents/trainers/agent_processor.py (8 changes)
  30. ml-agents/mlagents/trainers/env_manager.py (3 changes)
  31. ml-agents/mlagents/trainers/learn.py (1 change)
  32. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
  33. ml-agents/mlagents/trainers/policy/policy.py (14 changes)
  34. ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)
  35. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)
  36. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)
  37. ml-agents/mlagents/trainers/settings.py (1 change)
  38. ml-agents/mlagents/trainers/stats.py (78 changes)
  39. ml-agents/mlagents/trainers/tests/__init__.py (2 changes)
  40. ml-agents/mlagents/trainers/tests/check_env_trains.py (4 changes)
  41. ml-agents/mlagents/trainers/tests/test_agent_processor.py (25 changes)
  42. ml-agents/mlagents/trainers/tests/test_learn.py (16 changes)
  43. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)
  44. ml-agents/mlagents/trainers/tests/test_stats.py (43 changes)
  45. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
  46. ml-agents/mlagents/trainers/tests/torch/test_sac.py (2 changes)
  47. ml-agents/mlagents/trainers/torch/attention.py (224 changes)
  48. ml-agents/mlagents/trainers/torch/layers.py (2 changes)
  49. ml-agents/mlagents/trainers/torch/model_serialization.py (13 changes)
  50. ml-agents/mlagents/trainers/torch/networks.py (4 changes)
  51. ml-agents/mlagents/trainers/trainer/rl_trainer.py (2 changes)
  52. ml-agents/tests/yamato/standalone_build_tests.py (13 changes)
  53. ml-agents/tests/yamato/training_int_tests.py (32 changes)
  54. ml-agents/tests/yamato/yamato_utils.py (62 changes)
  55. ml-agents/mlagents/trainers/barracuda.py (609 changes)
  56. .pylintrc (53 changes)

.gitignore (2 changes)


/summaries
# Output Artifacts
/results
# Output Builds
/Builds
# Training environments
/envs

.pre-commit-config.yaml (12 changes)


hooks:
- id: python-check-mock-methods
- repo: https://github.com/pre-commit/mirrors-pylint
rev: v2.4.4
hooks:
- id: pylint
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py|
.*/tests/.*
)$
args: [--score=n]
- repo: https://github.com/mattlqx/pre-commit-search-and-replace
rev: v1.0.3
hooks:

.yamato/com.unity.ml-agents-pack.yml (4 changes)


pack:
name: Pack
agent:
type: Unity::VM::osx
image: package-ci/mac:stable
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.small
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm

.yamato/com.unity.ml-agents-performance.yml (15 changes)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- chmod +x ./utr

expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "DevProject/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/com.unity.ml-agents-performance.yml") AND
NOT pull_request.changes.all match "**/*.md"
recurring:
- branch: master
frequency: daily
artifacts:
logs:
paths:

.yamato/com.unity.ml-agents-test.yml (4 changes)


- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
cancel_old_ci: true
{% if platform.name == "mac" %}
{% if platform.name == "linux" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-create-project-arg="-upmNoDefaultPackages" --extra-utr-arg "reruncount=2"

.yamato/compressed-sensor-test.yml (19 changes)


test_compressed_obs_{{ editor.version }}:
name: Test Compressed Sensor Observation {{ editor.version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestGridCompressed
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestTextureCompressed
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestGridCompressed
python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestTextureCompressed
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/gym-interface-test.yml (19 changes)


---
{% for editor in test_editors %}
test_gym_interface_{{ editor.version }}:
name: Test Mac Gym Interface {{ editor.version }}
name: Test Linux Gym Interface {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/protobuf-generation-test.yml (29 changes)


test_mac_protobuf_generation:
test_linux_protobuf_generation:
type: Unity::VM::osx
image: package-ci/mac:stable
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
HOMEBREW_NO_AUTO_UPDATE: "1"
brew install nuget
sudo apt-get update && sudo apt-get install -y python3-venv nuget
python3 -m venv venv && source venv/bin/activate
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.$GRPC_VERSION/tools/macosx_x64 ./make.sh
python3 -m pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python3 -m pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pushd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/linux_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/linux_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.$GRPC_VERSION/tools/linux_x64 ./make.sh
popd
mkdir -p artifacts
touch artifacts/proto.patch
git diff --exit-code -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" \

pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR
pull_request.changes.any match "com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/**" OR
pull_request.changes.any match "ml-agents-envs/mlagents_envs/communicator_objects/**" OR
pull_request.changes.any match ".yamato/protobuf-generation-test.yml") AND
NOT pull_request.changes.all match "protobuf-definitions/**/*.md"
artifacts:

.yamato/python-ll-api-test.yml (27 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_ll_api_{{ editor.version }}:
name: Test Mac LL-API {{ editor.version }}
test_linux_ll_api_{{ editor.version }}:
name: Test Linux LL-API {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_llapi.py
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/standalone-build-test.yml (30 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:
name: Test Mac Standalone {{ editor.version }}
test_linux_standalone_{{ editor.version }}:
name: Test Linux Standalone {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: i1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
- python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
triggers:
cancel_old_ci: true
expression: |

standalonebuild:
paths:
- "artifacts/testPlayer*/**"
- "artifacts/**/UnityPlayer.so"
{% endfor %}

.yamato/training-int-tests.yml (29 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:
name: Test Mac Fast Training {{ editor.version }}
test_linux_training_int_{{ editor.version }}:
name: Test Linux Fast Training {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.16.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp={{ editor.csharp_backcompat_version }}
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.training_int_tests
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

- "artifacts/inference.onnx.txt"
standalonebuild:
paths:
- "artifacts/testplayer*/**"
- "artifacts/testPlayer*/**"
- "artifacts/models/**"
{% endfor %}

Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (8 changes)


{
const string k_OutputCommandLineFlag = "--mlagents-build-output-path";
const string k_SceneCommandLineFlag = "--mlagents-build-scene-path";
private const string k_BuildTargetFlag = "--mlagents-build-target";
public static void BuildStandalonePlayerOSX()
{

var buildTarget = BuildTarget.StandaloneOSX;
var args = Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length - 1; i++)

{
scenePath = args[i + 1];
}
else if (args[i] == k_BuildTargetFlag)
{
buildTarget = (BuildTarget)Enum.Parse(typeof(BuildTarget), args[i + 1], ignoreCase: true);
}
}
string[] scenes = { scenePath };

BuildTarget.StandaloneOSX,
buildTarget,
BuildOptions.None
);
var isOk = buildResult.summary.result == BuildResult.Succeeded;

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (5 changes)


m_AgentRb = GetComponent<Rigidbody>();
m_GroundRenderer = ground.GetComponent<Renderer>();
m_GroundMaterial = m_GroundRenderer.material;
m_statsRecorder = Academy.Instance.StatsRecorder;
}
public override void CollectObservations(VectorSensor sensor)

{
SetReward(1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f));
m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum);
}
EndEpisode();
}

symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position;
symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position;
}
m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum);
}
}

README.md (2 changes)


In addition to our own documentation, here are some additional, relevant
articles:
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/machine-learning/a-game-developer-learns-machine-learning-intent/)
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/posts/a-game-developer-learns-machine-learning-intent)
- [Explore Unity Technologies ML-Agents Exclusively on Intel Architecture](https://software.intel.com/en-us/articles/explore-unity-technologies-ml-agents-exclusively-on-intel-architecture)
- [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)

com.unity.ml-agents/CHANGELOG.md (4 changes)


### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
#### ml-agents / ml-agents-envs / gym-unity (Python)
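As an illustration of the Python-side counterpart to this change, here is a minimal sketch of recording a summed stat through the trainers' `StatsReporter`, assuming the `add_stat` signature and `aggregated_value` property shown in the `ml-agents/mlagents/trainers/stats.py` hunk later in this diff; the category name is a made-up example (it echoes the one in the test hunk below).

```python
from mlagents.trainers.stats import StatsReporter
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

# "FakeCategory" is a hypothetical category; reporters share state per category.
reporter = StatsReporter("FakeCategory")

# Default aggregation averages values over the summary period...
reporter.add_stat("Environment/Episode Length", 42.0)
# ...while SUM accumulates them, matching StatAggregationMethod.Sum in C#.
reporter.add_stat("Goal/Correct", 1.0, StatsAggregationMethod.SUM)
reporter.add_stat("Goal/Correct", 1.0, StatsAggregationMethod.SUM)

summary = reporter.get_stats_summaries("Goal/Correct")
print(summary.aggregated_value)  # 2.0: the sum, not the mean
```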

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (3 changes)


ActionSpec actionSpec;
if (bpp.ActionSpec == null)
{
// Disable deprecation warnings so we can set legacy fields
#pragma warning disable CS0618
var spaceType = (SpaceType)bpp.VectorActionSpaceTypeDeprecated;
if (spaceType == SpaceType.Continuous)
{

{
actionSpec = ActionSpec.MakeDiscrete(bpp.VectorActionSizeDeprecated.ToArray());
}
#pragma warning restore CS0618
}
else
{

com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (2 changes)


namespace Unity.MLAgents.Sensors
{
public class BufferSensor : ISensor, IDimensionPropertiesSensor
internal class BufferSensor : ISensor, IDimensionPropertiesSensor
{
private int m_MaxNumObs;
private int m_ObsSize;

com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (2 changes)


/// A component for BufferSensor.
/// </summary>
[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
internal class BufferSensorComponent : SensorComponent
{
public int ObservableSize;
public int MaxNumObservables;

com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (4 changes)


/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
public enum DimensionProperty
internal enum DimensionProperty
{
/// <summary>
/// No properties specified.

/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
public interface IDimensionPropertiesSensor
internal interface IDimensionPropertiesSensor
{
/// <summary>
/// Returns the array containing the properties of each dimensions of the

com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)


/// To avoid conflicts when training with multiple concurrent environments, only
/// stats from worker index 0 will be tracked.
/// </summary>
MostRecent = 1
MostRecent = 1,
/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
}
/// <summary>

docs/FAQ.md (45 changes)


## Installation problems
### Tensorflow dependency
ML Agents requires TensorFlow; if you don't already have it installed, `pip`
will try to install it when you install the ml-agents package.
If you see a message like this
```console
ERROR: Could not find a version that satisfies the requirement tensorflow<2.0,>=1.7 (from mlagents) (from versions: none)
ERROR: No matching distribution found for tensorflow<2.0,>=1.7 (from mlagents)
```
it means that there is no version of TensorFlow for your python environment.
Some known potential causes are:
- You're using 32-bit python instead of 64-bit. See the answer
[here](https://stackoverflow.com/a/1405971/224264) for how to tell which you
have installed.
- You have the `tensorflow-gpu` package installed. This is equivalent to
`tensorflow`; however, `pip` doesn't recognize it as satisfying the
requirement. The best way to resolve this is to update to
`tensorflow==1.15.0`, which provides GPU support in the same package (see the
[release notes](https://github.com/tensorflow/tensorflow/issues/33374) for
more details).
- You're on another architecture (e.g. ARM) which requires vendor provided
packages.
In all of these cases, the problem is with your pip/Python environment setup. Please
search the TensorFlow GitHub issues for similar problems and solutions before
creating a new issue.
#### Visual C++ Dependency (Windows Users)
When running `mlagents-learn`, if you see a stack trace with a message like this:
```console
ImportError: DLL load failed: The specified module could not be found.
```
then one of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll` (new), is missing on your machine. The `import tensorflow` command prints this warning message.
To solve it, download and install (then reboot) the [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-my/help/2977003/the-latest-supported-visual-c-downloads).
For more details, please see the [TensorFlow 2.1.0 release notes](https://github.com/tensorflow/tensorflow/releases/tag/v2.1.0)
and the [TensorFlow github issue](https://github.com/tensorflow/tensorflow/issues/22794#issuecomment-573297027).
## Environment Permission Error
If you directly import your Unity environment without building it in the editor,

docs/Learning-Environment-Design-Agents.md (34 changes)


- [Visual Observation Summary & Best Practices](#visual-observation-summary--best-practices)
- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Actions](#actions)
- [Actions and Actuators](#actions-and-actuators)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)
- [Masking Discrete Actions](#masking-discrete-actions)

- Use as few rays and tags as necessary to solve the problem in order to improve
learning stability and agent performance.
## Actions
## Actions and Actuators
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions that an Agent can use:
**Continuous** and **Discrete**.
action is passed to an `IActionReceiver` (either an `Agent` or an `IActuator`)
as the `ActionBuffers` parameter when the Academy invokes the
`IActionReceiver.OnActionReceived()` function.
There are two types of actions supported: **Continuous** and **Discrete**.
Neither the Policy nor the training algorithm knows anything about what the
action values themselves mean. The training algorithm simply tries different

branches.
- You cannot mask all the actions of a branch.
- You cannot mask actions in continuous control.
### IActuator interface and ActuatorComponents
The Actuator API allows users to abstract behavior out of Agents and into
components (similar to the ISensor API). The `IActuator` interface and `Agent`
class both implement the `IActionReceiver` interface to allow for backward compatibility
with the current `Agent.OnActionReceived` and `Agent.CollectDiscreteActionMasks` APIs.
This means you will not have to change your code until you decide to use the `IActuator` API.
Like the `ISensor` interface, the `IActuator` interface is intended for advanced users.
The `ActuatorComponent` abstract class is used to create the actual `IActuator` at
runtime. It must be attached to the same `GameObject` as the `Agent`, or to a
child `GameObject`. Actuators and all of their data structures are initialized
during `Agent.Initialize`. This was done to prevent unexpected allocations at runtime.
You can find an example of an `IActuator` implementation in the `Basic` example scene.
**NOTE**: you do not need to adjust the Actions in the Agent's
`Behavior Parameters` when using an `IActuator` and `ActuatorComponents`.
Internally, `Agent.OnActionReceived` uses an `IActuator` to send actions to the Agent,
although this is mostly abstracted from the user.
### Actions Summary & Best Practices

ml-agents-envs/mlagents_envs/env_utils.py (3 changes)


candidates = glob.glob(env_path + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86")
if len(candidates) == 0:
if os.path.isfile(env_path):
candidates = [env_path]
if len(candidates) > 0:
launch_string = candidates[0]

ml-agents-envs/mlagents_envs/environment.py (2 changes)


"""
try:
# A negative value -N indicates that the child was terminated by signal N (POSIX only).
s = signal.Signals(-returncode) # pylint: disable=no-member
s = signal.Signals(-returncode)
return s.name
except Exception:
# Should generally be a ValueError, but catch everything just in case.

ml-agents-envs/mlagents_envs/registry/binary_utils.py (2 changes)


break
try:
download_and_extract_zip(url, name)
except Exception: # pylint: disable=W0702
except Exception:
if attempt + 1 < NUMBER_ATTEMPTS:
logger.warning(
f"Attempt {attempt + 1} / {NUMBER_ATTEMPTS}"

ml-agents-envs/mlagents_envs/rpc_utils.py (15 changes)


def _process_visual_observation(
obs_index: int,
shape: Tuple[int, int, int],
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32)

@timed
def _process_vector_observation(
obs_index: int,
shape: Tuple[int, ...],
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0,) + shape, dtype=np.float32)

@timed
def steps_from_proto(
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
behavior_spec: BehaviorSpec,
agent_info_list: Collection[AgentInfoProto], behavior_spec: BehaviorSpec
) -> Tuple[DecisionSteps, TerminalSteps]:
decision_agent_info_list = [
agent_info for agent_info in agent_info_list if not agent_info.done

ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (5 changes)


# Only the most recent value is reported.
MOST_RECENT = 1
# Values within the summary period are summed up before reporting.
SUM = 2
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]

def on_message_received(self, msg: IncomingMessage) -> None:
"""
Receive the message from the environment, and save it for later retrieval.
:param msg:
:return:
"""

def get_and_reset_stats(self) -> EnvironmentStats:
"""
Returns the current stats, and resets the internal storage of the stats.
:return:
"""
s = self.stats
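As a hedged illustration of the payload shape these type aliases describe (the stat names and values below are made up):

```python
from mlagents_envs.side_channel.stats_side_channel import (
    EnvironmentStats,
    StatsAggregationMethod,
)

# Each stat name maps to the (value, aggregation method) tuples collected
# from the environment since the last get_and_reset_stats() call.
example: EnvironmentStats = {
    "Goal/Correct": [(1.0, StatsAggregationMethod.SUM)],
    "Agent/Speed": [(0.8, StatsAggregationMethod.AVERAGE)],
}
```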

ml-agents/mlagents/torch_utils/torch.py (2 changes)


torch.set_num_threads(cpu_utils.get_num_threads_to_use())
os.environ["KMP_BLOCKTIME"] = "0"
# Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701
# pylint: disable=E1101
if torch.cuda.is_available():
torch.set_default_tensor_type(torch.cuda.FloatTensor)
device = torch.device("cuda")

ml-agents/mlagents/trainers/agent_processor.py (8 changes)


):
"""
Create an AgentProcessor.
:param trainer: Trainer instance connected to this AgentProcessor. The trainer is given a trajectory
when it is finished.
:param policy: Policy instance associated with this AgentProcessor.

"""
Pass stats from the environment to the StatsReporter.
Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
The worker_id is used to determin whether StatsReporter.set_stat should be used.
The worker_id is used to determine whether StatsReporter.set_stat should be used.
:param env_stats:
:param worker_id:
:return:

if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val)
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.SUM:
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.MOST_RECENT:
# In order to prevent conflicts between multiple environments,
# only stats from the first environment are recorded.
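Putting the hunk above together, the dispatch reads roughly as follows; `record_env_stat`, `stats_reporter`, and `worker_id` are hypothetical stand-ins for the corresponding `AgentManager` method and attributes, so treat this as a sketch rather than the exact method body:

```python
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

def record_env_stat(stats_reporter, worker_id, stat_name, val, agg_type):
    if agg_type in (StatsAggregationMethod.AVERAGE, StatsAggregationMethod.SUM):
        # AVERAGE and SUM stats are forwarded along with their aggregation method.
        stats_reporter.add_stat(stat_name, val, agg_type)
    elif agg_type == StatsAggregationMethod.MOST_RECENT and worker_id == 0:
        # Only worker 0 reports MOST_RECENT stats, to avoid conflicts
        # between multiple concurrent environments.
        stats_reporter.set_stat(stat_name, val)
```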

ml-agents/mlagents/trainers/env_manager.py (3 changes)


_policy = self.agent_managers[brain_name].policy_queue.get_nowait()
except AgentManagerQueue.Empty:
if _policy is not None:
# policy_queue contains Policy, but we need a TFPolicy here
self.set_policy(brain_name, _policy) # type: ignore
self.set_policy(brain_name, _policy)
# Step the environments
new_step_infos = self._step()
return new_step_infos

ml-agents/mlagents/trainers/learn.py (1 change)


def get_version_string() -> str:
# pylint: disable=no-member
return f""" Version information:
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents_envs.__version__},

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)


from mlagents.trainers.torch.utils import ModelUtils
class TorchOptimizer(Optimizer): # pylint: disable=W0223
class TorchOptimizer(Optimizer):
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
super().__init__()
self.policy = policy

ml-agents/mlagents/trainers/policy/policy.py (14 changes)


self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
self.act_size = (
list(self.behavior_spec.action_spec.discrete_branches)
if self.behavior_spec.action_spec.is_discrete()
else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
sen_spec.shape[0]
for sen_spec in behavior_spec.sensor_specs
if len(sen_spec.shape) == 1
)
self.vis_obs_size = sum(
1 for sen_spec in behavior_spec.sensor_specs if len(sen_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize

ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)


def _extract_masks(self, decision_requests: DecisionSteps) -> np.ndarray:
mask = None
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
num_discrete_flat = np.sum(self.behavior_spec.action_spec.discrete_branches)
mask = torch.ones([len(decision_requests), num_discrete_flat])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)

:param buffer: The buffer with the observations to add to the running estimate
of the distribution.
"""
if self.use_vec_obs and self.normalize:
if self.normalize:
self.actor_critic.update_normalization(buffer)
@timed

for agent_id in decision_requests.agent_id
] # For 1-D array, the iterator order is correct.
run_out = self.evaluate(
decision_requests, global_agent_ids
) # pylint: disable=assignment-from-no-return
run_out = self.evaluate(decision_requests, global_agent_ids)
self.save_memories(global_agent_ids, run_out.get("memory_out"))
self.check_nan_action(run_out.get("action"))
return ActionInfo(

outputs=run_out,
agent_ids=list(decision_requests.agent_id),
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0
@property
def use_vec_obs(self):
return self.vec_obs_size > 0
def get_current_step(self):
"""

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)


"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
The PPO optimizer has a value estimator and a loss function.
:param policy: A TFPolicy object that will be updated by this PPO Optimizer.
:param policy: A TorchPolicy object that will be updated by this PPO Optimizer.
:param trainer_params: Trainer parameters dictionary that specifies the
properties of the trainer.
"""

ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)


# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
stack.enter_context(torch.no_grad())
q1_out, _ = self.q1_network(
inputs,
actions=actions,

with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
stack.enter_context(torch.no_grad())
q2_out, _ = self.q2_network(
inputs,
actions=actions,

ml-agents/mlagents/trainers/settings.py (1 change)


class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9

ml-agents/mlagents/trainers/stats.py (78 changes)


import time
from threading import RLock
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from torch.utils.tensorboard import SummaryWriter

"""
Takes a parameter dictionary and converts it to a human-readable string.
Recurses if there are multiple levels of dict. Used to print out hyperparameters.
param: param_dict: A Dictionary of key, value parameters.
return: A string version of this dictionary.
:param param_dict: A Dictionary of key, value parameters.
:return: A string version of this dictionary.
"""
if not isinstance(param_dict, dict):
return str(param_dict)

mean: float
std: float
num: int
sum: float
aggregation_method: StatsAggregationMethod
return StatsSummary(0.0, 0.0, 0)
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):
if self.aggregation_method == StatsAggregationMethod.SUM:
return self.sum
else:
return self.mean
class StatsPropertyType(Enum):

Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param type: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
pass

set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.mean"),
float(stats_summary.mean),
)
set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.sum"),
float(stats_summary.sum),
)

is_training = "Not Training"
if "Is Training" in values:
stats_summary = values["Is Training"]
if stats_summary.mean > 0.0:
if stats_summary.aggregated_value > 0.0:
is_training = "Training"
elapsed_time = time.time() - self.training_start_time

def __init__(self, base_dir: str, clear_past_data: bool = False):
"""
A StatsWriter that writes to a Tensorboard summary.
category.
category.
"""
self.summary_writers: Dict[str, SummaryWriter] = {}
self.base_dir: str = base_dir

) -> None:
self._maybe_create_summary_writer(category)
for key, value in values.items():
self.summary_writers[category].add_scalar(f"{key}", value.mean, step)
self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

writers: List[StatsWriter] = []
stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
lock = RLock()
stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict(
lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE)
)
def __init__(self, category: str):
"""

Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param key: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
with StatsReporter.lock:

def add_stat(self, key: str, value: float) -> None:
def add_stat(
self,
key: str,
value: float,
aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE,
) -> None:
:param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE.
StatsReporter.stats_aggregation[self.category][key] = aggregation
StatsReporter.stats_aggregation[self.category][
key
] = StatsAggregationMethod.MOST_RECENT
def write_stats(self, step: int) -> None:
"""

:param step: Training step which to write these stats as.
"""
with StatsReporter.lock:

def get_stats_summaries(self, key: str) -> StatsSummary:
"""
Get the mean, std, and count of a particular statistic, since last write.
Get the mean, std, count, sum and aggregation method of a particular statistic, since last write.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
:returns: A StatsSummary containing summary statistics.
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
stat_values = StatsReporter.stats_dict[self.category][key]
if len(stat_values) == 0:
return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)
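The net effect of the new five-field `StatsSummary` is that writers can stay aggregation-agnostic and just read `aggregated_value`, which is what `TensorboardWriter.write_stats` now logs per key. A small sketch of the behavior, with made-up numbers mirroring the test expectations elsewhere in this diff:

```python
from mlagents.trainers.stats import StatsSummary
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

avg = StatsSummary(mean=2.1, std=1.0, num=2, sum=4.2,
                   aggregation_method=StatsAggregationMethod.AVERAGE)
tot = StatsSummary(mean=2.1, std=1.0, num=2, sum=4.2,
                   aggregation_method=StatsAggregationMethod.SUM)

# aggregated_value picks the sum for SUM stats and the mean otherwise.
assert avg.aggregated_value == 2.1
assert tot.aggregated_value == 4.2
```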

ml-agents/mlagents/trainers/tests/__init__.py (2 changes)


if (
"ml-agents/mlagents" in filename
or "ml-agents-envs/mlagents" in filename
) and "tensorflow_to_barracuda.py" not in filename:
):
raise ValueError(
f"float64 array created. Set dtype=np.float32 instead of current dtype={kwargs_dtype}. "
f"Run pytest with TEST_ENFORCE_NUMPY_FLOAT32=1 to confirm fix."

ml-agents/mlagents/trainers/tests/check_env_trains.py (4 changes)


) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
print(step, val, stats_summary.aggregated_value)
self._last_reward_summary[category] = stats_summary.aggregated_value
# The reward processor is passed as an argument to _check_environment_trains.

ml-agents/mlagents/trainers/tests/test_agent_processor.py (25 changes)


{
"averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
"most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
"summed": [(3.1, StatsAggregationMethod.SUM)],
"summed": [(1.1, StatsAggregationMethod.SUM)],
},
]
for env_stats in all_env_stats:

"averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
"most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
"averaged": StatsSummary(
mean=2.0,
std=mock.ANY,
num=2,
sum=4.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
),
"most_recent": StatsSummary(
mean=4.0,
std=0.0,
num=1,
sum=4.0,
aggregation_method=StatsAggregationMethod.MOST_RECENT,
),
"summed": StatsSummary(
mean=2.1,
std=mock.ANY,
num=2,
sum=4.2,
aggregation_method=StatsAggregationMethod.SUM,
),
}
stats_reporter.write_stats(123)
writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)

ml-agents/mlagents/trainers/tests/test_learn.py (16 changes)


from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
import os.path
def basic_options(extra_args=None):

learn.run_training(0, options)
mock_init.assert_called_once_with(
trainer_factory_mock.return_value,
"results/ppo",
os.path.join("results", "ppo"),
"ppo",
"mock_param_manager",
True,

"results/ppo", False, False, "results/notuselessrun"
os.path.join("results", "ppo"),
False,
False,
os.path.join("results", "notuselessrun"),
)
write_timing_tree_mock.assert_called_once_with(
os.path.join("results", "ppo", "run_logs")
)
write_run_options_mock.assert_called_once_with(
os.path.join("results", "ppo"), options
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
write_run_options_mock.assert_called_once_with("results/ppo", options)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)


from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
import os.path
# Add concrete implementations of abstract methods

mock_model_saver.model_path = self.artifact_path
mock_model_saver.save_checkpoint.side_effect = checkpoint_path
self.model_saver = mock_model_saver
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()

trainer.brain_name,
ModelCheckpoint(
step,
f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}",
None,
mock.ANY,
),

ml-agents/mlagents/trainers/tests/test_stats.py (43 changes)


GaugeWriter,
ConsoleWriter,
StatsPropertyType,
StatsAggregationMethod,
)

category = "category1"
with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# Test that the filewriter has been created and the directory has been created.

def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file
time.sleep(1.0)

with self.assertLogs("mlagents.trainers", level="INFO") as cm:
category = "category1"
console_writer = ConsoleWriter()
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

10,
)
statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1)
statssummary2 = StatsSummary(
mean=0.0,
std=0.0,
num=1,
sum=0.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
"Environment/Cumulative Reward": statssummary1,
"Environment/Cumulative Reward": statssummary2,
"Is Training": statssummary2,
},
10,

category = "category1"
console_writer = ConsoleWriter()
console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.tests.dummy_config import ( # noqa: F401
ppo_dummy_config,
curiosity_dummy_config,
gail_dummy_config,

ml-agents/mlagents/trainers/tests/torch/test_sac.py (2 changes)


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.tests.dummy_config import ( # noqa: F401
sac_dummy_config,
curiosity_dummy_config,
)

ml-agents/mlagents/trainers/torch/attention.py (224 changes)


from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder
from mlagents.trainers.torch.layers import LinearEncoder, Initialization, linear_layer
class MultiHeadAttention(torch.nn.Module):

Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, key_size)
- key: of dimensions (batch_size, number_of_keys, key_size)
- value: of dimensions (batch_size, number_of_keys, value_size)
- query: of dimensions (batch_size, number_of_queries, embedding_size)
- key: of dimensions (batch_size, number_of_keys, embedding_size)
- value: of dimensions (batch_size, number_of_keys, embedding_size)
- The output: (batch_size, number_of_queries, output_size)
- The output: (batch_size, number_of_queries, embedding_size)
def __init__(
self,
query_size: int,
key_size: int,
value_size: int,
output_size: int,
num_heads: int,
embedding_size: int,
):
def __init__(self, embedding_size: int, num_heads: int):
self.output_size = output_size
self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
# self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
# self.fc_k = LinearEncoder(key_size,2, self.n_heads * self.embedding_size)
# self.fc_v = LinearEncoder(value_size,2, self.n_heads * self.embedding_size)
self.fc_out = torch.nn.Linear(
self.n_heads * self.embedding_size, self.output_size
)
self.head_size: int = self.embedding_size // self.n_heads
def forward(
self,

n_q: int,
n_k: int,
number_of_keys: int = -1,
number_of_queries: int = -1,
# This is to avoid using .size() when possible as Barracuda does not support
n_q = number_of_queries if number_of_queries != -1 else query.size(1)
n_k = number_of_keys if number_of_keys != -1 else key.size(1)
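The refactor replaces the per-tensor sizes (`query_size`, `key_size`, `value_size`, `output_size`) with a single `embedding_size` shared by query, key, value, and output. Below is a from-scratch stand-in in plain PyTorch (not the repo's class) showing the shape contract described above, with `head_size = embedding_size // num_heads` as in the hunk; everything in it is a sketch under those assumptions.

```python
import torch

class TinyMultiHeadAttention(torch.nn.Module):
    """Minimal stand-in for the refactored MultiHeadAttention interface."""

    def __init__(self, embedding_size: int, num_heads: int):
        super().__init__()
        assert embedding_size % num_heads == 0
        self.n_heads = num_heads
        self.head_size = embedding_size // num_heads  # as in the hunk above
        self.embedding_size = embedding_size
        self.fc_q = torch.nn.Linear(embedding_size, embedding_size)
        self.fc_k = torch.nn.Linear(embedding_size, embedding_size)
        self.fc_v = torch.nn.Linear(embedding_size, embedding_size)
        self.fc_out = torch.nn.Linear(embedding_size, embedding_size)

    def forward(self, query, key, value):
        b, n_q, _ = query.shape
        n_k = key.shape[1]
        # Project, then split the embedding into heads: (b, heads, n, head_size).
        q = self.fc_q(query).reshape(b, n_q, self.n_heads, self.head_size).transpose(1, 2)
        k = self.fc_k(key).reshape(b, n_k, self.n_heads, self.head_size).transpose(1, 2)
        v = self.fc_v(value).reshape(b, n_k, self.n_heads, self.head_size).transpose(1, 2)
        # Scaled dot-product attention per head.
        att = torch.softmax(q @ k.transpose(2, 3) / self.head_size ** 0.5, dim=-1)
        out = (att @ v).transpose(1, 2).reshape(b, n_q, self.embedding_size)
        return self.fc_out(out), att

mha = TinyMultiHeadAttention(embedding_size=64, num_heads=4)
out, att = mha(torch.rand(2, 3, 64), torch.rand(2, 5, 64), torch.rand(2, 5, 64))
print(out.shape)  # torch.Size([2, 3, 64]): (batch, number_of_queries, embedding_size)
```

Passing `number_of_queries`/`number_of_keys` explicitly, as the repo's version does, lets the exporter avoid `.size()` calls that Barracuda cannot handle; the stand-in above omits that detail for brevity.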