
Merge branch 'develop-attention-refactor' into develop-centralizedcritic-mm

Branch: develop/centralizedcritic
Ervin Teng, 4 years ago
Current commit: aba633b2
56 files changed: 520 insertions, 1,060 deletions
  1. .gitignore (2 changes)
  2. .pre-commit-config.yaml (12 changes)
  3. .yamato/com.unity.ml-agents-pack.yml (4 changes)
  4. .yamato/com.unity.ml-agents-performance.yml (15 changes)
  5. .yamato/com.unity.ml-agents-test.yml (4 changes)
  6. .yamato/compressed-sensor-test.yml (19 changes)
  7. .yamato/gym-interface-test.yml (19 changes)
  8. .yamato/protobuf-generation-test.yml (29 changes)
  9. .yamato/python-ll-api-test.yml (27 changes)
  10. .yamato/standalone-build-test.yml (30 changes)
  11. .yamato/training-int-tests.yml (29 changes)
  12. Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (8 changes)
  13. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (5 changes)
  14. README.md (2 changes)
  15. com.unity.ml-agents/CHANGELOG.md (4 changes)
  16. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (3 changes)
  17. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (2 changes)
  18. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (2 changes)
  19. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (4 changes)
  20. com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)
  21. docs/FAQ.md (45 changes)
  22. docs/Learning-Environment-Design-Agents.md (34 changes)
  23. ml-agents-envs/mlagents_envs/env_utils.py (3 changes)
  24. ml-agents-envs/mlagents_envs/environment.py (2 changes)
  25. ml-agents-envs/mlagents_envs/registry/binary_utils.py (2 changes)
  26. ml-agents-envs/mlagents_envs/rpc_utils.py (15 changes)
  27. ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (5 changes)
  28. ml-agents/mlagents/torch_utils/torch.py (2 changes)
  29. ml-agents/mlagents/trainers/agent_processor.py (8 changes)
  30. ml-agents/mlagents/trainers/env_manager.py (3 changes)
  31. ml-agents/mlagents/trainers/learn.py (1 change)
  32. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
  33. ml-agents/mlagents/trainers/policy/policy.py (14 changes)
  34. ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)
  35. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)
  36. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)
  37. ml-agents/mlagents/trainers/settings.py (1 change)
  38. ml-agents/mlagents/trainers/stats.py (78 changes)
  39. ml-agents/mlagents/trainers/tests/__init__.py (2 changes)
  40. ml-agents/mlagents/trainers/tests/check_env_trains.py (4 changes)
  41. ml-agents/mlagents/trainers/tests/test_agent_processor.py (25 changes)
  42. ml-agents/mlagents/trainers/tests/test_learn.py (16 changes)
  43. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)
  44. ml-agents/mlagents/trainers/tests/test_stats.py (43 changes)
  45. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
  46. ml-agents/mlagents/trainers/tests/torch/test_sac.py (2 changes)
  47. ml-agents/mlagents/trainers/torch/attention.py (224 changes)
  48. ml-agents/mlagents/trainers/torch/layers.py (2 changes)
  49. ml-agents/mlagents/trainers/torch/model_serialization.py (13 changes)
  50. ml-agents/mlagents/trainers/torch/networks.py (4 changes)
  51. ml-agents/mlagents/trainers/trainer/rl_trainer.py (2 changes)
  52. ml-agents/tests/yamato/standalone_build_tests.py (13 changes)
  53. ml-agents/tests/yamato/training_int_tests.py (32 changes)
  54. ml-agents/tests/yamato/yamato_utils.py (62 changes)
  55. ml-agents/mlagents/trainers/barracuda.py (609 changes)
  56. .pylintrc (53 changes)

.gitignore (2 changes)


/summaries
# Output Artifacts
/results
# Output Builds
/Builds
# Training environments
/envs

.pre-commit-config.yaml (12 changes)


hooks:
- id: python-check-mock-methods
- repo: https://github.com/pre-commit/mirrors-pylint
rev: v2.4.4
hooks:
- id: pylint
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py|
.*/tests/.*
)$
args: [--score=n]
- repo: https://github.com/mattlqx/pre-commit-search-and-replace
rev: v1.0.3
hooks:

.yamato/com.unity.ml-agents-pack.yml (4 changes)


pack:
name: Pack
agent:
type: Unity::VM::osx
image: package-ci/mac:stable
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.small
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm

.yamato/com.unity.ml-agents-performance.yml (15 changes)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- chmod +x ./utr

expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "DevProject/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/com.unity.ml-agents-performance.yml") AND
NOT pull_request.changes.all match "**/*.md"
recurring:
- branch: master
frequency: daily
artifacts:
logs:
paths:

.yamato/com.unity.ml-agents-test.yml (4 changes)


- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
cancel_old_ci: true
{% if platform.name == "mac" %}
{% if platform.name == "linux" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-create-project-arg="-upmNoDefaultPackages" --extra-utr-arg "reruncount=2"

.yamato/compressed-sensor-test.yml (19 changes)


test_compressed_obs_{{ editor.version }}:
name: Test Compressed Sensor Observation {{ editor.version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestGridCompressed
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestTextureCompressed
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestGridCompressed
python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestTextureCompressed
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/gym-interface-test.yml (19 changes)


---
{% for editor in test_editors %}
test_gym_interface_{{ editor.version }}:
name: Test Mac Gym Interface {{ editor.version }}
name: Test Linux Gym Interface {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/protobuf-generation-test.yml (29 changes)


test_mac_protobuf_generation:
test_linux_protobuf_generation:
type: Unity::VM::osx
image: package-ci/mac:stable
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
HOMEBREW_NO_AUTO_UPDATE: "1"
brew install nuget
sudo apt-get update && sudo apt-get install -y python3-venv nuget
python3 -m venv venv && source venv/bin/activate
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.$GRPC_VERSION/tools/macosx_x64 ./make.sh
python3 -m pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python3 -m pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pushd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/linux_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/linux_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.$GRPC_VERSION/tools/linux_x64 ./make.sh
popd
mkdir -p artifacts
touch artifacts/proto.patch
git diff --exit-code -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" \

pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR
pull_request.changes.any match "com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/**" OR
pull_request.changes.any match "ml-agents-envs/mlagents_envs/communicator_objects/**" OR
pull_request.changes.any match ".yamato/protobuf-generation-test.yml") AND
NOT pull_request.changes.all match "protobuf-definitions/**/*.md"
artifacts:

.yamato/python-ll-api-test.yml (27 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_ll_api_{{ editor.version }}:
name: Test Mac LL-API {{ editor.version }}
test_linux_ll_api_{{ editor.version }}:
name: Test Linux LL-API {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_llapi.py
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/standalone-build-test.yml (30 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:
name: Test Mac Standalone {{ editor.version }}
test_linux_standalone_{{ editor.version }}:
name: Test Linux Standalone {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: i1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
- python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
triggers:
cancel_old_ci: true
expression: |

standalonebuild:
paths:
- "artifacts/testPlayer*/**"
- "artifacts/**/UnityPlayer.so"
{% endfor %}

.yamato/training-int-tests.yml (29 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:
name: Test Mac Fast Training {{ editor.version }}
test_linux_training_int_{{ editor.version }}:
name: Test Linux Fast Training {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.16.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp={{ editor.csharp_backcompat_version }}
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.training_int_tests
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

- "artifacts/inference.onnx.txt"
standalonebuild:
paths:
- "artifacts/testplayer*/**"
- "artifacts/testPlayer*/**"
- "artifacts/models/**"
{% endfor %}

Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (8 changes)


{
const string k_OutputCommandLineFlag = "--mlagents-build-output-path";
const string k_SceneCommandLineFlag = "--mlagents-build-scene-path";
private const string k_BuildTargetFlag = "--mlagents-build-target";
public static void BuildStandalonePlayerOSX()
{

var buildTarget = BuildTarget.StandaloneOSX;
var args = Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length - 1; i++)

{
scenePath = args[i + 1];
}
else if (args[i] == k_BuildTargetFlag)
{
buildTarget = (BuildTarget)Enum.Parse(typeof(BuildTarget), args[i + 1], ignoreCase: true);
}
}
string[] scenes = { scenePath };

BuildTarget.StandaloneOSX,
buildTarget,
BuildOptions.None
);
var isOk = buildResult.summary.result == BuildResult.Succeeded;

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (5 changes)


m_AgentRb = GetComponent<Rigidbody>();
m_GroundRenderer = ground.GetComponent<Renderer>();
m_GroundMaterial = m_GroundRenderer.material;
m_statsRecorder = Academy.Instance.StatsRecorder;
}
public override void CollectObservations(VectorSensor sensor)

{
SetReward(1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f));
m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum);
}
EndEpisode();
}

symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position;
symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position;
}
m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum);
}
}

README.md (2 changes)


In addition to our own documentation, here are some additional, relevant
articles:
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/machine-learning/a-game-developer-learns-machine-learning-intent/)
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/posts/a-game-developer-learns-machine-learning-intent)
- [Explore Unity Technologies ML-Agents Exclusively on Intel Architecture](https://software.intel.com/en-us/articles/explore-unity-technologies-ml-agents-exclusively-on-intel-architecture)
- [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)

com.unity.ml-agents/CHANGELOG.md (4 changes)


### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
#### ml-agents / ml-agents-envs / gym-unity (Python)

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (3 changes)


ActionSpec actionSpec;
if (bpp.ActionSpec == null)
{
// Disable deprecation warnings so we can set legacy fields
#pragma warning disable CS0618
var spaceType = (SpaceType)bpp.VectorActionSpaceTypeDeprecated;
if (spaceType == SpaceType.Continuous)
{

{
actionSpec = ActionSpec.MakeDiscrete(bpp.VectorActionSizeDeprecated.ToArray());
}
#pragma warning restore CS0618
}
else
{

com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (2 changes)


namespace Unity.MLAgents.Sensors
{
public class BufferSensor : ISensor, IDimensionPropertiesSensor
internal class BufferSensor : ISensor, IDimensionPropertiesSensor
{
private int m_MaxNumObs;
private int m_ObsSize;

com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (2 changes)


/// A component for BufferSensor.
/// </summary>
[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
internal class BufferSensorComponent : SensorComponent
{
public int ObservableSize;
public int MaxNumObservables;

com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (4 changes)


/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
public enum DimensionProperty
internal enum DimensionProperty
{
/// <summary>
/// No properties specified.

/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
public interface IDimensionPropertiesSensor
internal interface IDimensionPropertiesSensor
{
/// <summary>
/// Returns the array containing the properties of each dimensions of the

com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)


/// To avoid conflicts when training with multiple concurrent environments, only
/// stats from worker index 0 will be tracked.
/// </summary>
MostRecent = 1
MostRecent = 1,
/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
}
/// <summary>

docs/FAQ.md (45 changes)


## Installation problems
### Tensorflow dependency
ML Agents requires TensorFlow; if you don't already have it installed, `pip`
will try to install it when you install the ml-agents package.
If you see a message like this
```console
ERROR: Could not find a version that satisfies the requirement tensorflow<2.0,>=1.7 (from mlagents) (from versions: none)
ERROR: No matching distribution found for tensorflow<2.0,>=1.7 (from mlagents)
```
it means that there is no version of TensorFlow for your python environment.
Some known potential causes are:
- You're using 32-bit python instead of 64-bit. See the answer
[here](https://stackoverflow.com/a/1405971/224264) for how to tell which you
have installed.
- You have the `tensorflow-gpu` package installed. This is equivalent to
`tensorflow`, however `pip` doesn't recognize this. The best way to resolve
this is to update to `tensorflow==1.15.0` which provides GPU support in the
same package (see the
[release notes](https://github.com/tensorflow/tensorflow/issues/33374) for
more details.)
- You're on another architecture (e.g. ARM) which requires vendor provided
packages.
In all of these cases, the issue is a pip/python environment setup issue. Please
search the tensorflow github issues for similar problems and solutions before
creating a new issue.
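
As a quick sanity check for the 32-bit interpreter cause above, the pointer size of the running Python can be inspected directly (a minimal sketch, not part of the original FAQ):

```python
# Minimal bitness check: TensorFlow only publishes 64-bit wheels, so a
# 32-bit interpreter will never find a matching distribution.
import struct

bits = struct.calcsize("P") * 8  # pointer size in bits
print(f"This Python interpreter is {bits}-bit.")
```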
#### Visual C++ Dependency (Windows Users)
When running `mlagents-learn`, if you see a stack trace with a message like this:
```console
ImportError: DLL load failed: The specified module could not be found.
```
then one of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll` (new), is missing on your machine. The `import tensorflow` command will print this warning message.
To solve it, download and install the [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-my/help/2977003/the-latest-supported-visual-c-downloads), then reboot.
For more details, please see the [TensorFlow 2.1.0 release notes](https://github.com/tensorflow/tensorflow/releases/tag/v2.1.0)
and the [TensorFlow github issue](https://github.com/tensorflow/tensorflow/issues/22794#issuecomment-573297027).
## Environment Permission Error
If you directly import your Unity environment without building it in the editor,

docs/Learning-Environment-Design-Agents.md (34 changes)


- [Visual Observation Summary & Best Practices](#visual-observation-summary--best-practices)
- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Actions](#actions)
- [Actions and Actuators](#actions-and-actuators)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)
- [Masking Discrete Actions](#masking-discrete-actions)

- Use as few rays and tags as necessary to solve the problem in order to improve
learning stability and agent performance.
## Actions
## Actions and Actuators
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions that an Agent can use:
**Continuous** and **Discrete**.
action is passed to an `IActionReceiver` (either an `Agent` or an `IActuator`)
as the `ActionBuffers` parameter when the Academy invokes the
`IActionReceiver.OnActionReceived()` function.
There are two types of actions supported: **Continuous** and **Discrete**.
Neither the Policy nor the training algorithm know anything about what the
action values themselves mean. The training algorithm simply tries different

branches.
- You cannot mask all the actions of a branch.
- You cannot mask actions in continuous control.
### IActuator interface and ActuatorComponents
The Actuator API allows users to abstract behavior out of Agents and into
components (similar to the ISensor API). The `IActuator` interface and `Agent`
class both implement the `IActionReceiver` interface to allow for backward compatibility
with the current `Agent.OnActionReceived` and `Agent.CollectDiscreteActionMasks` APIs.
This means you will not have to change your code until you decide to use the `IActuator` API.
Like the `ISensor` interface, the `IActuator` interface is intended for advanced users.
The `ActuatorComponent` abstract class is used to create the actual `IActuator` at
runtime. It must be attached to the same `GameObject` as the `Agent`, or to a
child `GameObject`. Actuators and all of their data structures are initialized
during `Agent.Initialize`. This was done to prevent unexpected allocations at runtime.
You can find an example of an `IActuator` implementation in the `Basic` example scene.
**NOTE**: you do not need to adjust the Actions in the Agent's
`Behavior Parameters` when using an `IActuator` and `ActuatorComponents`.
Internally, `Agent.OnActionReceived` uses an `IActuator` to send actions to the Agent,
although this is mostly abstracted from the user.
### Actions Summary & Best Practices

ml-agents-envs/mlagents_envs/env_utils.py (3 changes)


candidates = glob.glob(env_path + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86")
if len(candidates) == 0:
if os.path.isfile(env_path):
candidates = [env_path]
if len(candidates) > 0:
launch_string = candidates[0]
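
A condensed sketch of the lookup logic in this fragment (the function name is illustrative, not the actual env_utils API): platform-specific suffixes are tried first, then the path itself is accepted if it is already a file.

```python
import glob
import os
from typing import Optional

def find_executable(env_path: str) -> Optional[str]:
    # Try Linux-style suffixes first, then fall back to the bare path.
    candidates = glob.glob(env_path + ".x86_64") or glob.glob(env_path + ".x86")
    if not candidates and os.path.isfile(env_path):
        candidates = [env_path]
    return candidates[0] if candidates else None
```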

ml-agents-envs/mlagents_envs/environment.py (2 changes)


"""
try:
# A negative value -N indicates that the child was terminated by signal N (POSIX only).
s = signal.Signals(-returncode) # pylint: disable=no-member
s = signal.Signals(-returncode)
return s.name
except Exception:
# Should generally be a ValueError, but catch everything just in case.

ml-agents-envs/mlagents_envs/registry/binary_utils.py (2 changes)


break
try:
download_and_extract_zip(url, name)
except Exception: # pylint: disable=W0702
except Exception:
if attempt + 1 < NUMBER_ATTEMPTS:
logger.warning(
f"Attempt {attempt + 1} / {NUMBER_ATTEMPTS}"

ml-agents-envs/mlagents_envs/rpc_utils.py (15 changes)


def _process_visual_observation(
obs_index: int,
shape: Tuple[int, int, int],
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32)

@timed
def _process_vector_observation(
obs_index: int,
shape: Tuple[int, ...],
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0,) + shape, dtype=np.float32)

@timed
def steps_from_proto(
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
behavior_spec: BehaviorSpec,
agent_info_list: Collection[AgentInfoProto], behavior_spec: BehaviorSpec
) -> Tuple[DecisionSteps, TerminalSteps]:
decision_agent_info_list = [
agent_info for agent_info in agent_info_list if not agent_info.done
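
The empty-batch convention used by these helpers can be shown in isolation (a sketch with made-up shapes): when no agents are present, the batch dimension is zero but the observation shape is preserved, so callers can concatenate without special cases.

```python
import numpy as np

shape = (84, 84, 3)  # hypothetical visual observation shape
empty_batch = np.zeros((0,) + shape, dtype=np.float32)
print(empty_batch.shape)  # (0, 84, 84, 3): zero agents, shape intact
```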

ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (5 changes)


# Only the most recent value is reported.
MOST_RECENT = 1
# Values within the summary period are summed up before reporting.
SUM = 2
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]

def on_message_received(self, msg: IncomingMessage) -> None:
"""
Receive the message from the environment, and save it for later retrieval.
:param msg:
:return:
"""

def get_and_reset_stats(self) -> EnvironmentStats:
"""
Returns the current stats, and resets the internal storage of the stats.
:return:
"""
s = self.stats
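
For reference, an `EnvironmentStats` value built against the aliases above might look like this (stat names are illustrative):

```python
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

# Each stat name maps to a list of (value, aggregation method) tuples
# collected during the summary period.
stats = {
    "Goal/Correct": [(1.0, StatsAggregationMethod.SUM)],
    "Goal/Wrong": [(0.0, StatsAggregationMethod.SUM)],
}
```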

ml-agents/mlagents/torch_utils/torch.py (2 changes)


torch.set_num_threads(cpu_utils.get_num_threads_to_use())
os.environ["KMP_BLOCKTIME"] = "0"
# Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701
# pylint: disable=E1101
if torch.cuda.is_available():
torch.set_default_tensor_type(torch.cuda.FloatTensor)
device = torch.device("cuda")

ml-agents/mlagents/trainers/agent_processor.py (8 changes)


):
"""
Create an AgentProcessor.
:param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory
when it is finished.
:param policy: Policy instance associated with this AgentProcessor.

"""
Pass stats from the environment to the StatsReporter.
Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
The worker_id is used to determin whether StatsReporter.set_stat should be used.
The worker_id is used to determine whether StatsReporter.set_stat should be used.
:param env_stats:
:param worker_id:
:return:

if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val)
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.SUM:
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.MOST_RECENT:
# In order to prevent conflicts between multiple environments,
# only stats from the first environment are recorded.
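
A condensed sketch of the routing shown in this hunk, assuming a `StatsReporter`-like object with `add_stat`/`set_stat`: AVERAGE and SUM stats are forwarded together with their aggregation method, while MOST_RECENT stats are only recorded from worker 0.

```python
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

def process_env_stats(stats_reporter, env_stats, worker_id):
    for stat_name, value_list in env_stats.items():
        for val, agg_type in value_list:
            if agg_type in (
                StatsAggregationMethod.AVERAGE,
                StatsAggregationMethod.SUM,
            ):
                # The aggregation method now travels with the value.
                stats_reporter.add_stat(stat_name, val, agg_type)
            elif agg_type == StatsAggregationMethod.MOST_RECENT and worker_id == 0:
                # Only the first environment reports, to avoid conflicts.
                stats_reporter.set_stat(stat_name, val)
```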

ml-agents/mlagents/trainers/env_manager.py (3 changes)


_policy = self.agent_managers[brain_name].policy_queue.get_nowait()
except AgentManagerQueue.Empty:
if _policy is not None:
# policy_queue contains Policy, but we need a TFPolicy here
self.set_policy(brain_name, _policy) # type: ignore
self.set_policy(brain_name, _policy)
# Step the environments
new_step_infos = self._step()
return new_step_infos

ml-agents/mlagents/trainers/learn.py (1 change)


def get_version_string() -> str:
# pylint: disable=no-member
return f""" Version information:
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents_envs.__version__},

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)


from mlagents.trainers.torch.utils import ModelUtils
class TorchOptimizer(Optimizer): # pylint: disable=W0223
class TorchOptimizer(Optimizer):
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
super().__init__()
self.policy = policy

ml-agents/mlagents/trainers/policy/policy.py (14 changes)


self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
self.act_size = (
list(self.behavior_spec.action_spec.discrete_branches)
if self.behavior_spec.action_spec.is_discrete()
else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
sen_spec.shape[0]
for sen_spec in behavior_spec.sensor_specs
if len(sen_spec.shape) == 1
)
self.vis_obs_size = sum(
1 for sen_spec in behavior_spec.sensor_specs if len(sen_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize
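
The size derivation above can be checked with a standalone example (shapes are made up): rank-1 sensor shapes count as vector observations, rank-3 shapes as visual observations.

```python
# Hypothetical sensor shapes: one 8-dim vector, one 84x84 RGB camera,
# one 4-dim vector.
sensor_shapes = [(8,), (84, 84, 3), (4,)]

vec_obs_size = sum(s[0] for s in sensor_shapes if len(s) == 1)  # 12
vis_obs_size = sum(1 for s in sensor_shapes if len(s) == 3)     # 1
print(vec_obs_size, vis_obs_size)
```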

ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)


def _extract_masks(self, decision_requests: DecisionSteps) -> np.ndarray:
mask = None
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
num_discrete_flat = np.sum(self.behavior_spec.action_spec.discrete_branches)
mask = torch.ones([len(decision_requests), num_discrete_flat])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)
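
A toy version of this mask construction (numbers invented): the default mask allows everything, and if the environment sent per-branch masks (True meaning disallowed), they are concatenated and inverted so 1 marks an allowed action.

```python
import numpy as np
import torch

discrete_branches = (2, 3)   # two branches, so the flattened mask has 5 columns
num_agents = 2
num_discrete_flat = int(np.sum(discrete_branches))
mask = torch.ones([num_agents, num_discrete_flat])

action_mask = [
    np.array([[True, False]] * num_agents),         # branch 0
    np.array([[False, False, True]] * num_agents),  # branch 1
]
mask = torch.as_tensor(1 - np.concatenate(action_mask, axis=1))
print(mask)  # 0 where masked out, 1 where allowed
```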

:param buffer: The buffer with the observations to add to the running estimate
of the distribution.
"""
if self.use_vec_obs and self.normalize:
if self.normalize:
self.actor_critic.update_normalization(buffer)
@timed

for agent_id in decision_requests.agent_id
] # For 1-D array, the iterator order is correct.
run_out = self.evaluate(
decision_requests, global_agent_ids
) # pylint: disable=assignment-from-no-return
run_out = self.evaluate(decision_requests, global_agent_ids)
self.save_memories(global_agent_ids, run_out.get("memory_out"))
self.check_nan_action(run_out.get("action"))
return ActionInfo(

outputs=run_out,
agent_ids=list(decision_requests.agent_id),
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0
@property
def use_vec_obs(self):
return self.vec_obs_size > 0
def get_current_step(self):
"""

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)


"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
The PPO optimizer has a value estimator and a loss function.
:param policy: A TFPolicy object that will be updated by this PPO Optimizer.
:param policy: A TorchPolicy object that will be updated by this PPO Optimizer.
:param trainer_params: Trainer parameters dictionary that specifies the
properties of the trainer.
"""

ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)


# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
stack.enter_context(torch.no_grad())
q1_out, _ = self.q1_network(
inputs,
actions=actions,

with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
stack.enter_context(torch.no_grad())
q2_out, _ = self.q2_network(
inputs,
actions=actions,

ml-agents/mlagents/trainers/settings.py (1 change)


class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9

ml-agents/mlagents/trainers/stats.py (78 changes)


import time
from threading import RLock
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from torch.utils.tensorboard import SummaryWriter

"""
Takes a parameter dictionary and converts it to a human-readable string.
Recurses if there are multiple levels of dict. Used to print out hyperparameters.
param: param_dict: A Dictionary of key, value parameters.
return: A string version of this dictionary.
:param param_dict: A Dictionary of key, value parameters.
:return: A string version of this dictionary.
"""
if not isinstance(param_dict, dict):
return str(param_dict)

mean: float
std: float
num: int
sum: float
aggregation_method: StatsAggregationMethod
return StatsSummary(0.0, 0.0, 0)
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):
if self.aggregation_method == StatsAggregationMethod.SUM:
return self.sum
else:
return self.mean
class StatsPropertyType(Enum):

Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param type: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
pass

set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.mean"),
float(stats_summary.mean),
)
set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.sum"),
float(stats_summary.sum),
)

is_training = "Not Training"
if "Is Training" in values:
stats_summary = values["Is Training"]
if stats_summary.mean > 0.0:
if stats_summary.aggregated_value > 0.0:
is_training = "Training"
elapsed_time = time.time() - self.training_start_time

def __init__(self, base_dir: str, clear_past_data: bool = False):
"""
A StatsWriter that writes to a Tensorboard summary.
category.
category.
"""
self.summary_writers: Dict[str, SummaryWriter] = {}
self.base_dir: str = base_dir

) -> None:
self._maybe_create_summary_writer(category)
for key, value in values.items():
self.summary_writers[category].add_scalar(f"{key}", value.mean, step)
self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

writers: List[StatsWriter] = []
stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
lock = RLock()
stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict(
lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE)
)
def __init__(self, category: str):
"""

Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param key: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
with StatsReporter.lock:

def add_stat(self, key: str, value: float) -> None:
def add_stat(
self,
key: str,
value: float,
aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE,
) -> None:
:param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE.
StatsReporter.stats_aggregation[self.category][key] = aggregation
StatsReporter.stats_aggregation[self.category][
key
] = StatsAggregationMethod.MOST_RECENT
def write_stats(self, step: int) -> None:
"""

:param step: Training step which to write these stats as.
"""
with StatsReporter.lock:

def get_stats_summaries(self, key: str) -> StatsSummary:
"""
Get the mean, std, and count of a particular statistic, since last write.
Get the mean, std, count, sum and aggregation method of a particular statistic, since last write.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
:returns: A StatsSummary containing summary statistics.
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
stat_values = StatsReporter.stats_dict[self.category][key]
if len(stat_values) == 0:
return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)
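
Putting the new pieces together, a hedged usage sketch of the post-change API (category and stat names are illustrative): `add_stat` now accepts an aggregation method, and `StatsSummary.aggregated_value` picks the sum or the mean accordingly.

```python
from mlagents.trainers.stats import StatsReporter
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

reporter = StatsReporter("ExampleCategory")
reporter.add_stat("Goal/Correct", 1.0, StatsAggregationMethod.SUM)
reporter.add_stat("Goal/Correct", 1.0, StatsAggregationMethod.SUM)

summary = reporter.get_stats_summaries("Goal/Correct")
# For SUM stats, aggregated_value is the sum (2.0) rather than the mean (1.0).
print(summary.aggregated_value)
```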

ml-agents/mlagents/trainers/tests/__init__.py (2 changes)


if (
"ml-agents/mlagents" in filename
or "ml-agents-envs/mlagents" in filename
) and "tensorflow_to_barracuda.py" not in filename:
):
raise ValueError(
f"float64 array created. Set dtype=np.float32 instead of current dtype={kwargs_dtype}. "
f"Run pytest with TEST_ENFORCE_NUMPY_FLOAT32=1 to confirm fix."

ml-agents/mlagents/trainers/tests/check_env_trains.py (4 changes)


) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
print(step, val, stats_summary.aggregated_value)
self._last_reward_summary[category] = stats_summary.aggregated_value
# The reward processor is passed as an argument to _check_environment_trains.

ml-agents/mlagents/trainers/tests/test_agent_processor.py (25 changes)


{
"averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
"most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
"summed": [(3.1, StatsAggregationMethod.SUM)],
"summed": [(1.1, StatsAggregationMethod.SUM)],
},
]
for env_stats in all_env_stats:

"averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
"most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
"averaged": StatsSummary(
mean=2.0,
std=mock.ANY,
num=2,
sum=4.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
),
"most_recent": StatsSummary(
mean=4.0,
std=0.0,
num=1,
sum=4.0,
aggregation_method=StatsAggregationMethod.MOST_RECENT,
),
"summed": StatsSummary(
mean=2.1,
std=mock.ANY,
num=2,
sum=4.2,
aggregation_method=StatsAggregationMethod.SUM,
),
}
stats_reporter.write_stats(123)
writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)

ml-agents/mlagents/trainers/tests/test_learn.py (16 changes)


from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
import os.path
def basic_options(extra_args=None):

learn.run_training(0, options)
mock_init.assert_called_once_with(
trainer_factory_mock.return_value,
"results/ppo",
os.path.join("results", "ppo"),
"ppo",
"mock_param_manager",
True,

"results/ppo", False, False, "results/notuselessrun"
os.path.join("results", "ppo"),
False,
False,
os.path.join("results", "notuselessrun"),
)
write_timing_tree_mock.assert_called_once_with(
os.path.join("results", "ppo", "run_logs")
)
write_run_options_mock.assert_called_once_with(
os.path.join("results", "ppo"), options
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
write_run_options_mock.assert_called_once_with("results/ppo", options)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)


from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
import os.path
# Add concrete implementations of abstract methods

mock_model_saver.model_path = self.artifact_path
mock_model_saver.save_checkpoint.side_effect = checkpoint_path
self.model_saver = mock_model_saver
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()

trainer.brain_name,
ModelCheckpoint(
step,
f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}",
None,
mock.ANY,
),

ml-agents/mlagents/trainers/tests/test_stats.py (43 changes)


GaugeWriter,
ConsoleWriter,
StatsPropertyType,
StatsAggregationMethod,
)

category = "category1"
with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# Test that the filewriter has been created and the directory has been created.

def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file
time.sleep(1.0)

with self.assertLogs("mlagents.trainers", level="INFO") as cm:
category = "category1"
console_writer = ConsoleWriter()
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

10,
)
statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1)
statssummary2 = StatsSummary(
mean=0.0,
std=0.0,
num=1,
sum=0.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
"Environment/Cumulative Reward": statssummary1,
"Environment/Cumulative Reward": statssummary2,
"Is Training": statssummary2,
},
10,

category = "category1"
console_writer = ConsoleWriter()
console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.tests.dummy_config import ( # noqa: F401
ppo_dummy_config,
curiosity_dummy_config,
gail_dummy_config,

ml-agents/mlagents/trainers/tests/torch/test_sac.py (2 changes)


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.tests.dummy_config import ( # noqa: F401
sac_dummy_config,
curiosity_dummy_config,
)

ml-agents/mlagents/trainers/torch/attention.py (224 changes)


from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder
from mlagents.trainers.torch.layers import LinearEncoder, Initialization, linear_layer
class MultiHeadAttention(torch.nn.Module):

Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, key_size)
- key: of dimensions (batch_size, number_of_keys, key_size)
- value: of dimensions (batch_size, number_of_keys, value_size)
- query: of dimensions (batch_size, number_of_queries, embedding_size)
- key: of dimensions (batch_size, number_of_keys, embedding_size)
- value: of dimensions (batch_size, number_of_keys, embedding_size)
- The output: (batch_size, number_of_queries, output_size)
- The output: (batch_size, number_of_queries, embedding_size)
def __init__(
self,
query_size: int,
key_size: int,
value_size: int,
output_size: int,
num_heads: int,
embedding_size: int,
):
def __init__(self, embedding_size: int, num_heads: int):
self.output_size = output_size
self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
# self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
# self.fc_k = LinearEncoder(key_size,2, self.n_heads * self.embedding_size)
# self.fc_v = LinearEncoder(value_size,2, self.n_heads * self.embedding_size)
self.fc_out = torch.nn.Linear(
self.n_heads * self.embedding_size, self.output_size
)
self.head_size: int = self.embedding_size // self.n_heads
def forward(
self,

n_q: int,
n_k: int,
number_of_keys: int = -1,
number_of_queries: int = -1,
# This is to avoid using .size() when possible as Barracuda does not support
n_q = number_of_queries if number_of_queries != -1 else query.size(1)
n_k = number_of_keys if number_of_keys != -1 else key.size(1)
query = self.fc_q(query) # (b, n_q, h*d)
key = self.fc_k(key) # (b, n_k, h*d)
value = self.fc_v(value) # (b, n_k, h*d)
query = query.reshape(b, n_q, self.n_heads, self.embedding_size)
key = key.reshape(b, n_k, self.n_heads, self.embedding_size)
value = value.reshape(b, n_k, self.n_heads, self.embedding_size)
query = query.reshape(
b, n_q, self.n_heads, self.head_size
) # (b, n_q, h, emb / h)
key = key.reshape(b, n_k, self.n_heads, self.head_size) # (b, n_k, h, emb / h)
value = value.reshape(
b, n_k, self.n_heads, self.head_size
) # (b, n_k, h, emb / h)
query = query.permute([0, 2, 1, 3]) # (b, h, n_q, emb)
query = query.permute([0, 2, 1, 3]) # (b, h, n_q, emb / h)
key = key.permute([0, 2, 1, 3]) # (b, h, emb, n_k)
key = key.permute([0, 2, 1, 3]) # (b, h, emb / h, n_k)
key = key.permute([0, 1, 3, 2]) # (b, h, emb, n_k)
key = key.permute([0, 1, 3, 2]) # (b, h, emb / h, n_k)
qk = torch.matmul(query, key) # (b, h, n_q, n_k)

att = torch.softmax(qk, dim=3) # (b, h, n_q, n_k)
value = value.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
value_attention = torch.matmul(att, value) # (b, h, n_q, emb)
value = value.permute([0, 2, 1, 3]) # (b, h, n_k, emb / h)
value_attention = torch.matmul(att, value) # (b, h, n_q, emb / h)
value_attention = value_attention.permute([0, 2, 1, 3]) # (b, n_q, h, emb)
value_attention = value_attention.permute([0, 2, 1, 3]) # (b, n_q, h, emb / h)
b, n_q, self.n_heads * self.embedding_size
) # (b, n_q, h*emb)
b, n_q, self.embedding_size
) # (b, n_q, emb)
out = self.fc_out(value_attention) # (b, n_q, emb)
return out, att
return value_attention, att
class SimpleTransformer(torch.nn.Module):
class EntityEmbeddings(torch.nn.Module):
A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
multi head self attention to encode information about a "Self" and a list of
relevant "Entities".
EPISLON = 1e-7
entities_sizes: List[int],
entity_sizes: List[int],
entity_num_max_elements: List[int],
output_size: Optional[int] = None,
concat_self: bool = True,
self.self_size = x_self_size
self.entities_sizes = entities_sizes
self.entities_num_max_elements: Optional[List[int]] = None
self.self_size: int = x_self_size
self.entity_sizes: List[int] = entity_sizes
self.entity_num_max_elements: List[int] = entity_num_max_elements
self.concat_self: bool = concat_self
# If not concatenating self, input to encoder is just entity size
if not concat_self:
self.self_size = 0
self.ent_encoders = torch.nn.ModuleList(
[
LinearEncoder(self.self_size + ent_size, 2, embedding_size)

self.attention = MultiHeadAttention(
query_size=embedding_size,
key_size=embedding_size,
value_size=embedding_size,
output_size=embedding_size,
num_heads=4,
embedding_size=embedding_size,
)
self.residual_layer = LinearEncoder(embedding_size, 1, embedding_size)
if output_size is None:
output_size = embedding_size
self.x_self_residual_layer = LinearEncoder(
embedding_size + x_self_size, 1, output_size
)
self,
x_self: torch.Tensor,
entities: List[torch.Tensor],
key_masks: List[torch.Tensor],
) -> torch.Tensor:
# Gather the maximum number of entities information
if self.entities_num_max_elements is None:
self.entities_num_max_elements = []
for ent in entities:
self.entities_num_max_elements.append(ent.shape[1])
# Concatenate all observations with self
self_and_ent: List[torch.Tensor] = []
for num_entities, ent in zip(self.entities_num_max_elements, entities):
expanded_self = x_self.reshape(-1, 1, self.self_size)
# .repeat(
# 1, num_entities, 1
# )
expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
# Generate the tensor that will serve as query, key and value to self attention
qkv = torch.cat(
self, x_self: torch.Tensor, entities: List[torch.Tensor]
) -> Tuple[torch.Tensor, int]:
if self.concat_self:
# Concatenate all observations with self
self_and_ent: List[torch.Tensor] = []
for num_entities, ent in zip(self.entities_num_max_elements, entities):
expanded_self = x_self.reshape(-1, 1, self.self_size)
expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
else:
self_and_ent = entities
# Encode and concatenate entites
encoded_entities = torch.cat(
mask = torch.cat(key_masks, dim=1)
# Feed to self attention
max_num_ent = sum(self.entities_num_max_elements)
output, _ = self.attention(qkv, qkv, qkv, mask, max_num_ent, max_num_ent)
# Residual
output = self.residual_layer(output) + qkv
# Average Pooling
numerator = torch.sum(output * (1 - mask).reshape(-1, max_num_ent, 1), dim=1)
denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPISLON
output = numerator / denominator
# Residual between x_self and the output of the module
output = self.x_self_residual_layer(torch.cat([output, x_self], dim=1))
return output
return encoded_entities
@staticmethod
def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:

)
mask = torch.cat(key_masks, dim=1)
# Feed to self attention
max_num_ent = sum(self.entities_num_max_elements)
importance = self.importance_layer(qkv) + mask.unsqueeze(2) * -1e6
importance = torch.softmax(importance, dim=1)
weighted_qkv = qkv * importance

class ResidualSelfAttention(torch.nn.Module):
"""
A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
multi head self attention to encode information about a "Self" and a list of
relevant "Entities".
"""
EPSILON = 1e-7
def __init__(
self,
embedding_size: int,
entity_num_max_elements: List[int],
num_heads: int = 4,
):
super().__init__()
self.entity_num_max_elements: List[int] = entity_num_max_elements
self.max_num_ent = sum(entity_num_max_elements)
self.attention = MultiHeadAttention(
num_heads=num_heads, embedding_size=embedding_size
)
self.fc_q = linear_layer(
embedding_size,
embedding_size,
kernel_init=Initialization.Normal,
kernel_gain=(0.125 / embedding_size) ** 0.5,
)
self.fc_k = linear_layer(
embedding_size,
embedding_size,
kernel_init=Initialization.Normal,
kernel_gain=(0.125 / embedding_size) ** 0.5,
)
self.fc_v = linear_layer(
embedding_size,
embedding_size,
kernel_init=Initialization.Normal,
kernel_gain=(0.125 / embedding_size) ** 0.5,
)
self.fc_out = linear_layer(
embedding_size,
embedding_size,
kernel_init=Initialization.Normal,
kernel_gain=(0.125 / embedding_size) ** 0.5,
)
def forward(self, inp: torch.Tensor, key_masks: List[torch.Tensor]) -> torch.Tensor:
# Gather the maximum number of entities information
mask = torch.cat(key_masks, dim=1)
# Feed to self attention
query = self.fc_q(inp) # (b, n_q, emb)
key = self.fc_k(inp) # (b, n_k, emb)
value = self.fc_v(inp) # (b, n_k, emb)
output, _ = self.attention(
query, key, value, self.max_num_ent, self.max_num_ent, mask
)
# Residual
output = self.fc_out(output) + inp
# Average Pooling
numerator = torch.sum(
output * (1 - mask).reshape(-1, self.max_num_ent, 1), dim=1
)
denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
output = numerator / denominator
# Residual between x_self and the output of the module
return output
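
The masked average pooling at the end of `ResidualSelfAttention` is easy to verify in isolation (sizes invented): entities whose mask is 1 are excluded from the mean, and EPSILON protects an all-masked row from dividing by zero.

```python
import torch

b, n, emb = 2, 4, 8
EPSILON = 1e-7
output = torch.randn(b, n, emb)
mask = torch.tensor([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 1.0, 1.0, 1.0]])  # 1 = padded entity to ignore

numerator = torch.sum(output * (1 - mask).reshape(-1, n, 1), dim=1)
denominator = torch.sum(1 - mask, dim=1, keepdim=True) + EPSILON
pooled = numerator / denominator  # (b, emb): mean over unmasked entities only
print(pooled.shape)
```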

ml-agents/mlagents/trainers/torch/layers.py (2 changes)


XavierGlorotUniform = 2
KaimingHeNormal = 3 # also known as Variance scaling
KaimingHeUniform = 4
Normal = 5
_init_methods = {

Initialization.KaimingHeNormal: torch.nn.init.kaiming_normal_,
Initialization.KaimingHeUniform: torch.nn.init.kaiming_uniform_,
Initialization.Normal: torch.nn.init.normal_,
}

ml-agents/mlagents/trainers/torch/model_serialization.py (13 changes)


self.policy = policy
batch_dim = [1]
seq_len_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
vec_obs_size = 0
for sens_spec in self.policy.behavior_spec.sensor_specs:
if len(sens_spec.shape) == 1:
vec_obs_size += sens_spec.shape[0]
num_vis_obs = sum(
1
for sens_spec in self.policy.behavior_spec.sensor_specs
if len(sens_spec.shape) == 3
)
dummy_vec_obs = [torch.zeros(batch_dim + [vec_obs_size])]
# create input shape of NCHW
# (It's NHWC in self.policy.behavior_spec.sensor_specs.shape)
dummy_vis_obs = [

self.input_names = (
["vector_observation"]
+ [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
+ [f"visual_observation_{i}" for i in range(num_vis_obs)]
+ ["action_masks", "memories"]
)
self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
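The input_names and dynamic_axes built above are meant for torch.onnx.export; a hedged sketch of that call (the wrapped module, dummy input tuple, and output file name are assumptions, not shown in this diff):

torch.onnx.export(
    exportable_policy,                     # assumed: an nn.Module wrapping the policy
    tuple(dummy_vec_obs + dummy_vis_obs),  # plus action masks / memories as needed
    "model.onnx",
    input_names=self.input_names,
    dynamic_axes=self.dynamic_axes,        # lets the batch dimension vary at inference
)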

4
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import ObsUtil
from mlagents.trainers.torch.attention import SmallestAttention, SimpleTransformer
from mlagents.trainers.torch.attention import SmallestAttention, EntityEmbeddings
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]

concat_entities = torch.stack(concat_encoded_obs, dim=1)
encoded_state = self.transformer(
x_self, [concat_entities], SimpleTransformer.get_masks([concat_entities])
x_self, [concat_entities], EntityEmbeddings.get_masks([concat_entities])
)
if len(concat_encoded_obs) == 0:
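A shape sketch of the stacking above (sizes illustrative): each encoder emits (batch, embedding), and torch.stack on dim=1 produces the (batch, num_entities, embedding) layout the attention module expects.

encoded = [torch.randn(8, 64) for _ in range(3)]  # 3 encoded observations, batch 8
concat_entities = torch.stack(encoded, dim=1)     # -> (8, 3, 64)
masks = EntityEmbeddings.get_masks([concat_entities])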

2
ml-agents/mlagents/trainers/trainer/rl_trainer.py


logger = get_logger(__name__)
class RLTrainer(Trainer): # pylint: disable=abstract-method
class RLTrainer(Trainer):
"""
This class is the base class for trainers that use Reward Signals.
"""

13
ml-agents/tests/yamato/standalone_build_tests.py


from .yamato_utils import get_base_path, run_standalone_build
def main(scene_path):
def main(scene_path, build_target):
executable_name = None
executable_name = "testPlayer"
if scene_path is not None:
executable_name = os.path.splitext(scene_path)[0] # Remove extension
executable_name = executable_name.split("/")[-1]

returncode = run_standalone_build(
base_path, output_path=executable_name, scene_path=scene_path
base_path,
output_path=executable_name,
scene_path=scene_path,
build_target=build_target,
log_output_path=None, # Log to stdout so we get timestamps on the logs
)
if returncode == 0:

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--scene", default=None)
parser.add_argument("--build-target", default="mac", choices=["mac", "linux"])
main(args.scene)
main(args.scene, args.build_target)

32
ml-agents/tests/yamato/training_int_tests.py


else:
standalone_player_path = "testPlayer"
venv_path = init_venv(python_version)
init_venv(python_version)
# Copy the default training config but override the max_steps parameter,
# and reduce the batch_size and buffer_size enough to ensure an update step happens.

}
override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides)
env_path = os.path.join(get_base_output_path(), standalone_player_path + ".app")
mla_learn_cmd = (
f"mlagents-learn {yaml_out} --force --env={env_path} "
f"--run-id={run_id} --no-graphics --env-args -logFile -"
) # noqa
res = subprocess.run(
f"source {venv_path}/bin/activate; {mla_learn_cmd}", shell=True
)
log_output_path = f"{get_base_output_path()}/training.log"
env_path = os.path.join(get_base_output_path(), standalone_player_path)
mla_learn_cmd = [
"mlagents-learn",
yaml_out,
"--force",
"--env",
env_path,
"--run-id",
str(run_id),
"--no-graphics",
"--env-args",
"-logFile",
log_output_path,
]
res = subprocess.run(mla_learn_cmd)
shutil.copy(onnx_file_expected, model_artifacts_dir)
if os.path.exists(onnx_file_expected):
shutil.copy(onnx_file_expected, model_artifacts_dir)
print("Command line: " + " ".join(mla_learn_cmd))
subprocess.run(["cat", log_output_path])
return False
if csharp_version is None and python_version is None:
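The change from a shell string (source {venv_path}/bin/activate; ...) to a list of arguments means subprocess runs the entry point directly: no shell, no quoting pitfalls, and the exit code comes straight back. A minimal stdlib illustration (the command is a placeholder):

import subprocess

res = subprocess.run(["mlagents-learn", "--help"])  # each arg passed verbatim, no shell
print(res.returncode)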

62
ml-agents/tests/yamato/yamato_utils.py


import shutil
import subprocess
import yaml
from sys import platform
downloader_install_path = "./.Editor/Unity.app/Contents/MacOS/Unity"
if platform == "darwin":
downloader_install_path = "./.Editor/Unity.app/Contents/MacOS/Unity"
else: # if platform == "linux":
downloader_install_path = "./.Editor/Unity"
if os.path.exists(downloader_install_path):
return downloader_install_path
raise FileNotFoundError("Can't find executable from unity-downloader-cli")

verbose: bool = False,
output_path: str = None,
scene_path: str = None,
log_output_path: str = f"{get_base_output_path()}/standalone_build.txt",
build_target: str = None,
log_output_path: Optional[str] = f"{get_base_output_path()}/standalone_build.txt",
artifacts/standalone_build/testPlayer.
artifacts/standalonebuild/testPlayer.
print(f"Running BuildStandalonePlayerOSX via {unity_exe}")
print(f"Running BuildStandalonePlayer via {unity_exe}")
# enum values from https://docs.unity3d.com/2019.4/Documentation/ScriptReference/BuildTarget.html
build_target_to_enum = {
"mac": "StandaloneOSX",
"osx": "StandaloneOSX",
"linux": "StandaloneLinux64",
}
test_args = [
unity_exe,

"Unity.MLAgents.StandaloneBuildTest.BuildStandalonePlayerOSX",
]
os.makedirs(os.path.dirname(log_output_path), exist_ok=True)
subprocess.run(["touch", log_output_path])
test_args += ["-logfile", log_output_path]
if log_output_path:
os.makedirs(os.path.dirname(log_output_path), exist_ok=True)
subprocess.run(["touch", log_output_path])
test_args += ["-logfile", log_output_path]
else:
# Log to stdout
test_args += ["-logfile", "-"]
if output_path is not None:
output_path = os.path.join(get_base_output_path(), output_path)

test_args += ["--mlagents-build-scene-path", scene_path]
if build_target is not None:
test_args += ["--mlagents-build-target", build_target_to_enum[build_target]]
print(f"{' '.join(test_args)} ...")
timeout = 30 * 60 # 30 minutes, just in case

if output_path is None and res.returncode == 0:
exe_name = "testPlayer.app" if platform == "darwin" else "testPlayer"
os.path.join(base_path, "Project", "testPlayer.app"),
os.path.join(get_base_output_path(), "testPlayer.app"),
os.path.join(base_path, "Project", exe_name),
os.path.join(get_base_output_path(), exe_name),
subprocess.run(["cat", log_output_path])
if log_output_path:
subprocess.run(["cat", log_output_path])
return res.returncode

file_path = os.path.join(root, filename)
if os.access(file_path, os.X_OK):
exes.append(file_path)
# Also check the input path
if os.access(root_dir, os.X_OK):
exes.append(root_dir)
) -> str:
) -> None:
Set up the virtual environment, and return the venv path.
Install the necessary packages for the venv
# Use a different venv path for different versions
venv_path = "venv"
if mlagents_python_version:
venv_path += "_" + mlagents_python_version
# Set up the venv and install mlagents
subprocess.check_call(f"python -m venv {venv_path}", shell=True)
if platform != "darwin":
raise RuntimeError("Yamato can only run tensorflow on mac platforms!")
pip_commands += [
f"mlagents=={mlagents_python_version}",
f"gym-unity=={mlagents_python_version}",

pip_commands += ["-e ./ml-agents-envs", "-e ./ml-agents", "-e ./gym-unity"]
if extra_packages:
pip_commands += extra_packages
print(f'Running "python3 -m pip install -q {cmd} {pip_index_url}"')
f"source {venv_path}/bin/activate; python -m pip install -q {cmd} {pip_index_url}",
shell=True,
f"python3 -m pip install -q {cmd} {pip_index_url}", shell=True
return venv_path
def checkout_csharp_version(csharp_version):

609
ml-agents/mlagents/trainers/barracuda.py


# pylint: skip-file
# flake8: noqa
from __future__ import print_function
from collections import defaultdict
import numpy as np
import json
import struct # convert from Python values and C structs
import re
import argparse
import os.path
BARRACUDA_VERSION = 16
# Definition of Barracuda model
class Model:
def __init__(self):
self.layers = []
self.tensors = {}
self.inputs = {}
self.outputs = []
self.globals = []
self.memories = []
class Struct:
"A structure that can have any fields defined."
def __init__(self, **entries):
self.__dict__.update(entries)
# Parse command line arguments
def parse_args(description, source_extension, help):
parser = argparse.ArgumentParser(description=description)
parser.add_argument("source_file", help=help)
parser.add_argument("target_file", help="output Barracuda binary file")
parser.add_argument("-trim", "--trim-unused-by-output")
parser.add_argument("--print-layers", action="store_true")
parser.add_argument("--print-source-json", action="store_true")
parser.add_argument("-json", "--print-barracuda-json", action="store_true")
parser.add_argument("--print-layer-links", action="store_true")
parser.add_argument("--print-patterns", action="store_true")
parser.add_argument("--print-tensors", action="store_true")
parser.add_argument("--print-supported-ops", action="store_true")
parser.add_argument("--verbose", action="store_true")
args = parser.parse_args()
args.compress_f16 = (
False
) # TEMP: disabled, until properly implemented parser.add_argument('-f16', '--compress-f16', action='store_true')
output_extension = ".bc" if not args.compress_f16 else ".f16.bc"
if not os.path.exists(args.source_file):
args.source_file = args.source_file + source_extension
if not os.path.exists(args.source_file):
print("File", args.source_file, "does not exist.")
exit(-1)
def replaceFilenameExtension(filename, newExtension):
return os.path.splitext(os.path.basename(filename))[0] + newExtension
if os.path.isdir(args.target_file):
args.target_file = os.path.join(
args.target_file,
replaceFilenameExtension(args.source_file, output_extension),
)
if args.verbose:
print(args)
return args
# Fuse training time BatchNorm tensors into Scale & Bias
def fuse_batchnorm_weights(gamma, beta, mean, var, epsilon):
# https://github.com/Tencent/ncnn/blob/master/src/layer/batchnorm.cpp
""" float sqrt_var = sqrt(var_data[i]);
a_data[i] = bias_data[i] - slope_data[i] * mean_data[i] / sqrt_var;
b_data[i] = slope_data[i] / sqrt_var;
...
ptr[i] = b * ptr[i] + a;
"""
scale = gamma / np.sqrt(var + epsilon)
bias = beta - gamma * mean / np.sqrt(var + epsilon)
return [scale, bias]
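A quick numeric check of the fold (values arbitrary): applying the returned scale and bias to an input should match the usual BatchNorm formula gamma * (x - mean) / sqrt(var + eps) + beta.

import numpy as np

gamma, beta = np.array([1.5]), np.array([0.2])
mean, var, eps = np.array([0.1]), np.array([0.4]), 1e-5
scale, bias = fuse_batchnorm_weights(gamma, beta, mean, var, eps)
x = np.array([0.7])
assert np.allclose(scale * x + bias, gamma * (x - mean) / np.sqrt(var + eps) + beta)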
# Resort layers so that all inputs are satisfied for every layer beforehand
def sort(model, inputs, memories, verbose):
if hasattr(model, "layers"):
model = model.layers
inputs_and_memories = set(list(inputs) + list(memories[1::3]))
def find_missing_inputs(model, inputs):
missing = set()
ready = set(inputs)
for l in model:
for i in l.inputs:
if i not in ready:
missing.add(i)
ready.add(l.name)
return missing
# Class to represent a graph
# Taken from: https://www.geeksforgeeks.org/python-program-for-topological-sorting/
class Graph:
def __init__(self, vertices):
self.graph = defaultdict(list) # dictionary containing adjacency List
self.V = vertices # No. of vertices
# function to add an edge to graph
def addEdge(self, u, v):
self.graph[u].append(v)
# A recursive function used by topologicalSort
def topologicalSortUtil(self, v, visited, stack):
# Mark the current node as visited.
visited[v] = True
# Recur for all the vertices adjacent to this vertex
for i in self.graph[v]:
if not visited[i]:
self.topologicalSortUtil(i, visited, stack)
# Push current vertex to stack which stores result
stack.insert(0, v)
# The function to do Topological Sort. It uses recursive
# topologicalSortUtil()
def topologicalSort(self):
# Mark all the vertices as not visited
visited = [False] * self.V
stack = []
# Call the recursive helper function to store Topological
# Sort starting from all vertices one by one
for i in range(self.V):
if not visited[i]:
self.topologicalSortUtil(i, visited, stack)
# print(stack)
return stack
if len(find_missing_inputs(model, inputs_and_memories)) == 0:
return model
g = Graph(len(model))
layers = {}
id = 0
for l in model:
layers[l.name] = id
id += 1
for layer in model:
for i in layer.inputs:
if i not in inputs_and_memories:
g.addEdge(layers[i], layers[layer.name])
sorted_layer_indices = g.topologicalSort()
print("SORTED:", sorted_layer_indices)
new_model = [model[idx] for idx in sorted_layer_indices]
assert len(find_missing_inputs(new_model, inputs_and_memories)) == 0
return new_model
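An illustrative call (layer names made up): two layers listed out of order come back sorted so every input is produced before it is consumed.

layers = [
    Struct(name="dense2", inputs=["dense1"]),
    Struct(name="dense1", inputs=["obs"]),
]
ordered = sort(layers, inputs={"obs"}, memories=[], verbose=False)
print([l.name for l in ordered])  # ['dense1', 'dense2']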
# Trim
def trim(model, criteria_regexp_string, verbose):
if hasattr(model, "layers"):
model = model.layers
def flatten(items, enter=lambda x: isinstance(x, list)):
# http://stackoverflow.com/a/40857703
# https://github.com/ctmakro/canton/blob/master/canton/misc.py
"""Yield items from any nested iterable; see REF."""
for x in items:
if enter(x):
yield from flatten(x)
else:
yield x
def trim_model(model, outputs):
layers = {l.name: l for l in model}
connected = {o for o in outputs}
while len(outputs) > 0:
outputs = set(flatten([layers[o].inputs for o in outputs if o in layers]))
if verbose and len(outputs) > 0:
print(outputs)
for o in outputs:
connected.add(o)
trimmed = [l.name for l in model if l.name not in connected]
def array_without_brackets(arr):
return str(arr)[1:-1] # array to string without brackets
print("TRIMMED:", array_without_brackets(trimmed))
return [l for l in model if l.name in connected]
layer_names = {l.name for l in model}
criteria = re.compile(criteria_regexp_string)
preserve_outputs = list(filter(criteria.match, layer_names))
if preserve_outputs:
print("Trimming model given outputs to preserve:", preserve_outputs)
model = trim_model(model, preserve_outputs)
else:
print(
"WARNING: Trim couldn't find any layers to match:", criteria_regexp_string
)
return model
# Fuse
def fuse(model, verbose):
i = 0
while i < len(model) - 1:
if model[i].type == model[i + 1].type and model[i].type == 255: # Load
model[i].tensors += model[i + 1].tensors
del model[i + 1]
else:
i += 1
return model
def compress(model):
compress_classes = {"Dense"}
for l in model.layers:
if l.class_name in compress_classes:
print(
"Compressing %s layer '%s' weights to float16" % (l.class_name, l.name)
)
for x in l.tensors:
x.data = np.float16(x.data)
return model
# Verbose
def to_json(model):
class StructEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, np.ndarray): # skip binary data packed inside ndarray
return ""
if getattr(o, "__dict__", None):
return o.__dict__
return str(o)
s = json.dumps(model.layers, cls=StructEncoder, separators=(", ", ":"))
# custom formatting
s = s.replace("]}, {", "]},\n{")
s = s.replace(":[{", ":[\n\t{")
s = s.replace("}, {", "},\n\t{")
s = s.replace('"', "'")
return s
def summary(model, print_layer_links, print_barracuda_json, print_tensors):
def array_without_brackets(arr):
return str(arr)[1:-1] # array to string without brackets
if print_layer_links:
for l in model.layers:
print(l.name, " <= ", l.inputs)
if print_barracuda_json:
print(to_json(model))
if model.globals:
if isinstance(model.globals, dict):
model.globals = {x.name: x.shape for x in model.globals}
print("GLOBALS:", array_without_brackets(model.globals))
for l in model.layers:
if isinstance(model.inputs, dict):
ins = {i: model.inputs[i] for i in l.inputs if i in model.inputs}
else:
ins = [i for i in l.inputs if i in model.inputs]
if ins:
print("IN: %s => '%s'" % (array_without_brackets(ins), l.name))
for mem_in, mem_out in zip(model.memories[1::3], model.memories[2::3]):
print("MEM: '%s' => '%s'" % (mem_in, mem_out))
print("OUT:", array_without_brackets(model.outputs))
if print_tensors:
for l in model.layers:
for x in l.tensors:
print(x.name, x.shape, x.data.dtype, x.data)
class Build:
def __init__(self, scope=""):
self.scope = scope
self.layers = []
self.names_taken = set()
def __getattr__(self, attr):
if attr == "_":
return self.layers[-1].name if len(self.layers) > 0 else self.scope
raise AttributeError(attr)
def _patch_last_layer_name_and_return(self):
if self.layers[-1].name:
return self.layers[-1].name
# generate unique name based on op and increasing id
name = self.layers[-1].op
i = 1
while name in self.names_taken:
name = self.layers[-1].op + "_" + str(i)
i += 1
self.names_taken.add(name)
self.layers[-1].name = self.scope + ("/" if self.scope else "") + name
return self.layers[-1].name
def concat(self, a, b, axis=-1, out=""):
self.layers += [Struct(name=out, op="Concat", axis=axis, input=[a, b])]
return self._patch_last_layer_name_and_return()
def mad(self, x, kernel, bias, out=""):
self.layers += [Struct(name=out, op="Dense", input=[x, kernel, bias])]
return self._patch_last_layer_name_and_return()
def mul(self, a, b, out=""):
self.layers += [Struct(name=out, op="Mul", input=[a, b])]
return self._patch_last_layer_name_and_return()
def add(self, a, b, out=""):
self.layers += [Struct(name=out, op="Add", input=[a, b])]
return self._patch_last_layer_name_and_return()
def sub(self, a, b, out=""):
self.layers += [Struct(name=out, op="Sub", input=[a, b])]
return self._patch_last_layer_name_and_return()
def sigmoid(self, x, out=""):
self.layers += [Struct(name=out, op="Sigmoid", input=[x])]
return self._patch_last_layer_name_and_return()
def tanh(self, x, out=""):
self.layers += [Struct(name=out, op="Tanh", input=[x])]
return self._patch_last_layer_name_and_return()
def reduce(self, op, x, axis=-1, out=""):
self.layers += [Struct(name=out, op="Reduce" + op, axis=axis, input=[x])]
return self._patch_last_layer_name_and_return()
def pool(self, op, x, out=""):
self.layers += [Struct(name=out, op=op + "Pool", input=[x])]
return self._patch_last_layer_name_and_return()
def strided_slice(self, x, begin, end, strides, rank, out=""):
self.layers += [
Struct(
name=out,
op="StridedSlice",
rank=rank,
starts=begin,
ends=end,
slice_strides=strides,
input=[x],
)
]
return self._patch_last_layer_name_and_return()
def mean(name, input, axis=-1):
"""Builds a mean operation out of several simpler ops."""
nn = Build(name)
if np.array_equal(axis, [1, 2]):
nn.pool("GlobalAvg", input, out=name)
elif np.array_equal(axis, [1, 2, 3]):
nn.reduce(
"Mean", # over channels
nn.pool("GlobalAvg", input), # over height & width
out=name,
)
elif (
np.array_equal(axis, [3])
or np.array_equal(axis, [-1])
or np.array_equal(axis, 3)
or np.array_equal(axis, -1)
):
nn.reduce("Mean", input, out=name)
return nn.layers
def rnn(name, input, state, kernel, bias, new_state, number_of_gates=2):
""" - Ht = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
"""
nn = Build(name)
nn.tanh(nn.mad(kernel=kernel, bias=bias, x=nn.concat(input, state)), out=new_state)
return nn.layers
def gru(
name,
input,
state,
kernel_r,
kernel_u,
kernel_c,
bias_r,
bias_u,
bias_c,
new_state,
number_of_gates=2,
):
""" - zt = f(Xt*Wz + Ht_1*Rz + Wbz + Rbz)
- rt = f(Xt*Wr + Ht_1*Rr + Wbr + Rbr)
- ht = g(Xt*Wh + (rt . Ht_1)*Rh + Rbh + Wbh)
- Ht = (1-zt).ht + zt.Ht_1
"""
nn = Build(name)
inputs = nn.concat(input, state)
u = nn.sigmoid(nn.mad(inputs, kernel_u, bias_u))
r = nn.sigmoid(nn.mad(inputs, kernel_r, bias_r))
r_state = nn.mul(r, state)
c = nn.tanh(nn.mad(kernel=kernel_c, bias=bias_c, x=nn.concat(input, r_state)))
# new_h = u' * state + (1 - u') * c'
# = u' * state + c' - u' * c'
# u' * state + c'
nn.add(nn.mul(u, state), c)
# - u' * c'
nn.sub(nn._, nn.mul(u, c), out=new_state)
return nn.layers
def lstm(
name,
input,
state_c,
state_h,
kernel_i,
kernel_j,
kernel_f,
kernel_o,
bias_i,
bias_j,
bias_f,
bias_o,
new_state_c,
new_state_h,
):
""" Full:
- it = f(Xt*Wi + Ht_1*Ri + Pi . Ct_1 + Wbi + Rbi)
- ft = f(Xt*Wf + Ht_1*Rf + Pf . Ct_1 + Wbf + Rbf)
- ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
- Ct = ft . Ct_1 + it . ct
- ot = f(Xt*Wo + Ht_1*Ro + Po . Ct + Wbo + Rbo)
- Ht = ot . h(Ct)
"""
""" No peephole:
- it = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
- ft = f(Xt*Wf + Ht_1*Rf + Wbf + Rbf)
- ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
- Ct = ft . Ct_1 + it . ct
- ot = f(Xt*Wo + Ht_1*Ro + Wbo + Rbo)
- Ht = ot . h(Ct)
"""
nn = Build(name)
inputs = nn.concat(input, state_h)
i = nn.sigmoid(nn.mad(x=inputs, kernel=kernel_i, bias=bias_i))
j = nn.tanh(nn.mad(inputs, kernel_j, bias_j))
f = nn.sigmoid(nn.mad(inputs, kernel_f, bias_f))
o = nn.sigmoid(nn.mad(inputs, kernel_o, bias_o))
# new_c = state_c * f' + i' * j'
nn.add(nn.mul(state_c, f), nn.mul(i, j), out=new_state_c)
# new_h = o' * tanh(new_c)
nn.mul(o, nn.tanh(new_state_c), out=new_state_h)
return nn.layers
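An illustrative use of these builders (tensor names are placeholders): each helper returns a list of Struct layers, with auto-generated names scoped under the cell name.

layers = rnn("rnn0", input="x", state="h", kernel="W", bias="b", new_state="h_next")
for l in layers:
    print(l.op, l.input, "->", l.name)
# Concat ['x', 'h'] -> rnn0/Concat
# Dense ['rnn0/Concat', 'W', 'b'] -> rnn0/Dense
# Tanh ['rnn0/Dense'] -> h_next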
# Serialize
class BarracudaWriter:
f = None
def __init__(self, filename):
self.f = open(filename, "wb+")
def __enter__(self):
return self
def __exit__(self, type, value, tb):
self.f.close()
def write_array(self, arr):
arr.tofile(self.f)
def write_str_array(self, array_of_strings):
self.write_int32(len(array_of_strings))
for s in array_of_strings:
self.write_str(s)
def write_str(self, s):
self.write_int32(len(s))
self.f.write(s.encode("ascii"))
def write_float(self, d):
self.f.write(struct.pack("<f", d))
def write_int32(self, d):
self.f.write(struct.pack("<i", d))
def write_int64(self, d):
self.f.write(struct.pack("<q", d))
def write_shape(self, s):
self.write_int32(len(s))
for el in s:
self.write_int32(el if el is not None else -1)
def close(self):
self.f.close()
def write(model, filename):
with BarracudaWriter(filename) as w:
# VERSION = 0xBA22AC0DA000 + BARRACUDA_VERSION
w.write_int64(BARRACUDA_VERSION)
# inputs
w.write_int32(len(model.inputs))
for name, shape in model.inputs.items():
w.write_str(name)
w.write_shape(shape)
# outputs
w.write_str_array(model.outputs)
# memories
w.write_int32(len(model.memories) // 3)
for mem_shape, mem_in, mem_out in zip(
model.memories[0::3], model.memories[1::3], model.memories[2::3]
):
w.write_shape(mem_shape)
w.write_str(mem_in)
w.write_str(mem_out)
# layers
offset = 0
all_tensors = []
w.write_int32(len(model.layers))
for l in model.layers:
assert l.name not in l.inputs
w.write_str(l.name)
w.write_int32(l.type)
w.write_int32(l.activation)
w.write_int32(0) # dummy
w.write_int32(0) # dummy
w.write_shape(l.pads)
w.write_shape(l.strides)
w.write_shape(l.pool_size)
w.write_int32(l.axis)
w.write_float(l.alpha)
w.write_float(l.beta)
w.write_int32(0) # dummy
w.write_str_array(l.inputs)
w.write_int32(len(l.tensors))
for x in l.tensors:
assert len(x.shape) == 4
assert x.data.nbytes % 4 == 0
length = (
x.data.nbytes >> 2
) # length is measured in float32s (at least for now)
w.write_str(x.name)
w.write_shape(x.shape)
w.write_int64(offset)
w.write_int32(x.data.itemsize)
w.write_int32(length)
offset += length
all_tensors.append(x)
for x in all_tensors:
w.write_array(x.data)
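Since the file begins with the int64 version written above, a small sanity-read sketch (assumes a model was already serialized to model.bc):

import struct

with open("model.bc", "rb") as f:
    (version,) = struct.unpack("<q", f.read(8))
print(version)  # BARRACUDA_VERSION, i.e. 16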
def print_known_operations(known_classes, known_activations):
print("OPS supported by the converter:")
for key in sorted(known_classes.keys()):
print(key)
print("ACTIVATIONS supported by the converter:")
for key in sorted(known_activations.keys()):
print(key)

53
.pylintrc


[MASTER]
# Add files or directories to the ignore list. They should be base names, not
# paths.
ignore=CVS
generated-members=torch.*
[MESSAGES CONTROL]
#enable=
disable =
# C0301: Line too long
# C0330: Wrong hanging indentation before block
# disabled because black handles this
C0301,C0330,
# C0114: Missing module docstring
# C0115: Missing class docstring
# C0116: Missing function or method docstring
C0114,C0115,C0116,
# All convention and refactor for now
C,R,
# W1201: Specify string format arguments as logging function parameters
# W1202: Use % formatting in logging functions and pass the % parameters as arguments
W1201,W1202,
# W0612: Unused variable
# W0613: Unused argument
W0612, W0613,
# W0107: Unnecessary pass statement
W0107,
# W0511 "TODO"
W0511,
# W0703: Catching too general exception Exception
W0703,
# E0401: Unable to import... - triggers for external dependencies like numpy
E0401,
# This was causing false positives
# Appears to be https://github.com/PyCQA/pylint/issues/2981
W0201,
# Using the global statement
W0603,
# "Access to a protected member _foo of a client class (protected-access)"
W0212