
Merge branch 'develop-attention-refactor' into develop-centralizedcritic-mm

/develop/centralizedcritic
Ervin Teng, 4 years ago
Current commit: aba633b2
56 changed files with 520 additions and 1060 deletions
  1. .gitignore (2 changes)
  2. .pre-commit-config.yaml (12 changes)
  3. .yamato/com.unity.ml-agents-pack.yml (4 changes)
  4. .yamato/com.unity.ml-agents-performance.yml (15 changes)
  5. .yamato/com.unity.ml-agents-test.yml (4 changes)
  6. .yamato/compressed-sensor-test.yml (19 changes)
  7. .yamato/gym-interface-test.yml (19 changes)
  8. .yamato/protobuf-generation-test.yml (29 changes)
  9. .yamato/python-ll-api-test.yml (27 changes)
  10. .yamato/standalone-build-test.yml (30 changes)
  11. .yamato/training-int-tests.yml (29 changes)
  12. Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (8 changes)
  13. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (5 changes)
  14. README.md (2 changes)
  15. com.unity.ml-agents/CHANGELOG.md (4 changes)
  16. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (3 changes)
  17. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (2 changes)
  18. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (2 changes)
  19. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (4 changes)
  20. com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)
  21. docs/FAQ.md (45 changes)
  22. docs/Learning-Environment-Design-Agents.md (34 changes)
  23. ml-agents-envs/mlagents_envs/env_utils.py (3 changes)
  24. ml-agents-envs/mlagents_envs/environment.py (2 changes)
  25. ml-agents-envs/mlagents_envs/registry/binary_utils.py (2 changes)
  26. ml-agents-envs/mlagents_envs/rpc_utils.py (15 changes)
  27. ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (5 changes)
  28. ml-agents/mlagents/torch_utils/torch.py (2 changes)
  29. ml-agents/mlagents/trainers/agent_processor.py (8 changes)
  30. ml-agents/mlagents/trainers/env_manager.py (3 changes)
  31. ml-agents/mlagents/trainers/learn.py (1 change)
  32. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
  33. ml-agents/mlagents/trainers/policy/policy.py (14 changes)
  34. ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)
  35. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)
  36. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)
  37. ml-agents/mlagents/trainers/settings.py (1 change)
  38. ml-agents/mlagents/trainers/stats.py (78 changes)
  39. ml-agents/mlagents/trainers/tests/__init__.py (2 changes)
  40. ml-agents/mlagents/trainers/tests/check_env_trains.py (4 changes)
  41. ml-agents/mlagents/trainers/tests/test_agent_processor.py (25 changes)
  42. ml-agents/mlagents/trainers/tests/test_learn.py (16 changes)
  43. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)
  44. ml-agents/mlagents/trainers/tests/test_stats.py (43 changes)
  45. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
  46. ml-agents/mlagents/trainers/tests/torch/test_sac.py (2 changes)
  47. ml-agents/mlagents/trainers/torch/attention.py (224 changes)
  48. ml-agents/mlagents/trainers/torch/layers.py (2 changes)
  49. ml-agents/mlagents/trainers/torch/model_serialization.py (13 changes)
  50. ml-agents/mlagents/trainers/torch/networks.py (4 changes)
  51. ml-agents/mlagents/trainers/trainer/rl_trainer.py (2 changes)
  52. ml-agents/tests/yamato/standalone_build_tests.py (13 changes)
  53. ml-agents/tests/yamato/training_int_tests.py (32 changes)
  54. ml-agents/tests/yamato/yamato_utils.py (62 changes)
  55. ml-agents/mlagents/trainers/barracuda.py (609 changes)
  56. .pylintrc (53 changes)

.gitignore (2 changes)


/summaries
# Output Artifacts
/results
# Output Builds
/Builds
# Training environments
/envs

.pre-commit-config.yaml (12 changes)


hooks:
- id: python-check-mock-methods
- repo: https://github.com/pre-commit/mirrors-pylint
rev: v2.4.4
hooks:
- id: pylint
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py|
.*/tests/.*
)$
args: [--score=n]
- repo: https://github.com/mattlqx/pre-commit-search-and-replace
rev: v1.0.3
hooks:

.yamato/com.unity.ml-agents-pack.yml (4 changes)


pack:
name: Pack
agent:
type: Unity::VM::osx
image: package-ci/mac:stable
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.small
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm

.yamato/com.unity.ml-agents-performance.yml (15 changes)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- chmod +x ./utr

expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "DevProject/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/com.unity.ml-agents-performance.yml") AND
NOT pull_request.changes.all match "**/*.md"
recurring:
- branch: master
frequency: daily
artifacts:
logs:
paths:

.yamato/com.unity.ml-agents-test.yml (4 changes)


- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
cancel_old_ci: true
{% if platform.name == "mac" %}
{% if platform.name == "linux" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-create-project-arg="-upmNoDefaultPackages" --extra-utr-arg "reruncount=2"

.yamato/compressed-sensor-test.yml (19 changes)


test_compressed_obs_{{ editor.version }}:
name: Test Compressed Sensor Observation {{ editor.version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestGridCompressed
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestTextureCompressed
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestGridCompressed
python ml-agents/tests/yamato/scripts/run_compressed_sensor.py --env=artifacts/testPlayer-TestTextureCompressed
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/gym-interface-test.yml (19 changes)


---
{% for editor in test_editors %}
test_gym_interface_{{ editor.version }}:
name: Test Mac Gym Interface {{ editor.version }}
name: Test Linux Gym Interface {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/protobuf-generation-test.yml (29 changes)


test_mac_protobuf_generation:
test_linux_protobuf_generation:
type: Unity::VM::osx
image: package-ci/mac:stable
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
HOMEBREW_NO_AUTO_UPDATE: "1"
brew install nuget
sudo apt-get update && sudo apt-get install -y python3-venv nuget
python3 -m venv venv && source venv/bin/activate
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.$GRPC_VERSION/tools/macosx_x64 ./make.sh
python3 -m pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python3 -m pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pushd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/linux_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/linux_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.$GRPC_VERSION/tools/linux_x64 ./make.sh
popd
mkdir -p artifacts
touch artifacts/proto.patch
git diff --exit-code -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" \

pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR
pull_request.changes.any match "com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/**" OR
pull_request.changes.any match "ml-agents-envs/mlagents_envs/communicator_objects/**" OR
pull_request.changes.any match ".yamato/protobuf-generation-test.yml") AND
NOT pull_request.changes.all match "protobuf-definitions/**/*.md"
artifacts:

.yamato/python-ll-api-test.yml (27 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_ll_api_{{ editor.version }}:
name: Test Mac LL-API {{ editor.version }}
test_linux_ll_api_{{ editor.version }}:
name: Test Linux LL-API {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_llapi.py
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

.yamato/standalone-build-test.yml (30 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:
name: Test Mac Standalone {{ editor.version }}
test_linux_standalone_{{ editor.version }}:
name: Test Linux Standalone {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: i1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
- python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=linux --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
triggers:
cancel_old_ci: true
expression: |

standalonebuild:
paths:
- "artifacts/testPlayer*/**"
- "artifacts/**/UnityPlayer.so"
{% endfor %}

.yamato/training-int-tests.yml (29 changes)


{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:
name: Test Mac Fast Training {{ editor.version }}
test_linux_training_int_{{ editor.version }}:
name: Test Linux Fast Training {{ editor.version }}
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.16.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp={{ editor.csharp_backcompat_version }}
- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.training_int_tests
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
expression: |

- "artifacts/inference.onnx.txt"
standalonebuild:
paths:
- "artifacts/testplayer*/**"
- "artifacts/testPlayer*/**"
- "artifacts/models/**"
{% endfor %}

Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (8 changes)


{
const string k_OutputCommandLineFlag = "--mlagents-build-output-path";
const string k_SceneCommandLineFlag = "--mlagents-build-scene-path";
private const string k_BuildTargetFlag = "--mlagents-build-target";
public static void BuildStandalonePlayerOSX()
{

var buildTarget = BuildTarget.StandaloneOSX;
var args = Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length - 1; i++)

{
scenePath = args[i + 1];
}
else if (args[i] == k_BuildTargetFlag)
{
buildTarget = (BuildTarget)Enum.Parse(typeof(BuildTarget), args[i + 1], ignoreCase: true);
}
}
string[] scenes = { scenePath };

BuildTarget.StandaloneOSX,
buildTarget,
BuildOptions.None
);
var isOk = buildResult.summary.result == BuildResult.Succeeded;

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (5 changes)


m_AgentRb = GetComponent<Rigidbody>();
m_GroundRenderer = ground.GetComponent<Renderer>();
m_GroundMaterial = m_GroundRenderer.material;
m_statsRecorder = Academy.Instance.StatsRecorder;
}
public override void CollectObservations(VectorSensor sensor)

{
SetReward(1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f));
m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum);
}
EndEpisode();
}

symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position;
symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position;
}
m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum);
}
}

README.md (2 changes)


In addition to our own documentation, here are some additional, relevant
articles:
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/machine-learning/a-game-developer-learns-machine-learning-intent/)
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/posts/a-game-developer-learns-machine-learning-intent)
- [Explore Unity Technologies ML-Agents Exclusively on Intel Architecture](https://software.intel.com/en-us/articles/explore-unity-technologies-ml-agents-exclusively-on-intel-architecture)
- [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)

com.unity.ml-agents/CHANGELOG.md (4 changes)


### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
#### ml-agents / ml-agents-envs / gym-unity (Python)
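As an illustration of the Python-side counterpart to this change, here is a minimal sketch of recording a summed stat through the trainers' `StatsReporter`, assuming the `add_stat` signature and `aggregated_value` property shown in the `ml-agents/mlagents/trainers/stats.py` hunk later in this diff; the category name is a made-up example (it echoes the one in the test hunk below).

```python
from mlagents.trainers.stats import StatsReporter
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

# "FakeCategory" is a hypothetical category; reporters share state per category.
reporter = StatsReporter("FakeCategory")

# Default aggregation averages values over the summary period...
reporter.add_stat("Environment/Episode Length", 42.0)
# ...while SUM accumulates them, matching StatAggregationMethod.Sum in C#.
reporter.add_stat("Goal/Correct", 1.0, StatsAggregationMethod.SUM)
reporter.add_stat("Goal/Correct", 1.0, StatsAggregationMethod.SUM)

summary = reporter.get_stats_summaries("Goal/Correct")
print(summary.aggregated_value)  # 2.0: the sum, not the mean
```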

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (3 changes)


ActionSpec actionSpec;
if (bpp.ActionSpec == null)
{
// Disable deprecation warnings so we can set legacy fields
#pragma warning disable CS0618
var spaceType = (SpaceType)bpp.VectorActionSpaceTypeDeprecated;
if (spaceType == SpaceType.Continuous)
{

{
actionSpec = ActionSpec.MakeDiscrete(bpp.VectorActionSizeDeprecated.ToArray());
}
#pragma warning restore CS0618
}
else
{

com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (2 changes)


namespace Unity.MLAgents.Sensors
{
public class BufferSensor : ISensor, IDimensionPropertiesSensor
internal class BufferSensor : ISensor, IDimensionPropertiesSensor
{
private int m_MaxNumObs;
private int m_ObsSize;

com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (2 changes)


/// A component for BufferSensor.
/// </summary>
[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
internal class BufferSensorComponent : SensorComponent
{
public int ObservableSize;
public int MaxNumObservables;

com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (4 changes)


/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
public enum DimensionProperty
internal enum DimensionProperty
{
/// <summary>
/// No properties specified.

/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
public interface IDimensionPropertiesSensor
internal interface IDimensionPropertiesSensor
{
/// <summary>
/// Returns the array containing the properties of each dimensions of the

com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)


/// To avoid conflicts when training with multiple concurrent environments, only
/// stats from worker index 0 will be tracked.
/// </summary>
MostRecent = 1
MostRecent = 1,
/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
}
/// <summary>

docs/FAQ.md (45 changes)


## Installation problems
### Tensorflow dependency
ML Agents requires TensorFlow; if you don't already have it installed, `pip`
will try to install it when you install the ml-agents package.
If you see a message like this
```console
ERROR: Could not find a version that satisfies the requirement tensorflow<2.0,>=1.7 (from mlagents) (from versions: none)
ERROR: No matching distribution found for tensorflow<2.0,>=1.7 (from mlagents)
```
it means that there is no version of TensorFlow for your python environment.
Some known potential causes are:
- You're using 32-bit python instead of 64-bit. See the answer
[here](https://stackoverflow.com/a/1405971/224264) for how to tell which you
have installed.
- You have the `tensorflow-gpu` package installed. This is equivalent to
`tensorflow`; however, `pip` doesn't recognize it as satisfying the
requirement. The best way to resolve this is to update to
`tensorflow==1.15.0`, which provides GPU support in the same package (see the
[release notes](https://github.com/tensorflow/tensorflow/issues/33374) for
more details).
- You're on another architecture (e.g. ARM) which requires vendor provided
packages.
In all of these cases, the problem is with your pip/Python environment setup. Please
search the TensorFlow GitHub issues for similar problems and solutions before
creating a new issue.
#### Visual C++ Dependency (Windows Users)
When running `mlagents-learn`, if you see a stack trace with a message like this:
```console
ImportError: DLL load failed: The specified module could not be found.
```
then one of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll` (new), is missing on your machine. The `import tensorflow` command prints this warning message.
To solve it, download and install (then reboot) the [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-my/help/2977003/the-latest-supported-visual-c-downloads).
For more details, please see the [TensorFlow 2.1.0 release notes](https://github.com/tensorflow/tensorflow/releases/tag/v2.1.0)
and the [TensorFlow github issue](https://github.com/tensorflow/tensorflow/issues/22794#issuecomment-573297027).
## Environment Permission Error
If you directly import your Unity environment without building it in the editor,

docs/Learning-Environment-Design-Agents.md (34 changes)


- [Visual Observation Summary & Best Practices](#visual-observation-summary--best-practices)
- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Actions](#actions)
- [Actions and Actuators](#actions-and-actuators)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)
- [Masking Discrete Actions](#masking-discrete-actions)

- Use as few rays and tags as necessary to solve the problem in order to improve
learning stability and agent performance.
## Actions
## Actions and Actuators
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions that an Agent can use:
**Continuous** and **Discrete**.
action is passed to an `IActionReceiver` (either an `Agent` or an `IActuator`)
as the `ActionBuffers` parameter when the Academy invokes the
`IActionReceiver.OnActionReceived()` function.
There are two types of actions supported: **Continuous** and **Discrete**.
Neither the Policy nor the training algorithm knows anything about what the
action values themselves mean. The training algorithm simply tries different

branches.
- You cannot mask all the actions of a branch.
- You cannot mask actions in continuous control.
### IActuator interface and ActuatorComponents
The Actuator API allows users to abstract behavior out of Agents and into
components (similar to the ISensor API). The `IActuator` interface and `Agent`
class both implement the `IActionReceiver` interface to allow for backward compatibility
with the current `Agent.OnActionReceived` and `Agent.CollectDiscreteActionMasks` APIs.
This means you will not have to change your code until you decide to use the `IActuator` API.
Like the `ISensor` interface, the `IActuator` interface is intended for advanced users.
The `ActuatorComponent` abstract class is used to create the actual `IActuator` at
runtime. It must be attached to the same `GameObject` as the `Agent`, or to a
child `GameObject`. Actuators and all of their data structures are initialized
during `Agent.Initialize`. This was done to prevent unexpected allocations at runtime.
You can find an example of an `IActuator` implementation in the `Basic` example scene.
**NOTE**: you do not need to adjust the Actions in the Agent's
`Behavior Parameters` when using an `IActuator` and `ActuatorComponents`.
Internally, `Agent.OnActionReceived` uses an `IActuator` to send actions to the Agent,
although this is mostly abstracted from the user.
### Actions Summary & Best Practices

ml-agents-envs/mlagents_envs/env_utils.py (3 changes)


candidates = glob.glob(env_path + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86")
if len(candidates) == 0:
if os.path.isfile(env_path):
candidates = [env_path]
if len(candidates) > 0:
launch_string = candidates[0]

ml-agents-envs/mlagents_envs/environment.py (2 changes)


"""
try:
# A negative value -N indicates that the child was terminated by signal N (POSIX only).
s = signal.Signals(-returncode) # pylint: disable=no-member
s = signal.Signals(-returncode)
return s.name
except Exception:
# Should generally be a ValueError, but catch everything just in case.

ml-agents-envs/mlagents_envs/registry/binary_utils.py (2 changes)


break
try:
download_and_extract_zip(url, name)
except Exception: # pylint: disable=W0702
except Exception:
if attempt + 1 < NUMBER_ATTEMPTS:
logger.warning(
f"Attempt {attempt + 1} / {NUMBER_ATTEMPTS}"

ml-agents-envs/mlagents_envs/rpc_utils.py (15 changes)


def _process_visual_observation(
obs_index: int,
shape: Tuple[int, int, int],
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32)

@timed
def _process_vector_observation(
obs_index: int,
shape: Tuple[int, ...],
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0,) + shape, dtype=np.float32)

@timed
def steps_from_proto(
agent_info_list: Collection[
AgentInfoProto
], # pylint: disable=unsubscriptable-object
behavior_spec: BehaviorSpec,
agent_info_list: Collection[AgentInfoProto], behavior_spec: BehaviorSpec
) -> Tuple[DecisionSteps, TerminalSteps]:
decision_agent_info_list = [
agent_info for agent_info in agent_info_list if not agent_info.done

ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (5 changes)


# Only the most recent value is reported.
MOST_RECENT = 1
# Values within the summary period are summed up before reporting.
SUM = 2
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]

def on_message_received(self, msg: IncomingMessage) -> None:
"""
Receive the message from the environment, and save it for later retrieval.
:param msg:
:return:
"""

def get_and_reset_stats(self) -> EnvironmentStats:
"""
Returns the current stats, and resets the internal storage of the stats.
:return:
"""
s = self.stats
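As a hedged illustration of the payload shape these type aliases describe (the stat names and values below are made up):

```python
from mlagents_envs.side_channel.stats_side_channel import (
    EnvironmentStats,
    StatsAggregationMethod,
)

# Each stat name maps to the (value, aggregation method) tuples collected
# from the environment since the last get_and_reset_stats() call.
example: EnvironmentStats = {
    "Goal/Correct": [(1.0, StatsAggregationMethod.SUM)],
    "Agent/Speed": [(0.8, StatsAggregationMethod.AVERAGE)],
}
```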

ml-agents/mlagents/torch_utils/torch.py (2 changes)


torch.set_num_threads(cpu_utils.get_num_threads_to_use())
os.environ["KMP_BLOCKTIME"] = "0"
# Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701
# pylint: disable=E1101
if torch.cuda.is_available():
torch.set_default_tensor_type(torch.cuda.FloatTensor)
device = torch.device("cuda")

ml-agents/mlagents/trainers/agent_processor.py (8 changes)


):
"""
Create an AgentProcessor.
:param trainer: Trainer instance connected to this AgentProcessor. The trainer is given a trajectory
when it is finished.
:param policy: Policy instance associated with this AgentProcessor.

"""
Pass stats from the environment to the StatsReporter.
Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
The worker_id is used to determin whether StatsReporter.set_stat should be used.
The worker_id is used to determine whether StatsReporter.set_stat should be used.
:param env_stats:
:param worker_id:
:return:

if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val)
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.SUM:
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.MOST_RECENT:
# In order to prevent conflicts between multiple environments,
# only stats from the first environment are recorded.
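Putting the hunk above together, the dispatch reads roughly as follows; `record_env_stat`, `stats_reporter`, and `worker_id` are hypothetical stand-ins for the corresponding `AgentManager` method and attributes, so treat this as a sketch rather than the exact method body:

```python
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

def record_env_stat(stats_reporter, worker_id, stat_name, val, agg_type):
    if agg_type in (StatsAggregationMethod.AVERAGE, StatsAggregationMethod.SUM):
        # AVERAGE and SUM stats are forwarded along with their aggregation method.
        stats_reporter.add_stat(stat_name, val, agg_type)
    elif agg_type == StatsAggregationMethod.MOST_RECENT and worker_id == 0:
        # Only worker 0 reports MOST_RECENT stats, to avoid conflicts
        # between multiple concurrent environments.
        stats_reporter.set_stat(stat_name, val)
```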

ml-agents/mlagents/trainers/env_manager.py (3 changes)


_policy = self.agent_managers[brain_name].policy_queue.get_nowait()
except AgentManagerQueue.Empty:
if _policy is not None:
# policy_queue contains Policy, but we need a TFPolicy here
self.set_policy(brain_name, _policy) # type: ignore
self.set_policy(brain_name, _policy)
# Step the environments
new_step_infos = self._step()
return new_step_infos

ml-agents/mlagents/trainers/learn.py (1 change)


def get_version_string() -> str:
# pylint: disable=no-member
return f""" Version information:
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents_envs.__version__},

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)


from mlagents.trainers.torch.utils import ModelUtils
class TorchOptimizer(Optimizer): # pylint: disable=W0223
class TorchOptimizer(Optimizer):
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
super().__init__()
self.policy = policy

ml-agents/mlagents/trainers/policy/policy.py (14 changes)


self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
self.act_size = (
list(self.behavior_spec.action_spec.discrete_branches)
if self.behavior_spec.action_spec.is_discrete()
else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
sen_spec.shape[0]
for sen_spec in behavior_spec.sensor_specs
if len(sen_spec.shape) == 1
)
self.vis_obs_size = sum(
1 for sen_spec in behavior_spec.sensor_specs if len(sen_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize

ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)


def _extract_masks(self, decision_requests: DecisionSteps) -> np.ndarray:
mask = None
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
num_discrete_flat = np.sum(self.behavior_spec.action_spec.discrete_branches)
mask = torch.ones([len(decision_requests), num_discrete_flat])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)

:param buffer: The buffer with the observations to add to the running estimate
of the distribution.
"""
if self.use_vec_obs and self.normalize:
if self.normalize:
self.actor_critic.update_normalization(buffer)
@timed

for agent_id in decision_requests.agent_id
] # For 1-D array, the iterator order is correct.
run_out = self.evaluate(
decision_requests, global_agent_ids
) # pylint: disable=assignment-from-no-return
run_out = self.evaluate(decision_requests, global_agent_ids)
self.save_memories(global_agent_ids, run_out.get("memory_out"))
self.check_nan_action(run_out.get("action"))
return ActionInfo(

outputs=run_out,
agent_ids=list(decision_requests.agent_id),
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0
@property
def use_vec_obs(self):
return self.vec_obs_size > 0
def get_current_step(self):
"""

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)


"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
The PPO optimizer has a value estimator and a loss function.
:param policy: A TFPolicy object that will be updated by this PPO Optimizer.
:param policy: A TorchPolicy object that will be updated by this PPO Optimizer.
:param trainer_params: Trainer parameters dictionary that specifies the
properties of the trainer.
"""

ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)


# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
stack.enter_context(torch.no_grad())
q1_out, _ = self.q1_network(
inputs,
actions=actions,

with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
stack.enter_context(torch.no_grad())
q2_out, _ = self.q2_network(
inputs,
actions=actions,

ml-agents/mlagents/trainers/settings.py (1 change)


class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9

ml-agents/mlagents/trainers/stats.py (78 changes)


import time
from threading import RLock
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from torch.utils.tensorboard import SummaryWriter

"""
Takes a parameter dictionary and converts it to a human-readable string.
Recurses if there are multiple levels of dict. Used to print out hyperparameters.
param: param_dict: A Dictionary of key, value parameters.
return: A string version of this dictionary.
:param param_dict: A Dictionary of key, value parameters.
:return: A string version of this dictionary.
"""
if not isinstance(param_dict, dict):
return str(param_dict)

mean: float
std: float
num: int
sum: float
aggregation_method: StatsAggregationMethod
return StatsSummary(0.0, 0.0, 0)
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):
if self.aggregation_method == StatsAggregationMethod.SUM:
return self.sum
else:
return self.mean
class StatsPropertyType(Enum):

Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param type: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
pass

set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.mean"),
float(stats_summary.mean),
)
set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.sum"),
float(stats_summary.sum),
)

is_training = "Not Training"
if "Is Training" in values:
stats_summary = values["Is Training"]
if stats_summary.mean > 0.0:
if stats_summary.aggregated_value > 0.0:
is_training = "Training"
elapsed_time = time.time() - self.training_start_time

def __init__(self, base_dir: str, clear_past_data: bool = False):
"""
A StatsWriter that writes to a Tensorboard summary.
category.
category.
"""
self.summary_writers: Dict[str, SummaryWriter] = {}
self.base_dir: str = base_dir

) -> None:
self._maybe_create_summary_writer(category)
for key, value in values.items():
self.summary_writers[category].add_scalar(f"{key}", value.mean, step)
self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

writers: List[StatsWriter] = []
stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
lock = RLock()
stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict(
lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE)
)
def __init__(self, category: str):
"""

Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param key: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
with StatsReporter.lock:

def add_stat(self, key: str, value: float) -> None:
def add_stat(
self,
key: str,
value: float,
aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE,
) -> None:
:param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE.
StatsReporter.stats_aggregation[self.category][key] = aggregation
StatsReporter.stats_aggregation[self.category][
key
] = StatsAggregationMethod.MOST_RECENT
def write_stats(self, step: int) -> None:
"""

:param step: Training step which to write these stats as.
"""
with StatsReporter.lock:

def get_stats_summaries(self, key: str) -> StatsSummary:
"""
Get the mean, std, and count of a particular statistic, since last write.
Get the mean, std, count, sum and aggregation method of a particular statistic, since last write.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
:returns: A StatsSummary containing summary statistics.
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
stat_values = StatsReporter.stats_dict[self.category][key]
if len(stat_values) == 0:
return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)
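The net effect of the new five-field `StatsSummary` is that writers can stay aggregation-agnostic and just read `aggregated_value`, which is what `TensorboardWriter.write_stats` now logs per key. A small sketch of the behavior, with made-up numbers mirroring the test expectations elsewhere in this diff:

```python
from mlagents.trainers.stats import StatsSummary
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

avg = StatsSummary(mean=2.1, std=1.0, num=2, sum=4.2,
                   aggregation_method=StatsAggregationMethod.AVERAGE)
tot = StatsSummary(mean=2.1, std=1.0, num=2, sum=4.2,
                   aggregation_method=StatsAggregationMethod.SUM)

# aggregated_value picks the sum for SUM stats and the mean otherwise.
assert avg.aggregated_value == 2.1
assert tot.aggregated_value == 4.2
```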

ml-agents/mlagents/trainers/tests/__init__.py (2 changes)


if (
"ml-agents/mlagents" in filename
or "ml-agents-envs/mlagents" in filename
) and "tensorflow_to_barracuda.py" not in filename:
):
raise ValueError(
f"float64 array created. Set dtype=np.float32 instead of current dtype={kwargs_dtype}. "
f"Run pytest with TEST_ENFORCE_NUMPY_FLOAT32=1 to confirm fix."

ml-agents/mlagents/trainers/tests/check_env_trains.py (4 changes)


) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
print(step, val, stats_summary.aggregated_value)
self._last_reward_summary[category] = stats_summary.aggregated_value
# The reward processor is passed as an argument to _check_environment_trains.

ml-agents/mlagents/trainers/tests/test_agent_processor.py (25 changes)


{
"averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
"most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
"summed": [(3.1, StatsAggregationMethod.SUM)],
"summed": [(1.1, StatsAggregationMethod.SUM)],
},
]
for env_stats in all_env_stats:

"averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
"most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
"averaged": StatsSummary(
mean=2.0,
std=mock.ANY,
num=2,
sum=4.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
),
"most_recent": StatsSummary(
mean=4.0,
std=0.0,
num=1,
sum=4.0,
aggregation_method=StatsAggregationMethod.MOST_RECENT,
),
"summed": StatsSummary(
mean=2.1,
std=mock.ANY,
num=2,
sum=4.2,
aggregation_method=StatsAggregationMethod.SUM,
),
}
stats_reporter.write_stats(123)
writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)

ml-agents/mlagents/trainers/tests/test_learn.py (16 changes)


from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
import os.path
def basic_options(extra_args=None):

learn.run_training(0, options)
mock_init.assert_called_once_with(
trainer_factory_mock.return_value,
"results/ppo",
os.path.join("results", "ppo"),
"ppo",
"mock_param_manager",
True,

"results/ppo", False, False, "results/notuselessrun"
os.path.join("results", "ppo"),
False,
False,
os.path.join("results", "notuselessrun"),
)
write_timing_tree_mock.assert_called_once_with(
os.path.join("results", "ppo", "run_logs")
)
write_run_options_mock.assert_called_once_with(
os.path.join("results", "ppo"), options
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
write_run_options_mock.assert_called_once_with("results/ppo", options)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)


from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
import os.path
# Add concrete implementations of abstract methods

mock_model_saver.model_path = self.artifact_path
mock_model_saver.save_checkpoint.side_effect = checkpoint_path
self.model_saver = mock_model_saver
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()

trainer.brain_name,
ModelCheckpoint(
step,
f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}",
None,
mock.ANY,
),

ml-agents/mlagents/trainers/tests/test_stats.py (43 changes)


GaugeWriter,
ConsoleWriter,
StatsPropertyType,
StatsAggregationMethod,
)

category = "category1"
with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# Test that the filewriter has been created and the directory has been created.

def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file
time.sleep(1.0)

with self.assertLogs("mlagents.trainers", level="INFO") as cm:
category = "category1"
console_writer = ConsoleWriter()
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

10,
)
statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1)
statssummary2 = StatsSummary(
mean=0.0,
std=0.0,
num=1,
sum=0.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
"Environment/Cumulative Reward": statssummary1,
"Environment/Cumulative Reward": statssummary2,
"Is Training": statssummary2,
},
10,

category = "category1"
console_writer = ConsoleWriter()
console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.tests.dummy_config import ( # noqa: F401
ppo_dummy_config,
curiosity_dummy_config,
gail_dummy_config,

ml-agents/mlagents/trainers/tests/torch/test_sac.py (2 changes)


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.tests.dummy_config import ( # noqa: F401
sac_dummy_config,
curiosity_dummy_config,
)

ml-agents/mlagents/trainers/torch/attention.py (224 changes)


from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder
from mlagents.trainers.torch.layers import LinearEncoder, Initialization, linear_layer
class MultiHeadAttention(torch.nn.Module):

Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, key_size)
- key: of dimensions (batch_size, number_of_keys, key_size)
- value: of dimensions (batch_size, number_of_keys, value_size)
- query: of dimensions (batch_size, number_of_queries, embedding_size)
- key: of dimensions (batch_size, number_of_keys, embedding_size)
- value: of dimensions (batch_size, number_of_keys, embedding_size)
- The output: (batch_size, number_of_queries, output_size)
- The output: (batch_size, number_of_queries, embedding_size)
def __init__(
self,
query_size: int,
key_size: int,
value_size: int,
output_size: int,
num_heads: int,
embedding_size: int,
):
def __init__(self, embedding_size: int, num_heads: int):
self.output_size = output_size
self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
# self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
# self.fc_k = LinearEncoder(key_size,2, self.n_heads * self.embedding_size)
# self.fc_v = LinearEncoder(value_size,2, self.n_heads * self.embedding_size)
self.fc_out = torch.nn.Linear(
self.n_heads * self.embedding_size, self.output_size
)
self.head_size: int = self.embedding_size // self.n_heads
def forward(
self,

n_q: int,
n_k: int,
number_of_keys: int = -1,
number_of_queries: int = -1,
# This is to avoid using .size() when possible as Barracuda does not support
n_q = number_of_queries if number_of_queries != -1 else query.size(1)
n_k = number_of_keys if number_of_keys != -1 else key.size(1)
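The refactor replaces the per-tensor sizes (`query_size`, `key_size`, `value_size`, `output_size`) with a single `embedding_size` shared by query, key, value, and output. Below is a from-scratch stand-in in plain PyTorch (not the repo's class) showing the shape contract described above, with `head_size = embedding_size // num_heads` as in the hunk; everything in it is a sketch under those assumptions.

```python
import torch

class TinyMultiHeadAttention(torch.nn.Module):
    """Minimal stand-in for the refactored MultiHeadAttention interface."""

    def __init__(self, embedding_size: int, num_heads: int):
        super().__init__()
        assert embedding_size % num_heads == 0
        self.n_heads = num_heads
        self.head_size = embedding_size // num_heads  # as in the hunk above
        self.embedding_size = embedding_size
        self.fc_q = torch.nn.Linear(embedding_size, embedding_size)
        self.fc_k = torch.nn.Linear(embedding_size, embedding_size)
        self.fc_v = torch.nn.Linear(embedding_size, embedding_size)
        self.fc_out = torch.nn.Linear(embedding_size, embedding_size)

    def forward(self, query, key, value):
        b, n_q, _ = query.shape
        n_k = key.shape[1]
        # Project, then split the embedding into heads: (b, heads, n, head_size).
        q = self.fc_q(query).reshape(b, n_q, self.n_heads, self.head_size).transpose(1, 2)
        k = self.fc_k(key).reshape(b, n_k, self.n_heads, self.head_size).transpose(1, 2)
        v = self.fc_v(value).reshape(b, n_k, self.n_heads, self.head_size).transpose(1, 2)
        # Scaled dot-product attention per head.
        att = torch.softmax(q @ k.transpose(2, 3) / self.head_size ** 0.5, dim=-1)
        out = (att @ v).transpose(1, 2).reshape(b, n_q, self.embedding_size)
        return self.fc_out(out), att

mha = TinyMultiHeadAttention(embedding_size=64, num_heads=4)
out, att = mha(torch.rand(2, 3, 64), torch.rand(2, 5, 64), torch.rand(2, 5, 64))
print(out.shape)  # torch.Size([2, 3, 64]): (batch, number_of_queries, embedding_size)
```

Passing `number_of_queries`/`number_of_keys` explicitly, as the repo's version does, lets the exporter avoid `.size()` calls that Barracuda cannot handle; the stand-in above omits that detail for brevity.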