Merge branch 'master' into develop-var-len-obs-feature

4 年前 · 4d32857d
--- a/DevProject/Packages/manifest.json
+++ b/DevProject/Packages/manifest.json
    "com.unity.purchasing": "2.1.0",
    "com.unity.test-framework": "1.1.16",
    "com.unity.test-framework.performance": "2.2.0-preview",
-    "com.unity.testtools.codecoverage": "0.2.2-preview",
+    "com.unity.testtools.codecoverage": "1.0.0-pre.3",
    "com.unity.textmeshpro": "2.0.1",
    "com.unity.timeline": "1.2.12",
    "com.unity.ugui": "1.0.0",
--- a/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
+++ b/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
        /// Creates a BasicActuator.
        /// </summary>
        /// <returns></returns>
+#pragma warning disable 672
+#pragma warning restore 672
        {
            return new BasicActuator(basicController);
        }
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
    public class Match3ExampleActuatorComponent : Match3ActuatorComponent
    {
        /// <inheritdoc/>
+#pragma warning disable 672
+#pragma warning restore 672
        {
            var board = GetComponent<Match3Board>();
            var agent = GetComponentInParent<Agent>();
--- a/com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
+++ b/com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
        public bool ForceHeuristic;

        /// <inheritdoc/>
+#pragma warning disable 672
+#pragma warning restore 672
        {
            var board = GetComponent<AbstractBoard>();
            var agent = GetComponentInParent<Agent>();
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
  will result in the values being summed (instead of averaged) when written to
  TensorBoard. Thanks to @brccabral for the contribution! (#4816)
 - The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
-removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
+  removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
 - Added the IHeuristicProvider interface to allow IActuators as well as Agents to implement the Heuristic function to generate actions.
  Updated the Basic example and the Match3 Example to use Actuators.
  Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
 - Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
  `AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)
 - The Barracuda dependency was upgraded to 1.3.0. (#4898)
+- Added `ActuatorComponent.CreateActuators`, and deprecated `ActuatorComponent.CreateActuator`.  The
+  default implementation will wrap `ActuatorComponent.CreateActuator` in an array and return that. (#4899)
+- TensorBoard now logs the Environment Reward as both a scalar and a histogram. (#4878)
 - Added a `--torch-device` commandline option to `mlagents-learn`, which sets the default
  [`torch.device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) used for training. (#4888)
 - The `--cpu` commandline option had no effect and was removed. Use `--torch-device=cpu` to force CPU training. (#4888)
--- a/com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs
+using System;
 using UnityEngine;

 namespace Unity.MLAgents.Actuators
        /// Create the IActuator.  This is called by the Agent when it is initialized.
        /// </summary>
        /// <returns>Created IActuator object.</returns>
+        [Obsolete("Use CreateActuators instead.")]
+
+        /// <summary>
+        /// Create a collection of <see cref="IActuator"/>s.  This is called by the <see cref="Agent"/> during
+        /// initialization.
+        /// </summary>
+        /// <returns>A collection of <see cref="IActuator"/>s</returns>
+        public virtual IActuator[] CreateActuators()
+        {
+#pragma warning disable 618
+            return new[] { CreateActuator() };
+#pragma warning restore 618
+        }

        /// <summary>
        /// The specification of the possible actions for this ActuatorComponent.
--- a/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
            NumContinuousActions = NumDiscreteActions = SumOfDiscreteBranchSizes = 0;
        }

+        /// <summary>
+        /// Add an array of <see cref="IActuator"/>s at once.
+        /// </summary>
+        /// <param name="actuators">The array of <see cref="IActuator"/>s to add.</param>
+        public void AddActuators(IActuator[] actuators)
+        {
+            for (var i = 0; i < actuators.Length; i++)
+            {
+                Add(actuators[i]);
+            }
+        }
+
        /*********************************************************************************
         * IList implementation that delegates to m_Actuators List.                      *
         *********************************************************************************/
        public int Count => m_Actuators.Count;

        /// <inheritdoc/>
-        public bool IsReadOnly => m_Actuators.IsReadOnly;
+        public bool IsReadOnly => false;

        /// <inheritdoc/>
        public int IndexOf(IActuator item)
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs

            foreach (var actuatorComponent in attachedActuators)
            {
-                m_ActuatorManager.Add(actuatorComponent.CreateActuator());
+                m_ActuatorManager.AddActuators(actuatorComponent.CreateActuators());
            }
        }

--- a/com.unity.ml-agents/Runtime/StatsRecorder.cs
+++ b/com.unity.ml-agents/Runtime/StatsRecorder.cs
        /// <summary>
        /// Values within the summary period are summed up before reporting.
        /// </summary>
-        Sum = 2
+        Sum = 2,
+
+        /// <summary>
+        /// Values within the summary period are reported as a histogram.
+        /// </summary>
+        Histogram = 3
    }

    /// <summary>
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 - `VectorSensor.AddObservation(IEnumerable<float>)` is deprecated. Use `VectorSensor.AddObservation(IList<float>)`
  instead.
 - `ObservationWriter.AddRange()` is deprecated. Use `ObservationWriter.AddList()` instead.
+- `ActuatorComponent.CreateActuator()` is deprecated.  Please override `ActuatorComponent.CreateActuators`
+  instead.  Since `ActuatorComponent.CreateActuator()` is abstract, you will still need to override it in your
+  class until it is removed.  It is only ever called if you don't override `ActuatorComponent.CreateActuators`.
+  You can suppress the warnings by surrounding the method with the following pragma:
+    ```c#
+    #pragma warning disable 672
+    public override IActuator CreateActuator() { ... }
+    #pragma warning restore 672
+    ```


 # Migrating
--- a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
+++ b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
    # Values within the summary period are summed up before reporting.
    SUM = 2

+    # All values within a summary period are reported as a histogram.
+    HISTOGRAM = 3
+

 StatList = List[Tuple[float, StatsAggregationMethod]]
 EnvironmentStats = Mapping[str, StatList]
--- a/ml-agents/mlagents/trainers/stats.py
+++ b/ml-agents/mlagents/trainers/stats.py


 class StatsSummary(NamedTuple):
-    mean: float
-    std: float
-    num: int
-    sum: float
+    full_dist: List[float]
-        return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
+        return StatsSummary([], StatsAggregationMethod.AVERAGE)

    @property
    def aggregated_value(self):
            return self.mean

+    @property
+    def mean(self):
+        return np.mean(self.full_dist)
+
+    @property
+    def std(self):
+        return np.std(self.full_dist)
+
+    @property
+    def num(self):
+        return len(self.full_dist)
+
+    @property
+    def sum(self):
+        return np.sum(self.full_dist)
+

 class StatsPropertyType(Enum):
    HYPERPARAMETERS = "hyperparameters"
            self.summary_writers[category].add_scalar(
                f"{key}", value.aggregated_value, step
            )
+            if value.aggregation_method == StatsAggregationMethod.HISTOGRAM:
+                self.summary_writers[category].add_histogram(
+                    f"{key}_hist", np.array(value.full_dist), step
+                )
            self.summary_writers[category].flush()

    def _maybe_create_summary_writer(self, category: str) -> None:
            return StatsSummary.empty()

        return StatsSummary(
-            mean=np.mean(stat_values),
-            std=np.std(stat_values),
-            num=len(stat_values),
-            sum=np.sum(stat_values),
+            full_dist=stat_values,
            aggregation_method=StatsReporter.stats_aggregation[self.category][key],
        )
--- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py
+++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py

    expected_stats = {
        "averaged": StatsSummary(
-            mean=2.0,
-            std=mock.ANY,
-            num=2,
-            sum=4.0,
-            aggregation_method=StatsAggregationMethod.AVERAGE,
+            full_dist=[1.0, 3.0], aggregation_method=StatsAggregationMethod.AVERAGE
-            mean=4.0,
-            std=0.0,
-            num=1,
-            sum=4.0,
-            aggregation_method=StatsAggregationMethod.MOST_RECENT,
+            full_dist=[4.0], aggregation_method=StatsAggregationMethod.MOST_RECENT
-            mean=2.1,
-            std=mock.ANY,
-            num=2,
-            sum=4.2,
-            aggregation_method=StatsAggregationMethod.SUM,
+            full_dist=[3.1, 1.1], aggregation_method=StatsAggregationMethod.SUM
        ),
    }
    stats_reporter.write_stats(123)
--- a/ml-agents/mlagents/trainers/tests/test_stats.py
+++ b/ml-agents/mlagents/trainers/tests/test_stats.py
    with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
        tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
        statssummary1 = StatsSummary(
-            mean=1.0,
-            std=1.0,
-            num=1,
-            sum=1.0,
-            aggregation_method=StatsAggregationMethod.AVERAGE,
+            full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
        )
        tb_writer.write_stats("category1", {"key1": statssummary1}, 10)

 def test_tensorboard_writer_clear(tmp_path):
    tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
    statssummary1 = StatsSummary(
-        mean=1.0,
-        std=1.0,
-        num=1,
-        sum=1.0,
-        aggregation_method=StatsAggregationMethod.AVERAGE,
+        full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
    )
    tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
    # TB has some sort of timeout before making a new file
            category = "category1"
            console_writer = ConsoleWriter()
            statssummary1 = StatsSummary(
-                mean=1.0,
-                std=1.0,
-                num=1,
-                sum=1.0,
-                aggregation_method=StatsAggregationMethod.AVERAGE,
+                full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
            )
            console_writer.write_stats(
                category,
                10,
            )
            statssummary2 = StatsSummary(
-                mean=0.0,
-                std=0.0,
-                num=1,
-                sum=0.0,
-                aggregation_method=StatsAggregationMethod.AVERAGE,
+                full_dist=[0.0], aggregation_method=StatsAggregationMethod.AVERAGE
            )
            console_writer.write_stats(
                category,
            )

        self.assertIn(
-            "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0]
+            "Mean Reward: 1.000. Std of Reward: 0.000. Training.", cm.output[0]
        )
        self.assertIn("Not Training.", cm.output[1])

            console_writer = ConsoleWriter()
            console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
            statssummary1 = StatsSummary(
-                mean=1.0,
-                std=1.0,
-                num=1,
-                sum=1.0,
-                aggregation_method=StatsAggregationMethod.AVERAGE,
+                full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
            )
            console_writer.write_stats(
                category,
            )

        self.assertIn(
-            "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0]
+            "Mean Reward: 1.000. Std of Reward: 0.000. Training.", cm.output[0]
        )
--- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
 import abc
 import time
 import attr
+from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
+
 from mlagents.trainers.policy.checkpoint_manager import (
    ModelCheckpoint,
    ModelCheckpointManager,
        for name, rewards in self.collected_rewards.items():
            if name == "environment":
                self.stats_reporter.add_stat(
-                    "Environment/Cumulative Reward", rewards.get(agent_id, 0)
+                    "Environment/Cumulative Reward",
+                    rewards.get(agent_id, 0),
+                    aggregation=StatsAggregationMethod.HISTOGRAM,
                )
                self.cumulative_returns_since_policy_update.append(
                    rewards.get(agent_id, 0)