Added SUM as aggregation type for custom statistics (#4816)

4 years ago · 8a40c58a
--- a/.gitignore
+++ b/.gitignore
 /summaries
 # Output Artifacts
 /results
+# Output Builds
+/Builds

 # Training environments
 /envs
--- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
    Renderer m_GroundRenderer;
    HallwaySettings m_HallwaySettings;
    int m_Selection;
+    StatsRecorder m_statsRecorder;

    public override void Initialize()
    {
        m_GroundMaterial = m_GroundRenderer.material;
+        m_statsRecorder = Academy.Instance.StatsRecorder;
    }

    public override void CollectObservations(VectorSensor sensor)
            {
                SetReward(1f);
                StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f));
+                m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum);
+                m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum);
            }
            EndEpisode();
        }
            symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position;
            symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position;
        }
+        m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum);
+        m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum);
    }
 }
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md

 ### Minor Changes
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
+- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
+will result in the values being summed (instead of averaged) when written to
+TensorBoard. Thanks to @brccabral for the contribution! (#4816)

 #### ml-agents / ml-agents-envs / gym-unity (Python)

--- a/com.unity.ml-agents/Runtime/StatsRecorder.cs
+++ b/com.unity.ml-agents/Runtime/StatsRecorder.cs
        /// To avoid conflicts when training with multiple concurrent environments, only
        /// stats from worker index 0 will be tracked.
        /// </summary>
-        MostRecent = 1
+        MostRecent = 1,
+
+        /// <summary>
+        /// Values within the summary period are summed up before reporting.
+        /// </summary>
+        Sum = 2
    }

    /// <summary>
--- a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
+++ b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
    # Only the most recent value is reported.
    MOST_RECENT = 1

+    # Values within the summary period are summed up before reporting.
+    SUM = 2
+

 StatList = List[Tuple[float, StatsAggregationMethod]]
 EnvironmentStats = Mapping[str, StatList]
    def on_message_received(self, msg: IncomingMessage) -> None:
        """
        Receive the message from the environment, and save it for later retrieval.
+
        :param msg:
        :return:
        """
    def get_and_reset_stats(self) -> EnvironmentStats:
        """
        Returns the current stats, and resets the internal storage of the stats.
+
        :return:
        """
        s = self.stats
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py
    ):
        """
        Create an AgentProcessor.
+
        :param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory
        when it is finished.
        :param policy: Policy instance associated with this AgentProcessor.
                    )

    def _process_step(
-        self, step: Union[TerminalStep, DecisionStep], global_id: str, index: int
+        self,
+        step: Union[
+            TerminalStep, DecisionStep
+        ],  # pylint: disable=unsubscriptable-object
+        global_id: str,
+        index: int,
    ) -> None:
        terminated = isinstance(step, TerminalStep)
        stored_decision_step, idx = self.last_step_result.get(global_id, (None, None))
        """
        Pass stats from the environment to the StatsReporter.
        Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
-        The worker_id is used to determin whether StatsReporter.set_stat should be used.
+        The worker_id is used to determine whether StatsReporter.set_stat should be used.
+
        :param env_stats:
        :param worker_id:
        :return:
                if agg_type == StatsAggregationMethod.AVERAGE:
-                    self.stats_reporter.add_stat(stat_name, val)
+                    self.stats_reporter.add_stat(stat_name, val, agg_type)
+                elif agg_type == StatsAggregationMethod.SUM:
+                    self.stats_reporter.add_stat(stat_name, val, agg_type)
                elif agg_type == StatsAggregationMethod.MOST_RECENT:
                    # In order to prevent conflicts between multiple environments,
                    # only stats from the first environment are recorded.
--- a/ml-agents/mlagents/trainers/stats.py
+++ b/ml-agents/mlagents/trainers/stats.py
 import time
 from threading import RLock

+from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
+
 from mlagents_envs.logging_util import get_logger
 from mlagents_envs.timers import set_gauge
 from torch.utils.tensorboard import SummaryWriter
    """
    Takes a parameter dictionary and converts it to a human-readable string.
    Recurses if there are multiple levels of dict. Used to print out hyperparameters.
-    param: param_dict: A Dictionary of key, value parameters.
-    return: A string version of this dictionary.
+
+    :param param_dict: A Dictionary of key, value parameters.
+    :return: A string version of this dictionary.
    """
    if not isinstance(param_dict, dict):
        return str(param_dict)
        )


-class StatsSummary(NamedTuple):
+class StatsSummary(NamedTuple):  # pylint: disable=inherit-non-class
+    sum: float
+    aggregation_method: StatsAggregationMethod
-        return StatsSummary(0.0, 0.0, 0)
+        return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
+
+    @property
+    def aggregated_value(self):
+        if self.aggregation_method == StatsAggregationMethod.SUM:
+            return self.sum
+        else:
+            return self.mean


 class StatsPropertyType(Enum):
        Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
        a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
        with all types of properties. For instance, a TB writer doesn't need a max step.
+
-        :param type: The type of property.
+        :param property_type: The type of property.
        :param value: The property itself.
        """
        pass
                GaugeWriter.sanitize_string(f"{category}.{val}.mean"),
                float(stats_summary.mean),
            )
+            set_gauge(
+                GaugeWriter.sanitize_string(f"{category}.{val}.sum"),
+                float(stats_summary.sum),
+            )


 class ConsoleWriter(StatsWriter):
        is_training = "Not Training"
        if "Is Training" in values:
            stats_summary = values["Is Training"]
-            if stats_summary.mean > 0.0:
+            if stats_summary.aggregated_value > 0.0:
                is_training = "Training"

        elapsed_time = time.time() - self.training_start_time
    def __init__(self, base_dir: str, clear_past_data: bool = False):
        """
        A StatsWriter that writes to a Tensorboard summary.
+
-            category.
+        category.
        """
        self.summary_writers: Dict[str, SummaryWriter] = {}
        self.base_dir: str = base_dir
    ) -> None:
        self._maybe_create_summary_writer(category)
        for key, value in values.items():
-            self.summary_writers[category].add_scalar(f"{key}", value.mean, step)
+            self.summary_writers[category].add_scalar(
+                f"{key}", value.aggregated_value, step
+            )
            self.summary_writers[category].flush()

    def _maybe_create_summary_writer(self, category: str) -> None:
    writers: List[StatsWriter] = []
    stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
    lock = RLock()
+    stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict(
+        lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE)
+    )

    def __init__(self, category: str):
        """
        Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
        a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
        with all types of properties. For instance, a TB writer doesn't need a max step.
-        :param key: The type of property.
+
+        :param property_type: The type of property.
        :param value: The property itself.
        """
        with StatsReporter.lock:
-    def add_stat(self, key: str, value: float) -> None:
+    def add_stat(
+        self,
+        key: str,
+        value: float,
+        aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE,
+    ) -> None:
+
+        :param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE.
+            StatsReporter.stats_aggregation[self.category][key] = aggregation
+
+            StatsReporter.stats_aggregation[self.category][
+                key
+            ] = StatsAggregationMethod.MOST_RECENT

    def write_stats(self, step: int) -> None:
        """
+
        :param step: Training step at which to write these stats.
        """
        with StatsReporter.lock:

    def get_stats_summaries(self, key: str) -> StatsSummary:
        """
-        Get the mean, std, and count of a particular statistic, since last write.
+        Get the mean, std, count, sum and aggregation method of a particular statistic, since last write.
+
-        :returns: A StatsSummary NamedTuple containing (mean, std, count).
+        :returns: A StatsSummary containing summary statistics.
-        if len(StatsReporter.stats_dict[self.category][key]) > 0:
-            return StatsSummary(
-                mean=np.mean(StatsReporter.stats_dict[self.category][key]),
-                std=np.std(StatsReporter.stats_dict[self.category][key]),
-                num=len(StatsReporter.stats_dict[self.category][key]),
-            )
-        return StatsSummary.empty()
+        stat_values = StatsReporter.stats_dict[self.category][key]
+        if len(stat_values) == 0:
+            return StatsSummary.empty()
+
+        return StatsSummary(
+            mean=np.mean(stat_values),
+            std=np.std(stat_values),
+            num=len(stat_values),
+            sum=np.sum(stat_values),
+            aggregation_method=StatsReporter.stats_aggregation[self.category][key],
+        )
--- a/ml-agents/mlagents/trainers/tests/check_env_trains.py
+++ b/ml-agents/mlagents/trainers/tests/check_env_trains.py
    ) -> None:
        for val, stats_summary in values.items():
            if val == "Environment/Cumulative Reward":
-                print(step, val, stats_summary.mean)
-                self._last_reward_summary[category] = stats_summary.mean
+                print(step, val, stats_summary.aggregated_value)
+                self._last_reward_summary[category] = stats_summary.aggregated_value


 # The reward processor is passed as an argument to _check_environment_trains.
--- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py
+++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py
        {
            "averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
            "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
+            "summed": [(3.1, StatsAggregationMethod.SUM)],
+            "summed": [(1.1, StatsAggregationMethod.SUM)],
        },
    ]
    for env_stats in all_env_stats:
-        "averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
-        "most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
+        "averaged": StatsSummary(
+            mean=2.0,
+            std=mock.ANY,
+            num=2,
+            sum=4.0,
+            aggregation_method=StatsAggregationMethod.AVERAGE,
+        ),
+        "most_recent": StatsSummary(
+            mean=4.0,
+            std=0.0,
+            num=1,
+            sum=4.0,
+            aggregation_method=StatsAggregationMethod.MOST_RECENT,
+        ),
+        "summed": StatsSummary(
+            mean=2.1,
+            std=mock.ANY,
+            num=2,
+            sum=4.2,
+            aggregation_method=StatsAggregationMethod.SUM,
+        ),
    }
    stats_reporter.write_stats(123)
    writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)
--- a/ml-agents/mlagents/trainers/tests/test_learn.py
+++ b/ml-agents/mlagents/trainers/tests/test_learn.py
 from mlagents_envs.exception import UnityEnvironmentException
 from mlagents.trainers.stats import StatsReporter
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
+import os.path


 def basic_options(extra_args=None):
                learn.run_training(0, options)
                mock_init.assert_called_once_with(
                    trainer_factory_mock.return_value,
-                    "results/ppo",
+                    os.path.join("results", "ppo"),
                    "ppo",
                    "mock_param_manager",
                    True,
-                    "results/ppo", False, False, "results/notuselessrun"
+                    os.path.join("results", "ppo"),
+                    False,
+                    False,
+                    os.path.join("results", "notuselessrun"),
+                )
+                write_timing_tree_mock.assert_called_once_with(
+                    os.path.join("results", "ppo", "run_logs")
+                )
+                write_run_options_mock.assert_called_once_with(
+                    os.path.join("results", "ppo"), options
-                write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
-                write_run_options_mock.assert_called_once_with("results/ppo", options)
    StatsReporter.writers.clear()  # make sure there aren't any writers as added by learn.py


--- a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py
+++ b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py
 from mlagents.trainers.settings import TrainerSettings
 from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
 from mlagents_envs.base_env import ActionSpec
+import os.path


 # Add concrete implementations of abstract methods
            trainer.brain_name,
            ModelCheckpoint(
                step,
-                f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
+                f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}",
                None,
                mock.ANY,
            ),
--- a/ml-agents/mlagents/trainers/tests/test_stats.py
+++ b/ml-agents/mlagents/trainers/tests/test_stats.py
    GaugeWriter,
    ConsoleWriter,
    StatsPropertyType,
+    StatsAggregationMethod,
 )


    category = "category1"
    with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
        tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
-        statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
+        statssummary1 = StatsSummary(
+            mean=1.0,
+            std=1.0,
+            num=1,
+            sum=1.0,
+            aggregation_method=StatsAggregationMethod.AVERAGE,
+        )
        tb_writer.write_stats("category1", {"key1": statssummary1}, 10)

        # Test that the filewriter has been created and the directory has been created.

 def test_tensorboard_writer_clear(tmp_path):
    tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
-    statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
+    statssummary1 = StatsSummary(
+        mean=1.0,
+        std=1.0,
+        num=1,
+        sum=1.0,
+        aggregation_method=StatsAggregationMethod.AVERAGE,
+    )
    tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
    # TB has some sort of timeout before making a new file
    time.sleep(1.0)
        with self.assertLogs("mlagents.trainers", level="INFO") as cm:
            category = "category1"
            console_writer = ConsoleWriter()
-            statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
+            statssummary1 = StatsSummary(
+                mean=1.0,
+                std=1.0,
+                num=1,
+                sum=1.0,
+                aggregation_method=StatsAggregationMethod.AVERAGE,
+            )
            console_writer.write_stats(
                category,
                {
                10,
            )
-            statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1)
+            statssummary2 = StatsSummary(
+                mean=0.0,
+                std=0.0,
+                num=1,
+                sum=0.0,
+                aggregation_method=StatsAggregationMethod.AVERAGE,
+            )
-                    "Environment/Cumulative Reward": statssummary1,
+                    "Environment/Cumulative Reward": statssummary2,
                    "Is Training": statssummary2,
                },
                10,
            category = "category1"
            console_writer = ConsoleWriter()
            console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
-            statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
+            statssummary1 = StatsSummary(
+                mean=1.0,
+                std=1.0,
+                num=1,
+                sum=1.0,
+                aggregation_method=StatsAggregationMethod.AVERAGE,
+            )
            console_writer.write_stats(
                category,
                {