[MLA-1233] Remove stats.CSVWriter (#4300)

4 年前 · 493793a6
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md

 ### Minor Changes
 #### com.unity.ml-agents (C#)
-#### ml-agents / ml-agents-envs / gym-unity (Python)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
+- The CSV statistics writer was removed (#4300).

 ### Bug Fixes
 #### com.unity.ml-agents (C#)
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
 mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize
 ```

-We can observe progress and metrics via Tensorboard.
+We can observe progress and metrics via TensorBoard.

 #### Curriculum

--- a/docs/Using-Tensorboard.md
+++ b/docs/Using-Tensorboard.md
  skill level between two players. In a proper training run, the ELO of the
  agent should steadily increase.

+## Exporting Data from TensorBoard
+To export time series data in CSV or JSON format, check the "Show data download
+links" option in the upper left. This enables download links below each chart.
+
+![Example TensorBoard Run](images/TensorBoard-download.png)
+
-To get custom metrics from a C# environment into Tensorboard, you can use the
+To get custom metrics from a C# environment into TensorBoard, you can use the
 `StatsRecorder`:

 ```csharp
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
 from mlagents.trainers.stats import (
    TensorboardWriter,
-    CSVWriter,
    StatsReporter,
    GaugeWriter,
    ConsoleWriter,
                os.path.join(run_logs_dir, "training_status.json")
            )

-        # Configure CSV, Tensorboard Writers and StatsReporter
-        # We assume reward and episode length are needed in the CSV.
-        csv_writer = CSVWriter(
-            write_path,
-            required_fields=[
-                "Environment/Cumulative Reward",
-                "Environment/Episode Length",
-            ],
-        )
+        # Configure the TensorBoard writer and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
-        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

--- a/ml-agents/mlagents/trainers/stats.py
+++ b/ml-agents/mlagents/trainers/stats.py
 from typing import List, Dict, NamedTuple, Any, Optional
 import numpy as np
 import abc
-import csv
 import os
 import time
 from threading import RLock
        """
        Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
        a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
-        with all types of properties. For instance, a TB writer doesn't need a max step, nor should
-        we write hyperparameters to the CSV.
+        with all types of properties. For instance, a TB writer doesn't need a max step.
        :param category: The category that the property belongs to.
        :param type: The type of property.
        :param value: The property itself.
            return None


-class CSVWriter(StatsWriter):
-    def __init__(self, base_dir: str, required_fields: List[str] = None):
-        """
-        A StatsWriter that writes to a Tensorboard summary.
-        :param base_dir: The directory within which to place the CSV file, which will be {base_dir}/{category}.csv.
-        :param required_fields: If provided, the CSV writer won't write until these fields have statistics to write for
-        them.
-        """
-        # We need to keep track of the fields in the CSV, as all rows need the same fields.
-        self.csv_fields: Dict[str, List[str]] = {}
-        self.required_fields = required_fields if required_fields else []
-        self.base_dir: str = base_dir
-
-    def write_stats(
-        self, category: str, values: Dict[str, StatsSummary], step: int
-    ) -> None:
-        if self._maybe_create_csv_file(category, list(values.keys())):
-            row = [str(step)]
-            # Only record the stats that showed up in the first valid row
-            for key in self.csv_fields[category]:
-                _val = values.get(key, None)
-                row.append(str(_val.mean) if _val else "None")
-            with open(self._get_filepath(category), "a") as file:
-                writer = csv.writer(file)
-                writer.writerow(row)
-
-    def _maybe_create_csv_file(self, category: str, keys: List[str]) -> bool:
-        """
-        If no CSV file exists and the keys have the required values,
-        make the CSV file and write hte title row.
-        Returns True if there is now (or already is) a valid CSV file.
-        """
-        if category not in self.csv_fields:
-            summary_dir = self.base_dir
-            os.makedirs(summary_dir, exist_ok=True)
-            # Only store if the row contains the required fields
-            if all(item in keys for item in self.required_fields):
-                self.csv_fields[category] = keys
-                with open(self._get_filepath(category), "w") as file:
-                    title_row = ["Steps"]
-                    title_row.extend(keys)
-                    writer = csv.writer(file)
-                    writer.writerow(title_row)
-                return True
-            return False
-        return True
-
-    def _get_filepath(self, category: str) -> str:
-        file_dir = os.path.join(self.base_dir, category + ".csv")
-        return file_dir
-
-
 class StatsReporter:
    writers: List[StatsWriter] = []
    stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
        """
        Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
        a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
-        with all types of properties. For instance, a TB writer doesn't need a max step, nor should
-        we write hyperparameters to the CSV.
+        with all types of properties. For instance, a TB writer doesn't need a max step.
        :param key: The type of property.
        :param value: The property itself.
        """
--- a/ml-agents/mlagents/trainers/tests/test_stats.py
+++ b/ml-agents/mlagents/trainers/tests/test_stats.py
 import pytest
 import tempfile
 import unittest
-import csv
-    CSVWriter,
    StatsSummary,
    GaugeWriter,
    ConsoleWriter,
    tb_writer = TensorboardWriter(tmp_path, clear_past_data=True)
    tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
    assert len(os.listdir(os.path.join(tmp_path, "category1"))) == 1
-
-
-def test_csv_writer():
-    # Test write_stats
-    category = "category1"
-    with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
-        csv_writer = CSVWriter(base_dir, required_fields=["key1", "key2"])
-        statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
-        csv_writer.write_stats("category1", {"key1": statssummary1}, 10)
-
-        # Test that the filewriter has been created and the directory has been created.
-        filewriter_dir = "{basedir}/{category}.csv".format(
-            basedir=base_dir, category=category
-        )
-        # The required keys weren't in the stats
-        assert not os.path.exists(filewriter_dir)
-
-        csv_writer.write_stats(
-            "category1", {"key1": statssummary1, "key2": statssummary1}, 10
-        )
-        csv_writer.write_stats(
-            "category1", {"key1": statssummary1, "key2": statssummary1}, 20
-        )
-
-        # The required keys were in the stats
-        assert os.path.exists(filewriter_dir)
-
-        with open(filewriter_dir) as csv_file:
-            csv_reader = csv.reader(csv_file, delimiter=",")
-            line_count = 0
-            for row in csv_reader:
-                if line_count == 0:
-                    assert "key1" in row
-                    assert "key2" in row
-                    assert "Steps" in row
-                    line_count += 1
-                else:
-                    assert len(row) == 3
-                    line_count += 1
-            assert line_count == 3


 def test_gauge_stat_writer_sanitize():
--- a/docs/images/TensorBoard-download.png
+++ b/docs/images/TensorBoard-download.png