浏览代码

Make the timer output format consistent (#3472)

/asymm-envs
GitHub 4 年前
当前提交
be14dd42
共有 7 个文件被更改,包括 63 次插入44 次删除
  1. 3
      docs/Profiling-Python.md
  2. 39
      ml-agents-envs/mlagents_envs/tests/test_timers.py
  3. 22
      ml-agents-envs/mlagents_envs/timers.py
  4. 9
      ml-agents/mlagents/trainers/learn.py
  5. 23
      ml-agents/mlagents/trainers/stats.py
  6. 9
      ml-agents/mlagents/trainers/tests/test_stats.py
  7. 2
      ml-agents/mlagents/trainers/trainer_controller.py

3
docs/Profiling-Python.md


## Output
By default, at the end of training, timers are collected and written in JSON format to
`{summaries_dir}/{run_id}_timers.json`. The output consists of node objects with the following keys:
* name (string): The name of the block of code.
* children (list): A list of child nodes.
* children (dictionary): A dictionary of child nodes, keyed by the node name.
* is_parallel (bool): Indicates that the block of code was executed in multiple threads or processes (see below). This
is optional and defaults to false.

39
ml-agents-envs/mlagents_envs/tests/test_timers.py


"total": mock.ANY,
"count": 1,
"self": mock.ANY,
"children": [
{
"name": "top_level",
"children": {
"top_level": {
"children": [
{
"name": "multiple",
"children": {
"multiple": {
"children": [
{
"name": "decorated_func",
"children": {
"decorated_func": {
],
},
{
"name": "raises",
"total": mock.ANY,
"count": 1,
"self": mock.ANY,
},
{
"name": "post_raise",
"total": mock.ANY,
"count": 1,
"self": mock.ANY,
},
],
"raises": {"total": mock.ANY, "count": 1, "self": mock.ANY},
"post_raise": {"total": mock.ANY, "count": 1, "self": mock.ANY},
},
],
"gauges": [
{"name": "my_gauge", "value": 4.0, "max": 4.0, "min": 0.0, "count": 3}
],
},
"gauges": {"my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}},
}
assert timer_tree == expected_tree

22
ml-agents-envs/mlagents_envs/timers.py


from time import perf_counter
from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator, List, TypeVar
from typing import Any, Callable, Dict, Generator, TypeVar
class TimerNode:

res["is_parallel"] = True
child_total = 0.0
child_list = []
child_dict = {}
child_res: Dict[str, Any] = {
"name": child_name,
**self.get_timing_tree(child_node),
}
child_list.append(child_res)
child_res: Dict[str, Any] = self.get_timing_tree(child_node)
child_dict[child_name] = child_res
if child_list:
res["children"] = child_list
if child_dict:
res["children"] = child_dict
return res

else:
self.gauges[name] = GaugeNode(value)
def _get_gauges(self) -> List[Dict[str, Any]]:
gauges = []
def _get_gauges(self) -> Dict[str, Dict[str, float]]:
gauges = {}
gauge_dict: Dict[str, Any] = {"name": gauge_name, **gauge_node.as_dict()}
gauges.append(gauge_dict)
gauges[gauge_name] = gauge_node.as_dict()
return gauges

9
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.trainers.stats import TensorboardWriter, CSVWriter, StatsReporter
from mlagents.trainers.stats import (
TensorboardWriter,
CSVWriter,
StatsReporter,
GaugeWriter,
)
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.exception import SamplerException

required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
)
tb_writer = TensorboardWriter(summaries_dir)
gauge_write = GaugeWriter()
StatsReporter.add_writer(gauge_write)
if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT

23
ml-agents/mlagents/trainers/stats.py


import os
from mlagents.tf_utils import tf
from mlagents_envs.timers import set_gauge
class StatsSummary(NamedTuple):

pass
@abc.abstractmethod
def write_text(self, category: str, text: str, step: int) -> None:
pass
class GaugeWriter(StatsWriter):
    """
    Write all stats that we receive to the timer gauges, so we can track them offline easily.

    Each stat is recorded under the name "<category>.<stat name>.mean", passed
    through sanitize_string before it is handed to set_gauge.
    """

    @staticmethod
    def sanitize_string(s: str) -> str:
        """
        Clean up special characters in the category and value names.

        :param s: Raw name, e.g. "Policy/Learning Rate".
        :return: The name with every "/" replaced by "." and every space
            removed, e.g. "Policy.LearningRate".
        """
        return s.replace("/", ".").replace(" ", "")

    def write_stats(
        self, category: str, values: Dict[str, StatsSummary], step: int
    ) -> None:
        """
        Record the mean of every stat in `values` as a timer gauge.

        :param category: Category the stats belong to; used as the gauge name prefix.
        :param values: Mapping of stat name to its StatsSummary.
        :param step: Current step; not used by this writer.
        """
        for val, stats_summary in values.items():
            # Sanitize the full name so that "/" and spaces coming from either
            # the category or the stat name do not end up in the gauge key.
            gauge_name = GaugeWriter.sanitize_string(f"{category}.{val}.mean")
            set_gauge(gauge_name, float(stats_summary.mean))

    def write_text(self, category: str, text: str, step: int) -> None:
        """Do nothing: this writer does not record text."""
        pass

9
ml-agents/mlagents/trainers/tests/test_stats.py


TensorboardWriter,
CSVWriter,
StatsSummary,
GaugeWriter,
)

assert len(row) == 3
line_count += 1
assert line_count == 3
def test_gauge_stat_writer_sanitize():
    """GaugeWriter.sanitize_string turns "/" into "." and drops spaces."""
    expected_by_raw_name = {
        "Policy/Learning Rate": "Policy.LearningRate",
        "Very/Very/Very Nested Stat": "Very.Very.VeryNestedStat",
    }
    for raw_name, clean_name in expected_by_raw_name.items():
        assert GaugeWriter.sanitize_string(raw_name) == clean_name

2
ml-agents/mlagents/trainers/trainer_controller.py


timing_path = f"{self.summaries_dir}/{self.run_id}_timers.json"
try:
with open(timing_path, "w") as f:
json.dump(get_timer_tree(), f, indent=2)
json.dump(get_timer_tree(), f, indent=4)
except FileNotFoundError:
self.logger.warning(
f"Unable to save to {timing_path}. Make sure the directory exists"

正在加载...
取消
保存