
Add stats reporter class and re-enable missing stats (#3076)

/develop-newnormalization
GitHub 5 years ago
Current commit
45010af3
18 files changed, with 306 additions and 104 deletions
  1. ml-agents/mlagents/trainers/agent_processor.py (40 lines changed)
  2. ml-agents/mlagents/trainers/curriculum.py (13 lines changed)
  3. ml-agents/mlagents/trainers/learn.py (6 lines changed)
  4. ml-agents/mlagents/trainers/ppo/policy.py (2 lines changed)
  5. ml-agents/mlagents/trainers/ppo/trainer.py (8 lines changed)
  6. ml-agents/mlagents/trainers/rl_trainer.py (10 lines changed)
  7. ml-agents/mlagents/trainers/sac/policy.py (2 lines changed)
  8. ml-agents/mlagents/trainers/sac/trainer.py (10 lines changed)
  9. ml-agents/mlagents/trainers/tests/test_agent_processor.py (9 lines changed)
  10. ml-agents/mlagents/trainers/tests/test_ppo.py (2 lines changed)
  11. ml-agents/mlagents/trainers/tests/test_sac.py (2 lines changed)
  12. ml-agents/mlagents/trainers/tests/test_simple_rl.py (3 lines changed)
  13. ml-agents/mlagents/trainers/tests/test_trainer_util.py (4 lines changed)
  14. ml-agents/mlagents/trainers/trainer.py (85 lines changed)
  15. ml-agents/mlagents/trainers/trainer_controller.py (15 lines changed)
  16. ml-agents/mlagents/trainers/trainer_util.py (4 lines changed)
  17. ml-agents/mlagents/trainers/stats.py (119 lines changed)
  18. ml-agents/mlagents/trainers/tests/test_stats.py (76 lines changed)

ml-agents/mlagents/trainers/agent_processor.py (40 lines changed)


from mlagents.trainers.brain import BrainInfo
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.action_info import ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter
class AgentProcessor:

One AgentProcessor should be created per agent group.
"""
def __init__(self, trainer: Trainer, policy: TFPolicy, max_trajectory_length: int):
def __init__(
self,
trainer: Trainer,
policy: TFPolicy,
max_trajectory_length: int,
stats_reporter: StatsReporter,
):
"""
Create an AgentProcessor.
:param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory

:param stats_category: The category under which to write the stats. Usually, this comes from the Trainer.
self.stats: Dict[str, List[float]] = defaultdict(list)
self.episode_rewards: Dict[str, float] = defaultdict(lambda: 0.0)
self.episode_rewards: Dict[str, float] = defaultdict(float)
self.stats_reporter = stats_reporter
if max_trajectory_length:
self.max_trajectory_length = max_trajectory_length
self.ignore_max_length = False

:param take_action_outputs: The outputs of the Policy's get_action method.
"""
if take_action_outputs:
self.stats["Policy/Entropy"].append(take_action_outputs["entropy"].mean())
self.stats["Policy/Learning Rate"].append(
take_action_outputs["learning_rate"]
self.stats_reporter.add_stat(
"Policy/Entropy", take_action_outputs["entropy"].mean()
for name, values in take_action_outputs["value_heads"].items():
self.stats[name].append(np.mean(values))
self.stats_reporter.add_stat(
"Policy/Learning Rate", take_action_outputs["learning_rate"]
)
for agent_id in curr_info.agents:
self.last_brain_info[agent_id] = curr_info

action_masks = stored_info.action_masks[idx]
prev_action = self.policy.retrieve_previous_action([agent_id])[0, :]
values = stored_take_action_outputs["value_heads"]
experience = AgentExperience(
obs=obs,
reward=tmp_environment_reward[next_idx],

)
# Add the value outputs if needed
self.experience_buffers[agent_id].append(experience)
self.episode_rewards[agent_id] += tmp_environment_reward[next_idx]
if (
next_info.local_done[next_idx]
or (

# This will eventually be replaced with a queue
self.trainer.process_trajectory(trajectory)
self.experience_buffers[agent_id] = []
if next_info.local_done[next_idx]:
self.stats_reporter.add_stat(
"Environment/Cumulative Reward",
self.episode_rewards.get(agent_id, 0),
)
self.stats_reporter.add_stat(
"Environment/Episode Length",
self.episode_steps.get(agent_id, 0),
)
del self.episode_steps[agent_id]
del self.episode_rewards[agent_id]
if agent_id not in self.episode_steps:
self.episode_steps[agent_id] = 0
self.episode_steps[agent_id] += 1
self.policy.save_previous_action(
curr_info.agents, take_action_outputs["action"]
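The change in agent_processor.py is that the processor no longer accumulates values in its own self.stats defaultdict; it forwards them to the StatsReporter it is constructed with. A minimal sketch (not part of the diff) of the resulting reporting calls, assuming a reporter whose category matches the behavior's summary name; all values are illustrative:

```python
# Minimal sketch of the calls the reworked processor makes; the category name
# and all numeric values here are illustrative, not taken from a real run.
from mlagents.trainers.stats import StatsReporter

reporter = StatsReporter("run_id_MyBehavior")            # hypothetical category
reporter.add_stat("Policy/Entropy", 1.3)                 # every step, from take_action_outputs
reporter.add_stat("Policy/Learning Rate", 3e-4)
reporter.add_stat("Environment/Cumulative Reward", 2.5)  # when an episode ends
reporter.add_stat("Environment/Episode Length", 100)
```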

ml-agents/mlagents/trainers/curriculum.py (13 lines changed)


import os
import json
import math
from typing import Dict, Any, TextIO
from .exception import CurriculumConfigError, CurriculumLoadingError

)
@property
def lesson_num(self):
def lesson_num(self) -> int:
def lesson_num(self, lesson_num):
def lesson_num(self, lesson_num: int) -> None:
def increment_lesson(self, measure_val):
def increment_lesson(self, measure_val: float) -> bool:
"""
Increments the lesson number depending on the progress given.
:param measure_val: Measure of progress (either reward or percentage

return True
return False
def get_config(self, lesson=None):
def get_config(self, lesson: int = None) -> Dict[str, Any]:
"""
Returns reset parameters which correspond to the lesson.
:param lesson: The lesson you want to get the config of. If None, the

return config
@staticmethod
def load_curriculum_file(location):
def load_curriculum_file(location: str) -> None:
try:
with open(location) as data_file:
return Curriculum._load_curriculum(data_file)

)
@staticmethod
def _load_curriculum(fp):
def _load_curriculum(fp: TextIO) -> None:
try:
return json.load(fp)
except json.decoder.JSONDecodeError as e:

ml-agents/mlagents/trainers/learn.py (6 lines changed)


from mlagents.trainers.exception import TrainerError
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.trainers.stats import TensorboardWriter, StatsReporter
from mlagents.envs.environment import UnityEnvironment
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.exception import SamplerException

)
trainer_config = load_config(trainer_config_path)
port = options.base_port + (sub_id * options.num_envs)
# Configure Tensorboard Writers and StatsReporter
tb_writer = TensorboardWriter(summaries_dir)
StatsReporter.add_writer(tb_writer)
if options.env_path is None:
port = 5004 # This is the in Editor Training Port
env_factory = create_environment_factory(
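The change to learn.py is the one-time wiring step: a TensorboardWriter rooted at the summaries directory is appended to the class-level StatsReporter.writers list, so every reporter instance created later fans out to it. A short sketch of that registration, assuming a local "./summaries" directory stands in for the real path from the run options:

```python
# Sketch of the one-time writer registration done at startup; "./summaries"
# is an illustrative path (the real one comes from the run options).
from mlagents.trainers.stats import TensorboardWriter, StatsReporter

StatsReporter.add_writer(TensorboardWriter("./summaries"))
```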

ml-agents/mlagents/trainers/ppo/policy.py (2 lines changed)


{
"action": self.model.output,
"log_probs": self.model.all_log_probs,
"value_heads": self.model.value_heads,
"value": self.model.value,
"entropy": self.model.entropy,
"learning_rate": self.model.learning_rate,
}

ml-agents/mlagents/trainers/ppo/trainer.py (8 lines changed)


)
for name, v in value_estimates.items():
agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
self.stats[self.policy.reward_signals[name].value_name].append(np.mean(v))
self.stats_reporter.add_stat(
self.policy.reward_signals[name].value_name, np.mean(v)
)
value_next = self.policy.get_value_estimates(
trajectory.next_obs,

batch_update_stats[stat_name].append(value)
for stat, stat_list in batch_update_stats.items():
self.stats[stat].append(np.mean(stat_list))
self.stats_reporter.add_stat(stat, np.mean(stat_list))
self.stats[stat].append(val)
self.stats_reporter.add_stat(stat, val)
self.clear_update_buffer()
self.trainer_metrics.end_policy_update()
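In both the PPO and SAC trainers the pattern is the same: per-batch values returned by the policy update are still collected and averaged locally, and only the mean is handed to the reporter. A self-contained sketch of that accumulation, with made-up loss values standing in for the real update_stats dicts and a hypothetical reporter category:

```python
# Sketch of the batch-averaging pattern used in update_policy; the stat name,
# the values, and the reporter category are all made up for illustration.
from collections import defaultdict
from typing import DefaultDict, List

import numpy as np
from mlagents.trainers.stats import StatsReporter

stats_reporter = StatsReporter("run_id_MyBehavior")
batch_update_stats: DefaultDict[str, List[float]] = defaultdict(list)
for update_stats in ({"Losses/Policy Loss": 0.21}, {"Losses/Policy Loss": 0.18}):
    for stat_name, value in update_stats.items():
        batch_update_stats[stat_name].append(value)

for stat, stat_list in batch_update_stats.items():
    stats_reporter.add_stat(stat, np.mean(stat_list))
```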

ml-agents/mlagents/trainers/rl_trainer.py (10 lines changed)


rewards[agent_id] = 0
def _update_end_episode_stats(self, agent_id: str) -> None:
self.stats["Environment/Episode Length"].append(
self.episode_steps.get(agent_id, 0)
)
self.episode_steps[agent_id] = 0
for name, rewards in self.collected_rewards.items():
if name == "environment":

self.stats["Environment/Cumulative Reward"].append(
rewards.get(agent_id, 0)
)
self.stats[self.policy.reward_signals[name].stat_name].append(
rewards.get(agent_id, 0)
self.stats_reporter.add_stat(
self.policy.reward_signals[name].stat_name, rewards.get(agent_id, 0)
)
rewards[agent_id] = 0

ml-agents/mlagents/trainers/sac/policy.py (2 lines changed)


{
"action": self.model.output,
"log_probs": self.model.all_log_probs,
"value_heads": self.model.value_heads,
"value": self.model.value,
"entropy": self.model.entropy,
"learning_rate": self.model.learning_rate,
}

ml-agents/mlagents/trainers/sac/trainer.py (10 lines changed)


agent_buffer_trajectory
)
for name, v in value_estimates.items():
self.stats[self.policy.reward_signals[name].value_name].append(np.mean(v))
self.stats_reporter.add_stat(
self.policy.reward_signals[name].value_name, np.mean(v)
)
# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.

)
for stat, stat_list in batch_update_stats.items():
self.stats[stat].append(np.mean(stat_list))
self.stats_reporter.add_stat(stat, np.mean(stat_list))
self.stats[stat].append(val)
self.stats_reporter.add_stat(stat, val)
def update_reward_signals(self) -> None:
"""

for stat_name, value in update_stats.items():
batch_update_stats[stat_name].append(value)
for stat, stat_list in batch_update_stats.items():
self.stats[stat].append(np.mean(stat_list))
self.stats_reporter.add_stat(stat, np.mean(stat_list))

ml-agents/mlagents/trainers/tests/test_agent_processor.py (9 lines changed)


import mlagents.trainers.tests.mock_brain as mb
import numpy as np
from mlagents.trainers.agent_processor import AgentProcessor
from mlagents.trainers.stats import StatsReporter
def create_mock_brain():

def test_agentprocessor(num_vis_obs):
policy = create_mock_policy()
trainer = mock.Mock()
processor = AgentProcessor(trainer, policy, max_trajectory_length=5)
processor = AgentProcessor(
trainer,
policy,
max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"value_heads": {},
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
"pre_action": [0.1, 0.1],

ml-agents/mlagents/trainers/tests/test_ppo.py (2 lines changed)


for reward in trainer.collected_rewards.values():
for agent in reward.values():
assert agent == 0
assert len(trainer.stats["Environment/Cumulative Reward"]) > 0
assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
if __name__ == "__main__":

ml-agents/mlagents/trainers/tests/test_sac.py (2 lines changed)


for reward in trainer.collected_rewards.values():
for agent in reward.values():
assert agent == 0
assert len(trainer.stats["Environment/Cumulative Reward"]) > 0
assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
if __name__ == "__main__":

ml-agents/mlagents/trainers/tests/test_simple_rl.py (3 lines changed)


from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.stats import StatsReporter
from mlagents.envs.side_channel.float_properties_channel import FloatPropertiesChannel
BRAIN_NAME = __name__

run_id = "id"
save_freq = 99999
seed = 1337
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
trainer_config = yaml.safe_load(config)
env_manager = SimpleEnvManager(env, FloatPropertiesChannel())
trainer_factory = TrainerFactory(

ml-agents/mlagents/trainers/tests/test_trainer_util.py (4 lines changed)


base_config = dummy_config_with_override()
expected_config = base_config["default"]
expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
expected_config["summary_path"] = f"{run_id}_testbrain"
expected_config["model_path"] = model_path + "/testbrain"
expected_config["keep_checkpoints"] = keep_checkpoints

base_config = dummy_config()
expected_config = base_config["default"]
expected_config["summary_path"] = summaries_dir + f"/{run_id}_testbrain"
expected_config["summary_path"] = f"{run_id}_testbrain"
expected_config["model_path"] = model_path + "/testbrain"
expected_config["keep_checkpoints"] = keep_checkpoints

ml-agents/mlagents/trainers/trainer.py (85 lines changed)


# # Unity ML-Agents Toolkit
import logging
from typing import Dict, List, Deque, Any
import os
import numpy as np
from collections import deque, defaultdict
from collections import deque
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.brain import BrainParameters

self.run_id = run_id
self.trainer_parameters = trainer_parameters
self.summary_path = trainer_parameters["summary_path"]
if not os.path.exists(self.summary_path):
os.makedirs(self.summary_path)
self.stats_reporter = StatsReporter(self.summary_path)
# if not os.path.exists(self.summary_path):
# os.makedirs(self.summary_path)
self.stats: Dict[str, List] = defaultdict(list)
self.summary_writer = tf.summary.FileWriter(self.summary_path)
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy: TFPolicy = None # type: ignore # this will always get set
self.step: int = 0

"brain {2}.".format(k, self.__class__, self.brain_name)
)
def write_tensorboard_text(self, key: str, input_dict: Dict[str, Any]) -> None:
"""
Saves text to Tensorboard.
Note: Only works on tensorflow r1.2 or above.
:param key: The name of the text.
:param input_dict: A dictionary that will be displayed in a table on Tensorboard.
"""
try:
with tf.Session() as sess:
s_op = tf.summary.text(
key,
tf.convert_to_tensor(
([[str(x), str(input_dict[x])] for x in input_dict])
),
)
s = sess.run(s_op)
self.stats_reporter.write_text(s, self.get_step)
except Exception:
LOGGER.info(
"Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above."
)
pass
def dict_to_str(self, param_dict: Dict[str, Any], num_tabs: int) -> str:
"""
Takes a parameter dictionary and converts it to a human-readable string.

"""
self.trainer_metrics.write_training_metrics()
def write_summary(
self, global_step: int, delta_train_start: float, lesson_num: int = 0
) -> None:
def write_summary(self, global_step: int, delta_train_start: float) -> None:
:param lesson_num: Current lesson number in curriculum.
:param global_step: The number of steps the simulation has been going for
"""
if (

else "Not Training."
)
step = min(self.get_step, self.get_max_steps)
if len(self.stats["Environment/Cumulative Reward"]) > 0:
mean_reward = np.mean(self.stats["Environment/Cumulative Reward"])
stats_summary = self.stats_reporter.get_stats_summaries(
"Environment/Cumulative Reward"
)
if stats_summary.num > 0:
LOGGER.info(
" {}: {}: Step: {}. "
"Time Elapsed: {:0.3f} s "

self.brain_name,
step,
delta_train_start,
mean_reward,
np.std(self.stats["Environment/Cumulative Reward"]),
stats_summary.mean,
stats_summary.std,
set_gauge(f"{self.brain_name}.mean_reward", mean_reward)
set_gauge(f"{self.brain_name}.mean_reward", stats_summary.mean)
else:
LOGGER.info(
" {}: {}: Step: {}. No episode was completed since last summary. {}".format(

summary = tf.Summary()
for key in self.stats:
if len(self.stats[key]) > 0:
stat_mean = float(np.mean(self.stats[key]))
summary.value.add(tag="{}".format(key), simple_value=stat_mean)
self.stats[key] = []
summary.value.add(tag="Environment/Lesson", simple_value=lesson_num)
self.summary_writer.add_summary(summary, step)
self.summary_writer.flush()
def write_tensorboard_text(self, key: str, input_dict: Dict[str, Any]) -> None:
"""
Saves text to Tensorboard.
Note: Only works on tensorflow r1.2 or above.
:param key: The name of the text.
:param input_dict: A dictionary that will be displayed in a table on Tensorboard.
"""
try:
with tf.Session() as sess:
s_op = tf.summary.text(
key,
tf.convert_to_tensor(
([[str(x), str(input_dict[x])] for x in input_dict])
),
)
s = sess.run(s_op)
self.summary_writer.add_summary(s, self.get_step)
except Exception:
LOGGER.info(
"Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above."
)
pass
self.stats_reporter.write_stats(int(step))
def process_trajectory(self, trajectory: Trajectory) -> None:
"""

ml-agents/mlagents/trainers/trainer_controller.py (15 lines changed)


# Write training statistics to Tensorboard.
delta_train_start = time() - self.training_start_time
if self.meta_curriculum is not None:
trainer.write_summary(
global_step,
delta_train_start,
lesson_num=self.meta_curriculum.brains_to_curriculums[
brain_name
].lesson_num,
)
else:
trainer.write_summary(global_step, delta_train_start)
lesson_num = self.meta_curriculum.brains_to_curriculums[
brain_name
].lesson_num
trainer.stats_reporter.add_stat("Environment/Lesson", lesson_num)
trainer.write_summary(global_step, delta_train_start)
def start_trainer(self, trainer: Trainer, env_manager: EnvManager) -> None:
self.trainers[trainer.brain_name] = trainer

trainer.parameters["time_horizon"]
if "time_horizon" in trainer.parameters
else None,
trainer.stats_reporter,
)
)
self.managers[name] = agent_manager

ml-agents/mlagents/trainers/trainer_util.py (4 lines changed)


"""
trainer_parameters = trainer_config["default"].copy()
brain_name = brain_parameters.brain_name
trainer_parameters["summary_path"] = "{basedir}/{name}".format(
basedir=summaries_dir, name=str(run_id) + "_" + brain_name
)
trainer_parameters["summary_path"] = str(run_id) + "_" + brain_name
trainer_parameters["model_path"] = "{basedir}/{name}".format(
basedir=model_path, name=brain_name
)

ml-agents/mlagents/trainers/stats.py (119 lines changed)


from collections import defaultdict
from typing import List, Dict, NamedTuple
import numpy as np
import abc
import os
from mlagents.tf_utils import tf
class StatsWriter(abc.ABC):
"""
A StatsWriter abstract class. A StatsWriter takes in a category, key, scalar value, and step
and writes it out by some method.
"""
@abc.abstractmethod
def write_stats(self, category: str, key: str, value: float, step: int) -> None:
pass
@abc.abstractmethod
def write_text(self, category: str, text: str, step: int) -> None:
pass
class TensorboardWriter(StatsWriter):
def __init__(self, base_dir: str):
self.summary_writers: Dict[str, tf.summary.FileWriter] = {}
self.base_dir: str = base_dir
def write_stats(self, category: str, key: str, value: float, step: int) -> None:
self._maybe_create_summary_writer(category)
summary = tf.Summary()
summary.value.add(tag="{}".format(key), simple_value=value)
self.summary_writers[category].add_summary(summary, step)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:
if category not in self.summary_writers:
filewriter_dir = "{basedir}/{category}".format(
basedir=self.base_dir, category=category
)
if not os.path.exists(filewriter_dir):
os.makedirs(filewriter_dir)
self.summary_writers[category] = tf.summary.FileWriter(filewriter_dir)
def write_text(self, category: str, text: str, step: int) -> None:
self._maybe_create_summary_writer(category)
self.summary_writers[category].add_summary(text, step)
class StatsSummary(NamedTuple):
mean: float
std: float
num: int
class StatsReporter:
writers: List[StatsWriter] = []
stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
def __init__(self, category):
"""
Generic StatsReporter. A category is the broadest type of storage (would
correspond the run name and trainer name, e.g. 3DBalltest_3DBall. A key is the
type of stat it is (e.g. Environment/Reward). Finally the Value is the float value
attached to this stat.
"""
self.category: str = category
@staticmethod
def add_writer(writer: StatsWriter) -> None:
StatsReporter.writers.append(writer)
def add_stat(self, key: str, value: float) -> None:
"""
Add a float value stat to the StatsReporter.
:param category: The highest categorization of the statistic, e.g. behavior name.
:param key: The type of statistic, e.g. Environment/Reward.
:param value: the value of the statistic.
"""
StatsReporter.stats_dict[self.category][key].append(value)
def write_stats(self, step: int) -> None:
"""
Write out all stored statistics that fall under the category specified.
The currently stored values will be averaged, written out as a single value,
and the buffer cleared.
:param category: The category which to write out the stats.
:param step: Training step which to write these stats as.
"""
for key in StatsReporter.stats_dict[self.category]:
if len(StatsReporter.stats_dict[self.category][key]) > 0:
stat_mean = float(np.mean(StatsReporter.stats_dict[self.category][key]))
for writer in StatsReporter.writers:
writer.write_stats(self.category, key, stat_mean, step)
del StatsReporter.stats_dict[self.category]
def write_text(self, text: str, step: int) -> None:
"""
Write out some text.
:param category: The highest categorization of the statistic, e.g. behavior name.
:param text: The text to write out.
:param step: Training step which to write these stats as.
"""
for writer in StatsReporter.writers:
writer.write_text(self.category, text, step)
def get_stats_summaries(self, key: str) -> StatsSummary:
"""
Get the mean, std, and count of a particular statistic, since last write.
:param category: The highest categorization of the statistic, e.g. behavior name.
:param key: The type of statistic, e.g. Environment/Reward.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
"""
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
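Because writers are attached to the class-level list rather than threaded through the trainers, new back ends can be added without touching any trainer code. A hypothetical example (not part of this commit) of a second writer that implements the StatsWriter interface and prints to stdout:

```python
# Hypothetical ConsoleWriter, shown only to illustrate the StatsWriter
# interface; it is not part of this commit.
from mlagents.trainers.stats import StatsReporter, StatsWriter


class ConsoleWriter(StatsWriter):
    def write_stats(self, category: str, key: str, value: float, step: int) -> None:
        print(f"[{category}] step {step}: {key} = {value:.4f}")

    def write_text(self, category: str, text: str, step: int) -> None:
        print(f"[{category}] step {step}: {text}")


StatsReporter.add_writer(ConsoleWriter())
reporter = StatsReporter("run_id_MyBehavior")  # hypothetical category
reporter.add_stat("Environment/Cumulative Reward", 3.0)
reporter.write_stats(step=2000)  # prints the averaged value through every registered writer
```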

ml-agents/mlagents/trainers/tests/test_stats.py (76 lines changed)


import unittest.mock as mock
import os
from mlagents.trainers.stats import StatsReporter, TensorboardWriter
def test_stat_reporter_add_summary_write():
# Test add_writer
StatsReporter.writers.clear()
mock_writer1 = mock.Mock()
mock_writer2 = mock.Mock()
StatsReporter.add_writer(mock_writer1)
StatsReporter.add_writer(mock_writer2)
assert len(StatsReporter.writers) == 2
# Test add_stats and summaries
statsreporter1 = StatsReporter("category1")
statsreporter2 = StatsReporter("category2")
for i in range(10):
statsreporter1.add_stat("key1", float(i))
statsreporter2.add_stat("key2", float(i))
statssummary1 = statsreporter1.get_stats_summaries("key1")
statssummary2 = statsreporter2.get_stats_summaries("key2")
assert statssummary1.num == 10
assert statssummary2.num == 10
assert statssummary1.mean == 4.5
assert statssummary2.mean == 4.5
assert round(statssummary1.std, 1) == 2.9
assert round(statssummary2.std, 1) == 2.9
# Test write_stats
step = 10
statsreporter1.write_stats(step)
mock_writer1.write_stats.assert_called_once_with("category1", "key1", 4.5, step)
mock_writer2.write_stats.assert_called_once_with("category1", "key1", 4.5, step)
def test_stat_reporter_text():
# Test add_writer
mock_writer = mock.Mock()
StatsReporter.writers.clear()
StatsReporter.add_writer(mock_writer)
assert len(StatsReporter.writers) == 1
statsreporter1 = StatsReporter("category1")
# Test write_text
step = 10
statsreporter1.write_text("this is a text", step)
mock_writer.write_text.assert_called_once_with("category1", "this is a text", step)
@mock.patch("mlagents.tf_utils.tf.Summary")
@mock.patch("mlagents.tf_utils.tf.summary.FileWriter")
def test_tensorboard_writer(mock_filewriter, mock_summary):
# Test write_stats
base_dir = "base_dir"
category = "category1"
tb_writer = TensorboardWriter(base_dir)
tb_writer.write_stats("category1", "key1", 1.0, 10)
# Test that the filewriter has been created and the directory has been created.
filewriter_dir = "{basedir}/{category}".format(basedir=base_dir, category=category)
assert os.path.exists(filewriter_dir)
mock_filewriter.assert_called_once_with(filewriter_dir)
# Test that the filewriter was written to and the summary was added.
mock_summary.return_value.value.add.assert_called_once_with(
tag="key1", simple_value=1.0
)
mock_filewriter.return_value.add_summary.assert_called_once_with(
mock_summary.return_value, 10
)
mock_filewriter.return_value.flush.assert_called_once()