浏览代码

Added SUM as aggregation type for custom statistics (#4816)

/MLA-1734-demo-provider
GitHub 4 年前
当前提交
8a40c58a
共有 12 个文件被更改，包括 171 次插入和 38 次删除
  1. 2
      .gitignore
  2. 6
      Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
  3. 3
      com.unity.ml-agents/CHANGELOG.md
  4. 7
      com.unity.ml-agents/Runtime/StatsRecorder.cs
  5. 5
      ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
  6. 15
      ml-agents/mlagents/trainers/agent_processor.py
  7. 80
      ml-agents/mlagents/trainers/stats.py
  8. 4
      ml-agents/mlagents/trainers/tests/check_env_trains.py
  9. 25
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  10. 16
      ml-agents/mlagents/trainers/tests/test_learn.py
  11. 3
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  12. 43
      ml-agents/mlagents/trainers/tests/test_stats.py

2
.gitignore


/summaries
# Output Artifacts
/results
# Output Builds
/Builds
# Training environments
/envs

6
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


Renderer m_GroundRenderer;
HallwaySettings m_HallwaySettings;
int m_Selection;
StatsRecorder m_statsRecorder;
public override void Initialize()
{

m_GroundMaterial = m_GroundRenderer.material;
m_statsRecorder = Academy.Instance.StatsRecorder;
}
public override void CollectObservations(VectorSensor sensor)

{
SetReward(1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f));
m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum);
}
EndEpisode();
}

symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position;
symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position;
}
m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum);
m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum);
}
}

3
com.unity.ml-agents/CHANGELOG.md


### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
#### ml-agents / ml-agents-envs / gym-unity (Python)

7
com.unity.ml-agents/Runtime/StatsRecorder.cs


/// To avoid conflicts when training with multiple concurrent environments, only
/// stats from worker index 0 will be tracked.
/// </summary>
MostRecent = 1
MostRecent = 1,
/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
}
/// <summary>

5
ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py


# Only the most recent value is reported.
MOST_RECENT = 1
# Values within the summary period are summed up before reporting.
SUM = 2
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]

def on_message_received(self, msg: IncomingMessage) -> None:
"""
Receive the message from the environment, and save it for later retrieval.
:param msg:
:return:
"""

def get_and_reset_stats(self) -> EnvironmentStats:
"""
Returns the current stats, and resets the internal storage of the stats.
:return:
"""
s = self.stats

15
ml-agents/mlagents/trainers/agent_processor.py


):
"""
Create an AgentProcessor.
:param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory
when it is finished.
:param policy: Policy instance associated with this AgentProcessor.

)
def _process_step(
self, step: Union[TerminalStep, DecisionStep], global_id: str, index: int
self,
step: Union[
TerminalStep, DecisionStep
], # pylint: disable=unsubscriptable-object
global_id: str,
index: int,
) -> None:
terminated = isinstance(step, TerminalStep)
stored_decision_step, idx = self.last_step_result.get(global_id, (None, None))

"""
Pass stats from the environment to the StatsReporter.
Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
The worker_id is used to determin whether StatsReporter.set_stat should be used.
The worker_id is used to determine whether StatsReporter.set_stat should be used.
:param env_stats:
:param worker_id:
:return:

if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val)
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.SUM:
self.stats_reporter.add_stat(stat_name, val, agg_type)
elif agg_type == StatsAggregationMethod.MOST_RECENT:
# In order to prevent conflicts between multiple environments,
# only stats from the first environment are recorded.

80
ml-agents/mlagents/trainers/stats.py


import time
from threading import RLock
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from torch.utils.tensorboard import SummaryWriter

"""
Takes a parameter dictionary and converts it to a human-readable string.
Recurses if there are multiple levels of dict. Used to print out hyperparameters.
param: param_dict: A Dictionary of key, value parameters.
return: A string version of this dictionary.
:param param_dict: A Dictionary of key, value parameters.
:return: A string version of this dictionary.
"""
if not isinstance(param_dict, dict):
return str(param_dict)

)
class StatsSummary(NamedTuple):
class StatsSummary(NamedTuple): # pylint: disable=inherit-non-class
sum: float
aggregation_method: StatsAggregationMethod
return StatsSummary(0.0, 0.0, 0)
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):
"""
The single value to report for this statistic, chosen by its aggregation method.
:return: self.sum when aggregation_method is StatsAggregationMethod.SUM,
otherwise self.mean.
"""
if self.aggregation_method == StatsAggregationMethod.SUM:
return self.sum
else:
return self.mean
class StatsPropertyType(Enum):

Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param type: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
pass

GaugeWriter.sanitize_string(f"{category}.{val}.mean"),
float(stats_summary.mean),
)
set_gauge(
GaugeWriter.sanitize_string(f"{category}.{val}.sum"),
float(stats_summary.sum),
)
class ConsoleWriter(StatsWriter):

is_training = "Not Training"
if "Is Training" in values:
stats_summary = values["Is Training"]
if stats_summary.mean > 0.0:
if stats_summary.aggregated_value > 0.0:
is_training = "Training"
elapsed_time = time.time() - self.training_start_time

def __init__(self, base_dir: str, clear_past_data: bool = False):
"""
A StatsWriter that writes to a Tensorboard summary.
category.
category.
"""
self.summary_writers: Dict[str, SummaryWriter] = {}
self.base_dir: str = base_dir

) -> None:
self._maybe_create_summary_writer(category)
for key, value in values.items():
self.summary_writers[category].add_scalar(f"{key}", value.mean, step)
self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

writers: List[StatsWriter] = []
stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
lock = RLock()
stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict(
lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE)
)
def __init__(self, category: str):
"""

Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step.
:param key: The type of property.
:param property_type: The type of property.
:param value: The property itself.
"""
with StatsReporter.lock:

def add_stat(self, key: str, value: float) -> None:
def add_stat(
self,
key: str,
value: float,
aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE,
) -> None:
:param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE.
StatsReporter.stats_aggregation[self.category][key] = aggregation
StatsReporter.stats_aggregation[self.category][
key
] = StatsAggregationMethod.MOST_RECENT
def write_stats(self, step: int) -> None:
"""

:param step: Training step which to write these stats as.
"""
with StatsReporter.lock:

def get_stats_summaries(self, key: str) -> StatsSummary:
"""
Get the mean, std, and count of a particular statistic, since last write.
Get the mean, std, count, sum and aggregation method of a particular statistic, since last write.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
:returns: A StatsSummary containing summary statistics.
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
stat_values = StatsReporter.stats_dict[self.category][key]
if len(stat_values) == 0:
return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)

4
ml-agents/mlagents/trainers/tests/check_env_trains.py


) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
print(step, val, stats_summary.aggregated_value)
self._last_reward_summary[category] = stats_summary.aggregated_value
# The reward processor is passed as an argument to _check_environment_trains.

25
ml-agents/mlagents/trainers/tests/test_agent_processor.py


{
"averaged": [(1.0, StatsAggregationMethod.AVERAGE)],
"most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
"summed": [(3.1, StatsAggregationMethod.SUM)],
"summed": [(1.1, StatsAggregationMethod.SUM)],
},
]
for env_stats in all_env_stats:

"averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2),
"most_recent": StatsSummary(mean=4.0, std=0.0, num=1),
"averaged": StatsSummary(
mean=2.0,
std=mock.ANY,
num=2,
sum=4.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
),
"most_recent": StatsSummary(
mean=4.0,
std=0.0,
num=1,
sum=4.0,
aggregation_method=StatsAggregationMethod.MOST_RECENT,
),
"summed": StatsSummary(
mean=2.1,
std=mock.ANY,
num=2,
sum=4.2,
aggregation_method=StatsAggregationMethod.SUM,
),
}
stats_reporter.write_stats(123)
writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123)

16
ml-agents/mlagents/trainers/tests/test_learn.py


from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
import os.path
def basic_options(extra_args=None):

learn.run_training(0, options)
mock_init.assert_called_once_with(
trainer_factory_mock.return_value,
"results/ppo",
os.path.join("results", "ppo"),
"ppo",
"mock_param_manager",
True,

"results/ppo", False, False, "results/notuselessrun"
os.path.join("results", "ppo"),
False,
False,
os.path.join("results", "notuselessrun"),
)
write_timing_tree_mock.assert_called_once_with(
os.path.join("results", "ppo", "run_logs")
)
write_run_options_mock.assert_called_once_with(
os.path.join("results", "ppo"), options
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
write_run_options_mock.assert_called_once_with("results/ppo", options)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

3
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
import os.path
# Add concrete implementations of abstract methods

trainer.brain_name,
ModelCheckpoint(
step,
f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}",
None,
mock.ANY,
),

43
ml-agents/mlagents/trainers/tests/test_stats.py


GaugeWriter,
ConsoleWriter,
StatsPropertyType,
StatsAggregationMethod,
)

category = "category1"
with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# Test that the filewriter has been created and the directory has been created.

def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file
time.sleep(1.0)

with self.assertLogs("mlagents.trainers", level="INFO") as cm:
category = "category1"
console_writer = ConsoleWriter()
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

10,
)
statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1)
statssummary2 = StatsSummary(
mean=0.0,
std=0.0,
num=1,
sum=0.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
"Environment/Cumulative Reward": statssummary1,
"Environment/Cumulative Reward": statssummary2,
"Is Training": statssummary2,
},
10,

category = "category1"
console_writer = ConsoleWriter()
console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
aggregation_method=StatsAggregationMethod.AVERAGE,
)
console_writer.write_stats(
category,
{

正在加载...
取消
保存