浏览代码

Change StatsSummary to use properties

/reward-dist
Arthur Juliani 3 年前
当前提交
987800f2
共有 3 个文件被更改,包括 27 次插入和 59 次删除
  1. 26
      ml-agents/mlagents/trainers/stats.py
  2. 21
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  3. 39
      ml-agents/mlagents/trainers/tests/test_stats.py

26
ml-agents/mlagents/trainers/stats.py


class StatsSummary(NamedTuple):
mean: float
std: float
num: int
sum: float
return StatsSummary(0.0, 0.0, 0, 0.0, [0.0], StatsAggregationMethod.AVERAGE)
return StatsSummary([0.0], StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):

return self.mean
@property
def mean(self):
return np.mean(self.full_dist)
@property
def std(self):
return np.std(self.full_dist)
@property
def num(self):
return len(self.full_dist)
@property
def sum(self):
return np.sum(self.full_dist)
class StatsPropertyType(Enum):

return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
full_dist=stat_values,
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)

21
ml-agents/mlagents/trainers/tests/test_agent_processor.py


expected_stats = {
"averaged": StatsSummary(
mean=2.0,
std=mock.ANY,
num=2,
sum=4.0,
full_dist=mock.ANY,
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0, 3.0], aggregation_method=StatsAggregationMethod.AVERAGE
mean=4.0,
std=0.0,
num=1,
sum=4.0,
full_dist=mock.ANY,
aggregation_method=StatsAggregationMethod.MOST_RECENT,
full_dist=[4.0], aggregation_method=StatsAggregationMethod.MOST_RECENT
mean=2.1,
std=mock.ANY,
num=2,
sum=4.2,
full_dist=mock.ANY,
aggregation_method=StatsAggregationMethod.SUM,
full_dist=[3.1, 1.1], aggregation_method=StatsAggregationMethod.SUM
),
}
stats_reporter.write_stats(123)

39
ml-agents/mlagents/trainers/tests/test_stats.py


with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
full_dist=[0.0],
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)

def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
full_dist=[0.0],
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file

category = "category1"
console_writer = ConsoleWriter()
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
full_dist=[1.0],
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
console_writer.write_stats(
category,

10,
)
statssummary2 = StatsSummary(
mean=0.0,
std=0.0,
num=1,
sum=0.0,
full_dist=[0.0],
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[0.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
console_writer.write_stats(
category,

)
self.assertIn(
"Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0]
"Mean Reward: 1.000. Std of Reward: 0.000. Training.", cm.output[0]
)
self.assertIn("Not Training.", cm.output[1])

console_writer = ConsoleWriter()
console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True)
statssummary1 = StatsSummary(
mean=1.0,
std=1.0,
num=1,
sum=1.0,
full_dist=[1.0],
aggregation_method=StatsAggregationMethod.AVERAGE,
full_dist=[1.0], aggregation_method=StatsAggregationMethod.AVERAGE
)
console_writer.write_stats(
category,

)
self.assertIn(
"Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0]
"Mean Reward: 1.000. Std of Reward: 0.000. Training.", cm.output[0]
)
正在加载...
取消
保存