浏览代码

Track histogram of environment reward

/reward-dist
Arthur Juliani 3 年前
当前提交
ae816ec3
共有 1 个文件被更改,包括 13 次插入和 1 次删除
  1. 14
      ml-agents/mlagents/trainers/stats.py

14
ml-agents/mlagents/trainers/stats.py


std: float
num: int
sum: float
full_dist: np.array
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
return StatsSummary(
0.0, 0.0, 0, 0.0, np.zeros(1), StatsAggregationMethod.AVERAGE
)
@property
def aggregated_value(self):

self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
if key == "Environment/Cumulative Reward":
self.summary_writers[category].add_histogram(
f"{key}_hist", value.full_dist, step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

if len(stat_values) == 0:
return StatsSummary.empty()
if key == "Environment/Cumulative Reward":
full = np.array(stat_values)
else:
full = np.zeros(1)
full_dist=full,
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)
正在加载...
取消
保存