浏览代码

[skip ci] fix first summary statement output

/distributed-training
Anupam Bhatnagar 4 年前
当前提交
95ba923d
共有 2 个文件被更改,包括 3 次插入、4 次删除
  1. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 5
      ml-agents/mlagents/trainers/trainer/rl_trainer.py

2
ml-agents/mlagents/trainers/ppo/trainer.py


The reward signal generators must be updated in this method at their own pace.
"""
self.cumulative_returns_since_policy_update.clear()
super()._update_policy()
# Make sure batch_size is a multiple of sequence length. During training, we
# will need to reshape the data into a batch_size x sequence_length tensor.

for stat, val in update_stats.items():
self._stats_reporter.add_stat(stat, val)
super()._update_policy()
self._clear_update_buffer()
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:

5
ml-agents/mlagents/trainers/trainer/rl_trainer.py


"""
Uses demonstration_buffer to update model.
"""
# increment steps when training instead of when generating from environment
self._maybe_write_summary(self.get_step + self.trainer_parameters["buffer_size"])
self._maybe_write_summary(self.get_step + self.trainer_parameters["buffer_size"])
def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
"""

write the summary. This logic ensures summaries are written on the update step and not in between.
:param step_after_process: the step count after processing the next trajectory.
"""
if step_after_process >= self.next_summary_step and self.get_step != 0:
if step_after_process >= self.next_summary_step:
self._write_summary(self.next_summary_step)
def advance(self) -> None:

正在加载...
取消
保存