浏览代码

[skip ci] increment steps on training

/distributed-training
Anupam Bhatnagar 5 年前
当前提交
e8d09d00
共有 3 个文件被更改,包括 5 次插入和 6 次删除
  1. 3
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 2
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  3. 6
      ml-agents/mlagents/trainers/trainer/trainer.py

3
ml-agents/mlagents/trainers/ppo/trainer.py


self.update_buffer["advantages"].set(
(advantages - advantages.mean()) / (advantages.std() + 1e-10)
)
# increment steps when training instead of when generating from environment
self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
num_epoch = self.trainer_parameters["num_epoch"]
batch_update_stats = defaultdict(list)
for _ in range(num_epoch):

2
ml-agents/mlagents/trainers/trainer/rl_trainer.py


:param trajectory: The Trajectory tuple containing the steps to be processed.
"""
self._maybe_write_summary(self.get_step + len(trajectory.steps))
self._increment_step(len(trajectory.steps), trajectory.behavior_id)
# self._increment_step(len(trajectory.steps), trajectory.behavior_id)
def _maybe_write_summary(self, step_after_process: int) -> None:
"""

6
ml-agents/mlagents/trainers/trainer/trainer.py


from mlagents.trainers.policy import Policy
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
import horovod.tensorflow as hvd
logger = get_logger(__name__)

stop training if it wasn't training to begin with, or if max_steps
is reached.
"""
if hvd.rank() == 0:
return self.is_training and self.get_step <= self.get_max_steps
else:
return True
return self.is_training and self.get_step <= self.get_max_steps
@property
def reward_buffer(self) -> Deque[float]:

正在加载...
取消
保存