
[skip ci] moving step increment to trainer from environment for sac

/distributed-training
Anupam Bhatnagar, 4 years ago
Current commit
9d7dd3b6
3 changed files with 17 additions and 5 deletions
  1. ml-agents/mlagents/trainers/ppo/trainer.py (5 changes)
  2. ml-agents/mlagents/trainers/sac/trainer.py (14 changes)
  3. ml-agents/mlagents/trainers/trainer/rl_trainer.py (3 changes)

ml-agents/mlagents/trainers/ppo/trainer.py (5 changes)


        The reward signal generators must be updated in this method at their own pace.
        """
        self.cumulative_returns_since_policy_update.clear()
        super()._update_policy()
        self._maybe_write_summary(
            self.get_step + self.trainer_parameters["buffer_size"]
        )
        self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
        # Make sure batch_size is a multiple of sequence length. During training, we
        # will need to reshape the data into a batch_size x sequence_length tensor.
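
In plain terms, this hunk makes the PPO trainer account for one full buffer of experience per policy update: the summary is written at the step the trainer will have reached after the update, and the step counter is then advanced by buffer_size. A minimal sketch of that order of operations (only buffer_size, get_step, _maybe_write_summary, and _increment_step come from the diff; the standalone function and its arguments are illustrative assumptions, not ml-agents code):

# Sketch only, not ml-agents code: the bookkeeping the PPO hunk performs per update.
def after_ppo_update(trainer, brain_name: str) -> None:
    buffer_size = trainer.trainer_parameters["buffer_size"]  # experience consumed per update
    # 1. Report stats at the step count the trainer will reach once this buffer is counted.
    trainer._maybe_write_summary(trainer.get_step + buffer_size)
    # 2. Advance the trainer-owned step counter; the environment loop no longer does this.
    trainer._increment_step(buffer_size, brain_name)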

ml-agents/mlagents/trainers/sac/trainer.py (14 changes)


"""
self.cumulative_returns_since_policy_update.clear()
self._maybe_write_summary(
self.get_step
+ self.trainer_parameters["num_update"]
* self.trainer_parameters["batch_size"]
)
self._increment_step(
self.trainer_parameters["num_update"]
* self.trainer_parameters["batch_size"],
self.brain_name,
)
num_updates = self.trainer_parameters["num_update"]
for _ in range(num_updates):
for _ in range(self.trainer_parameters["num_update"]):
logger.debug("Updating SAC policy at step {}".format(self.step))
buffer = self.update_buffer
if (
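
The size of the SAC increment follows directly from the hyperparameters used in this hunk: one call performs num_update gradient updates drawing batch_size samples each, and the trainer step is advanced by their product. A small worked example (the concrete values are illustrative assumptions; only the key names come from the diff):

# Sketch: how many steps one SAC policy-update call adds to the trainer's counter.
trainer_parameters = {"num_update": 4, "batch_size": 256}  # example values only
steps_per_update = trainer_parameters["num_update"] * trainer_parameters["batch_size"]
print(steps_per_update)  # 1024, added to get_step for the summary and passed to _increment_step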

ml-agents/mlagents/trainers/trainer/rl_trainer.py (3 changes)


"""
Uses demonstration_buffer to update model.
"""
self._maybe_write_summary(self.get_step + self.trainer_parameters["buffer_size"])
self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
pass
def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
"""
