浏览代码

[skip ci] more fixes

/distributed-training
Anupam Bhatnagar 4 年前
当前提交
45bac63e
共有 2 个文件被更改,包括 8 次插入和 6 次删除
  1. 8
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 6
      ml-agents/mlagents/trainers/trainer/rl_trainer.py

8
ml-agents/mlagents/trainers/ppo/trainer.py


Processing involves calculating value and advantage targets for model updating step.
:param trajectory: The Trajectory tuple containing the steps to be processed.
"""
super()._process_trajectory(trajectory)
# super()._process_trajectory(trajectory)
agent_id = trajectory.agent_id # All the agents should have the same ID
agent_buffer_trajectory = trajectory.to_agentbuffer()

Uses demonstration_buffer to update the policy.
The reward signal generators must be updated in this method at their own pace.
"""
buffer_length = self.update_buffer.num_experiences
super()._update_policy()
# buffer_length = self.update_buffer.num_experiences
self.cumulative_returns_since_policy_update.clear()
# Make sure batch_size is a multiple of sequence length. During training, we

(advantages - advantages.mean()) / (advantages.std() + 1e-10)
)
# increment steps when training instead of when generating from environment
self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
num_epoch = self.trainer_parameters["num_epoch"]
batch_update_stats = defaultdict(list)
for _ in range(num_epoch):

6
ml-agents/mlagents/trainers/trainer/rl_trainer.py


"""
return False
@abc.abstractmethod
# @abc.abstractmethod
# increment steps when training instead of when generating from environment
pass
self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
"""

Takes a trajectory and processes it, putting it into the update buffer.
:param trajectory: The Trajectory tuple containing the steps to be processed.
"""
pass
# self._maybe_write_summary(self.get_step + len(trajectory.steps))
# self._increment_step(len(trajectory.steps), trajectory.behavior_id)

正在加载...
取消
保存