
[skip ci] moving summary writer to update_policy

[skip ci] more fixes

[skip ci] tweaking 3dball configs

[skip ci] swap summary writer and step increment order
/distributed-training
Anupam Bhatnagar, 4 years ago
Current commit: d49ceecc
3 files changed, 8 insertions(+), 11 deletions(-)
  1. config/trainer_config.yaml (4 changes)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (7 changes)
  3. ml-agents/mlagents/trainers/trainer/rl_trainer.py (8 changes)

config/trainer_config.yaml (4 changes)


3DBall:
    normalize: true
    batch_size: 64
    buffer_size: 12000
    summary_freq: 12000
    max_steps: 1.0e5

3DBallHard:
    normalize: true
ml-agents/mlagents/trainers/ppo/trainer.py (7 changes)


        Processing involves calculating value and advantage targets for model updating step.
        :param trajectory: The Trajectory tuple containing the steps to be processed.
        """
        super()._process_trajectory(trajectory)
        # super()._process_trajectory(trajectory)
        agent_id = trajectory.agent_id  # All the agents should have the same ID
        agent_buffer_trajectory = trajectory.to_agentbuffer()

        Uses demonstration_buffer to update the policy.
        The reward signal generators must be updated in this method at their own pace.
        """
        buffer_length = self.update_buffer.num_experiences
        self.cumulative_returns_since_policy_update.clear()
        # Make sure batch_size is a multiple of sequence length. During training, we

            (advantages - advantages.mean()) / (advantages.std() + 1e-10)
        )
        # increment steps when training instead of when generating from environment
        self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
        num_epoch = self.trainer_parameters["num_epoch"]
        batch_update_stats = defaultdict(list)
        for _ in range(num_epoch):

            update_stats = self.optimizer.bc_module.update()
            for stat, val in update_stats.items():
                self._stats_reporter.add_stat(stat, val)
        super()._update_policy()
        self._clear_update_buffer()

    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
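The hunk above reorders the update path so the step counter advances, and the summary is written, during _update_policy rather than while collecting trajectories. A simplified, self-contained sketch of that flow, with illustrative class names and a stubbed-out training loop (not the actual PPOTrainer/RLTrainer code):

from collections import defaultdict

import numpy as np


class SketchRLTrainer:
    def __init__(self, trainer_parameters):
        self.trainer_parameters = trainer_parameters
        self.step = 0

    def _increment_step(self, n_steps, name_behavior_id):
        self.step += n_steps

    def _update_policy(self):
        # the base class now handles the summary write after the subclass has trained
        print(f"summary written at step {self.step}")


class SketchPPOTrainer(SketchRLTrainer):
    def _update_policy(self, advantages):
        # normalize advantages, as in the hunk above
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-10)
        # increment steps when training instead of when generating from environment
        self._increment_step(self.trainer_parameters["buffer_size"], "3DBall")
        num_epoch = self.trainer_parameters["num_epoch"]
        batch_update_stats = defaultdict(list)
        for _ in range(num_epoch):
            # a real trainer would run the optimizer here and collect its losses
            batch_update_stats["Losses/Policy Loss"].append(float(advantages.mean()))
        super()._update_policy()  # summary write happens here, after the step increment
        # the update buffer would be cleared last


trainer = SketchPPOTrainer({"buffer_size": 12000, "num_epoch": 3})
trainer._update_policy(np.random.randn(64))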

ml-agents/mlagents/trainers/trainer/rl_trainer.py (8 changes)


"""
return False
@abc.abstractmethod
pass
# increment steps when training instead of when generating from environment
self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
self._maybe_write_summary(self.get_step + self.trainer_parameters["buffer_size"])
def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
"""

        Takes a trajectory and processes it, putting it into the update buffer.
        :param trajectory: The Trajectory tuple containing the steps to be processed.
        """
        self._maybe_write_summary(self.get_step + len(trajectory.steps))
        # self._increment_step(len(trajectory.steps), trajectory.behavior_id)
        pass

    def _maybe_write_summary(self, step_after_process: int) -> None:
        """
