
Cleanup unused code in TrainerController (#2315)

* Removes unused SubprocessEnvManager import in trainer_controller
* Removes unused `steps` argument to `TrainerController._save_model`
* Consolidates unnecessary branching for curricula in
  `TrainerController.advance`
* Moves `reward_buffer` into `Trainer` from `PPOTrainer` and adds
  `BCTrainer` support so that we don't have a broken interface /
  undefined behavior when `BCTrainer` is used with curricula.
Committed via GitHub, 5 years ago
Current commit: 9eb3f049
5 files changed, 33 insertions(+), 38 deletions(-)
Files changed:
  1. ml-agents/mlagents/trainers/bc/trainer.py (1 line changed)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (16 lines changed)
  3. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (2 lines changed)
  4. ml-agents/mlagents/trainers/trainer.py (14 lines changed)
  5. ml-agents/mlagents/trainers/trainer_controller.py (38 lines changed)
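Taken together, these changes give every trainer the same `reward_buffer` surface, so code that drives curricula can query any trainer without special-casing the algorithm behind it. The sketch below (hypothetical class and function names, not the actual ml-agents API) illustrates the interface problem the description refers to: when only one subclass owns the buffer, curriculum-style code that reads it from an arbitrary trainer fails, typically with an AttributeError; hoisting the buffer into a shared base class removes that hazard.

from collections import deque

class BaseTrainer:
    """Hypothetical stand-in for the shared trainer base class."""
    def __init__(self, reward_buff_cap=1):
        # Cumulative rewards of the most recently completed episodes.
        self._reward_buffer = deque(maxlen=reward_buff_cap)

    @property
    def reward_buffer(self):
        return self._reward_buffer

class ImitationTrainer(BaseTrainer):
    """Analogous to BCTrainer: it now inherits the buffer instead of lacking it."""
    pass

def mean_recent_reward(trainer):
    # Curriculum-style measurement over the buffered episode rewards.
    buf = trainer.reward_buffer
    return sum(buf) / len(buf) if buf else 0.0

trainer = ImitationTrainer(reward_buff_cap=3)
for episode_reward in (1.0, 2.0, 3.0, 4.0):
    trainer.reward_buffer.appendleft(episode_reward)
print(mean_recent_reward(trainer))  # 3.0: only the three most recent rewards are kept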

ml-agents/mlagents/trainers/bc/trainer.py (1 line changed)


          self.stats["Environment/Episode Length"].append(
              self.episode_steps.get(agent_id, 0)
          )
+         self.reward_buffer.appendleft(self.cumulative_rewards.get(agent_id, 0))
          self.cumulative_rewards[agent_id] = 0
          self.episode_steps[agent_id] = 0
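For context, the buffer the added line writes to is a collections.deque created with a maxlen (the reward_buff_cap), so appendleft puts the newest episode reward at the front and silently evicts the oldest entry from the other end once the cap is reached. A minimal sketch of that behavior, with made-up values:

from collections import deque

reward_buffer = deque(maxlen=3)  # the cap plays the role of reward_buff_cap
for episode_reward in (0.1, 0.5, 0.9, 1.3):
    reward_buffer.appendleft(episode_reward)

print(reward_buffer)  # deque([1.3, 0.9, 0.5], maxlen=3) -- newest first, 0.1 was evicted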

ml-agents/mlagents/trainers/ppo/trainer.py (16 lines changed)


  # Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347
  import logging
- from collections import deque, defaultdict
+ from collections import defaultdict
  from typing import List, Any
  import numpy as np

      :param seed: The seed the model will be initialized with
      :param run_id: The identifier of the current run
      """
-     super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
+     super().__init__(brain, trainer_parameters, training, run_id, reward_buff_cap)
      self.param_keys = [
          "batch_size",
          "beta",

      self.stats = stats
      self.training_buffer = Buffer()
-     self._reward_buffer = deque(maxlen=reward_buff_cap)
      self.episode_steps = {}

  def __str__(self):

      :return: the step count of the trainer
      """
      return self.step

- @property
- def reward_buffer(self):
-     """
-     Returns the reward buffer. The reward buffer contains the cumulative
-     rewards of the most recent episodes completed by agents using this
-     trainer.
-     :return: the reward buffer.
-     """
-     return self._reward_buffer

  def increment_step(self, n_steps: int) -> None:
      """

ml-agents/mlagents/trainers/tests/test_trainer_controller.py (2 lines changed)


      env_mock.reset.assert_called_once()
      assert tc.advance.call_count == trainer_mock.get_max_steps + 1
      env_mock.close.assert_called_once()
-     tc._save_model.assert_called_once_with(steps=6)
+     tc._save_model.assert_called_once()

  def test_start_learning_updates_meta_curriculum_lesson_number():
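Because _save_model no longer takes a steps argument, the test can only assert that a save happened, not which arguments it received. Both assertions are part of unittest.mock; a small illustration of the difference:

from unittest import mock

save_model = mock.MagicMock()
save_model()  # the controller now calls it with no arguments

save_model.assert_called_once()  # passes: exactly one call, arguments not inspected
try:
    save_model.assert_called_once_with(steps=6)  # the old assertion would now fail
except AssertionError as error:
    print("old-style assertion fails:", error)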

ml-agents/mlagents/trainers/trainer.py (14 lines changed)


  import os
  import tensorflow as tf
  import numpy as np
+ from collections import deque
  from mlagents.envs import UnityException, AllBrainInfo, ActionInfoOutputs
  from mlagents.trainers import TrainerMetrics

  class Trainer(object):
      """This class is the base class for the mlagents.envs.trainers"""

-     def __init__(self, brain, trainer_parameters, training, run_id):
+     def __init__(self, brain, trainer_parameters, training, run_id, reward_buff_cap=1):
          """
          Responsible for collecting experiences and training a neural network model.
          :BrainParameters brain: Brain to be trained.

          )
          self.summary_writer = tf.summary.FileWriter(self.summary_path)
          self.policy = None
+         self._reward_buffer = deque(maxlen=reward_buff_cap)

      def __str__(self):
          return """{} Trainer""".format(self.__class__)

          :return: the step count of the trainer
          """
          raise UnityTrainerException("The get_step property was not implemented.")

+     @property
+     def reward_buffer(self):
+         """
+         Returns the reward buffer. The reward buffer contains the cumulative
+         rewards of the most recent episodes completed by agents using this
+         trainer.
+         :return: the reward buffer.
+         """
+         return self._reward_buffer

      def increment_step(self, n_steps: int) -> None:
          """

ml-agents/mlagents/trainers/trainer_controller.py (38 lines changed)


  from mlagents.envs import BrainParameters
  from mlagents.envs.env_manager import StepInfo
  from mlagents.envs.env_manager import EnvManager
- from mlagents.envs.subprocess_env_manager import SubprocessEnvManager
  from mlagents.envs.exception import UnityEnvironmentException
  from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
  from mlagents.trainers import Trainer, TrainerMetrics

          brain_names_to_measure_vals[brain_name] = measure_val
      return brain_names_to_measure_vals

-     def _save_model(self, steps=0):
+     def _save_model(self):
-         :param steps: Current number of steps in training process.
-         :param saver: Tensorflow saver for session.

-     def _save_model_when_interrupted(self, steps=0):
+     def _save_model_when_interrupted(self):
-             "Learning was interrupted. Please wait " "while the graph is generated."
+             "Learning was interrupted. Please wait while the graph is generated."
-         self._save_model(steps)
+         self._save_model()

      def _write_training_metrics(self):
          """

              global_step += 1
              if self._should_save_model(global_step):
                  # Save Tensorflow model
-                 self._save_model(steps=global_step)
+                 self._save_model()
-             self._save_model(steps=global_step)
+             self._save_model()
-             self._save_model_when_interrupted(steps=global_step)
+             self._save_model_when_interrupted()
              pass
          env_manager.close()
          if self.train_model:

              lessons_incremented = self.meta_curriculum.increment_lessons(
                  self._get_measure_vals(), reward_buff_sizes=reward_buff_sizes
              )
-         else:
-             lessons_incremented = {}
-         # If any lessons were incremented or the environment is
-         # ready to be reset
-         if self.meta_curriculum and any(lessons_incremented.values()):
-             self._reset_env(env)
-             for brain_name, trainer in self.trainers.items():
-                 trainer.end_episode()
-             for brain_name, changed in lessons_incremented.items():
-                 if changed:
-                     self.trainers[brain_name].reward_buffer.clear()
+             # If any lessons were incremented or the environment is
+             # ready to be reset
+             if any(lessons_incremented.values()):
+                 self._reset_env(env)
+                 for brain_name, trainer in self.trainers.items():
+                     trainer.end_episode()
+                 for brain_name, changed in lessons_incremented.items():
+                     if changed:
+                         self.trainers[brain_name].reward_buffer.clear()
          with hierarchical_timer("env_step"):
              time_start_step = time()
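The consolidation works because everything that consumed lessons_incremented now sits inside the if self.meta_curriculum: branch: with no meta-curriculum there is nothing to increment, so the else: lessons_incremented = {} fallback and the repeated self.meta_curriculum and guard were dead weight. A stripped-down sketch of the two shapes (hypothetical helpers, not the actual advance method) showing they behave identically:

def advance_old(meta_curriculum, measure_vals):
    # Original shape: the empty fallback dict exists only so the later guard can run.
    if meta_curriculum:
        lessons_incremented = meta_curriculum.increment_lessons(measure_vals)
    else:
        lessons_incremented = {}
    if meta_curriculum and any(lessons_incremented.values()):
        return "reset"
    return "step"

def advance_new(meta_curriculum, measure_vals):
    # Consolidated shape: the reset check nests inside the curriculum branch.
    if meta_curriculum:
        lessons_incremented = meta_curriculum.increment_lessons(measure_vals)
        if any(lessons_incremented.values()):
            return "reset"
    return "step"

class FakeCurriculum:
    def increment_lessons(self, measure_vals):
        return {"BrainA": True}

assert advance_old(None, {}) == advance_new(None, {}) == "step"
assert advance_old(FakeCurriculum(), {}) == advance_new(FakeCurriculum(), {}) == "reset"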
