
Reorganize to make metrics collection more accurate

/develop-generalizationTraining-TrainerController
eshvk 6 years ago
Current commit
fb04c40c
8 changed files with 53 additions and 34 deletions
  1. docs/Training-ML-Agents.md (3 changes)
  2. ml-agents/mlagents/trainers/bc/trainer.py (2 changes)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (13 changes)
  4. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (2 changes)
  5. ml-agents/mlagents/trainers/trainer.py (9 changes)
  6. ml-agents/mlagents/trainers/trainer_controller.py (12 changes)
  7. ml-agents/mlagents/trainers/trainer_metrics.py (35 changes)
  8. ml-agents/tests/trainers/test_trainer_metrics.py (11 changes)

docs/Training-ML-Agents.md (3 changes)


details.
* `--debug` - Specify this option to run ML-Agents in debug mode and log Trainer
Metrics to a CSV stored in the `summaries` directory. The metrics stored are:
brain name, Time to update policy, Time since start of training, Time for last experience collection, Number of experiences used for training, Mean return. This
brain name, time to update policy, time since start of training, time for last experience collection, number of experiences used for training, mean return. This
### Training config file
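For the `--debug` option described above, the resulting CSV can be read back with the standard library; the column names match the field list given here. A hedged sketch (the file name is a placeholder, not a real path from this commit):

# Hedged sketch: read back the trainer-metrics CSV produced by --debug.
# 'metrics.csv' is a placeholder; substitute the actual file from the
# summaries directory.
import csv

with open('metrics.csv') as f:
    for row in csv.DictReader(f):
        print(row['Brain name'],
              row['Time to update policy'],
              row['Mean return'])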

ml-agents/mlagents/trainers/bc/trainer.py (2 changes)


def end_episode(self):
"""
A signal that the Episode has ended. The buffer must be reset.
Get only called when the academy resets.
"""
self.evaluation_buffer.reset_local_buffers()

ml-agents/mlagents/trainers/ppo/trainer.py (13 changes)


from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.trainer import Trainer
logger = logging.getLogger("mlagents.trainers")

:param next_all_info: Dictionary of all current brains and corresponding BrainInfo.
:param take_action_outputs: The outputs of the Policy's get_action method.
"""
self.trainer_metrics.start_experience_collection_timer()
if take_action_outputs:
self.stats['Policy/Value Estimate'].append(take_action_outputs['value'].mean())
self.stats['Policy/Entropy'].append(take_action_outputs['entropy'].mean())

if agent_id not in self.episode_steps:
self.episode_steps[agent_id] = 0
self.episode_steps[agent_id] += 1
self.trainer_metrics.end_experience_collection_timer()
def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo):
"""

:param new_info: Dictionary of all next brains and corresponding BrainInfo.
"""
self.trainer_metrics.start_experience_collection_timer()
info = new_info[self.brain_name]
for l in range(len(info.agents)):
agent_actions = self.training_buffer[info.agents[l]]['actions']

self.stats['Policy/Curiosity Reward'].append(
self.intrinsic_rewards.get(agent_id, 0))
self.intrinsic_rewards[agent_id] = 0
self.trainer_metrics.end_experience_collection_timer()
def end_episode(self):
"""

"""
Uses demonstration_buffer to update the policy.
"""
self.trainer_metrics.end_experience_collection_timer()
self.trainer_metrics.start_policy_update_timer(
number_experiences=len(self.training_buffer.update_buffer['actions']),
mean_return=float(np.mean(self.cumulative_returns_since_policy_update)))
n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
value_total, policy_total, forward_total, inverse_total = [], [], [], []
advantages = self.training_buffer.update_buffer['advantages'].get_batch()
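Taken together, the ppo/trainer.py changes bracket experience collection with the start/end timer calls and open the policy-update timer at the top of update_policy(). A minimal sketch of that lifecycle using the TrainerMetrics API from this commit (path, brain name, and numbers are made up):

# Minimal sketch of the timing pattern used in this trainer; illustrative
# values only, not the actual PPOTrainer code.
from mlagents.trainers.trainer_metrics import TrainerMetrics

metrics = TrainerMetrics(path='summaries/example.csv', brain_name='ExampleBrain')

# 1. Work done between start/end is attributed to experience collection.
metrics.start_experience_collection_timer()
# ... add_experiences() / process_experiences() work happens here ...
metrics.end_experience_collection_timer()

# 2. update_policy() then closes any open collection timer and opens the
#    policy-update timer, recording buffer size and mean return with it.
metrics.start_policy_update_timer(number_experiences=1024, mean_return=1.5)
# ... gradient updates happen here ...
metrics.end_policy_update()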

ml-agents/mlagents/trainers/tests/test_trainer_controller.py (2 changes)


import pytest
from mlagents.trainers import ActionInfo
from mlagents.trainers import TrainerMetrics
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer

def assert_ppo_trainer_constructed(input_config, tc, expected_brain_info,
expected_config, expected_reward_buff_cap=0):
def mock_constructor(self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id):
self.trainer_metrics = TrainerMetrics('', '')
assert(brain == expected_brain_info)
assert(trainer_parameters == expected_config)
assert(reward_buff_cap == expected_reward_buff_cap)
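The mock_constructor above swaps out PPOTrainer.__init__ so the test can assert on constructor arguments without building a TensorFlow graph, and it stubs trainer_metrics because the controller now reads that attribute off each trainer. A self-contained sketch of the same patching technique (all concrete values are placeholders):

# Hedged sketch of the constructor-patching technique; argument values are
# placeholders, not the fixtures used by the real test.
from unittest import mock
from mlagents.trainers import TrainerMetrics
from mlagents.trainers.ppo.trainer import PPOTrainer

def mock_constructor(self, brain, reward_buff_cap, trainer_parameters,
                     training, load, seed, run_id):
    # Stand-in __init__: stub the metrics object and assert on arguments
    # instead of constructing the real trainer.
    self.trainer_metrics = TrainerMetrics('', '')
    assert reward_buff_cap == 0

with mock.patch.object(PPOTrainer, '__init__', mock_constructor):
    # Any code that constructs a PPOTrainer now runs the stub above.
    PPOTrainer('fake_brain', 0, {}, True, False, 0, 'fake_run_id')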

ml-agents/mlagents/trainers/trainer.py (9 changes)


self.summary_path = trainer_parameters['summary_path']
if not os.path.exists(self.summary_path):
os.makedirs(self.summary_path)
self.trainer_metrics = TrainerMetrics(path=self.summary_path + '.csv',
brain_name=self.brain_name)
self.summary_writer = tf.summary.FileWriter(self.summary_path)
self.policy = None

:return: The ActionInfo given by the policy given the BrainInfo.
"""
self.trainer_metrics.start_experience_collection_timer()
return self.policy.get_action(curr_info)
action = self.policy.get_action(curr_info)
self.trainer_metrics.end_experience_collection_timer()
return action
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
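Each trainer now owns a TrainerMetrics instance whose CSV sits next to its TensorBoard summary: the path is literally summary_path + '.csv'. A hypothetical example of the resulting location (the summary_path value is made up):

# Hypothetical illustration of where one trainer's metrics CSV ends up.
summary_path = './summaries/run-0_ExampleBrain'  # assumed summary_path value
metrics_csv = summary_path + '.csv'
print(metrics_csv)  # ./summaries/run-0_ExampleBrain.csv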

ml-agents/mlagents/trainers/trainer_controller.py (12 changes)


self.train_model = train
self.keep_checkpoints = keep_checkpoints
self.trainers: Dict[str, Trainer] = {}
self.trainer_metrics: Dict[str, TrainerMetrics] = {}
self.global_step = 0
self.meta_curriculum = meta_curriculum
self.seed = training_seed

:return:
"""
for brain_name in self.trainers.keys():
self.trainers[brain_name].write_training_metrics()
if brain_name in self.trainer_metrics:
self.trainers[brain_name].write_training_metrics()
def _export_graph(self):
"""

:param trainer_config: The configurations of the trainers
"""
trainer_parameters_dict = {}
for brain_name in self.external_brains:
trainer_parameters = trainer_config['default'].copy()
trainer_parameters['summary_path'] = '{basedir}/{name}'.format(

trainer_parameters_dict[brain_name],
self.train_model, self.load_model, self.seed,
self.run_id)
self.trainer_metrics[brain_name] = self.trainers[brain_name].trainer_metrics
else:
raise UnityEnvironmentException('The trainer config contains '
'an unknown trainer type for '

take_action_text[brain_name] = action_info.text
take_action_value[brain_name] = action_info.value
take_action_outputs[brain_name] = action_info.outputs
time_start_step = time()
new_info = env.step(
vector_action=take_action_vector,
memory=take_action_memories,

delta_time_step = time() - time_start_step
if brain_name in self.trainer_metrics:
self.trainer_metrics[brain_name].add_delta_step(delta_time_step)
trainer.add_experiences(curr_info, new_info,
take_action_outputs[brain_name])
trainer.process_experiences(curr_info, new_info)

trainer.update_policy()
# Write training statistics to Tensorboard.
delta_train_start = time() - self.training_start_time
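On the controller side, the wall-clock time of each env.step() call is measured once and then credited, via add_delta_step(), to every brain with a registered TrainerMetrics (i.e. only when --debug is active). A hedged sketch of that idea, with step_fn standing in for the real env.step call:

# Hedged sketch of the controller-side timing: one environment step is timed
# and its duration is folded into every registered brain's metrics.
from time import time

def timed_env_step(step_fn, trainer_metrics_by_brain):
    time_start_step = time()
    new_info = step_fn()
    delta_time_step = time() - time_start_step
    for metrics in trainer_metrics_by_brain.values():
        # add_delta_step adds the environment step time to that brain's
        # experience-collection total.
        metrics.add_delta_step(delta_time_step)
    return new_info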

ml-agents/mlagents/trainers/trainer_metrics.py (35 changes)


from time import time
LOGGER = logging.getLogger("mlagents.trainers")
FIELD_NAMES = ['Brain name', 'Time to update policy',
'Time since start of training', 'Time for last experience collection',
'Number of experiences used for training', 'Mean return']
class TrainerMetrics:
"""

def __init__(self, path: str, brain_name: str):
"""
:str path: Fully qualified path where CSV is stored.

self.brain_name = brain_name
self.FIELD_NAMES = ['Brain name', 'Time to update policy',
'Time since start of training', 'Time for last experience collection',
'Number of experiences used for training', 'Mean return']
self.rows = []
self.time_start_experience_collection = None
self.time_training_start = time()

"""
Inform Metrics class that experience collection is done.
"""
if self.start_experience_collection_timer:
self.delta_last_experience_collection = time() - self.time_start_experience_collection
if self.time_start_experience_collection:
curr_delta = time() - self.time_start_experience_collection
if self.delta_last_experience_collection is None:
self.delta_last_experience_collection = curr_delta
else:
self.delta_last_experience_collection += curr_delta
self.time_start_experience_collection = None
def add_delta_step(self, delta: float):
"""
Inform Metrics class about time to step in environment.
"""
if self.delta_last_experience_collection:
self.delta_last_experience_collection += delta
self.delta_last_experience_collection = 0.0
self.time_start_experience_collection = None
self.delta_last_experience_collection = delta
def start_policy_update_timer(self, number_experiences: int, mean_return: float):
"""

for c in [self.delta_policy_update, delta_train_start,
self.delta_last_experience_collection,
self.last_buffer_length, self.last_mean_return])
self.delta_last_experience_collection = None
def end_policy_update(self):
"""
Inform Metrics class that policy update has started.

self.last_buffer_length, self.last_mean_return))
self._add_row(delta_train_start)
with open(self.path, 'w') as f:
writer = csv.writer(f)
writer.writerow(self.FIELD_NAMES)
with open(self.path, 'w') as file:
writer = csv.writer(file)
writer.writerow(FIELD_NAMES)
for row in self.rows:
writer.writerow(row)
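The reworked end_experience_collection_timer accumulates successive collection intervals into delta_last_experience_collection rather than overwriting it, add_delta_step folds in environment step time, and the running total is consumed when the next metrics row is built. A small sketch of that accumulation; the sleeps are stand-ins for real work and the final write_training_metrics name is an assumption based on the CSV-writing code above:

# Sketch of the accumulation behaviour; timings are illustrative only.
from time import sleep
from mlagents.trainers.trainer_metrics import TrainerMetrics

metrics = TrainerMetrics(path='example_metrics.csv', brain_name='ExampleBrain')

for _ in range(3):
    metrics.start_experience_collection_timer()
    sleep(0.01)                      # stand-in for collecting experiences
    metrics.end_experience_collection_timer()

metrics.add_delta_step(0.02)         # stand-in for environment step time

# All four intervals are summed into delta_last_experience_collection, which
# feeds the next metrics row once a policy update starts.
print(metrics.delta_last_experience_collection)
metrics.start_policy_update_timer(number_experiences=256, mean_return=0.0)
metrics.end_policy_update()
metrics.write_training_metrics()     # assumed name of the CSV-writing method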

ml-agents/tests/trainers/test_trainer_metrics.py (11 changes)


def test_field_names(self):
field_names = ['Brain name', 'Time to update policy',
'Time since start of training', 'Time for last experience collection', 'Number of experiences used for training', 'Mean return']
mock_path = 'fake'
mock_brain_name = 'fake'
trainer_metrics = TrainerMetrics(path=mock_path,
brain_name=mock_brain_name)
assert trainer_metrics.FIELD_NAMES == field_names
'Time since start of training',
'Time for last experience collection',
'Number of experiences used for training', 'Mean return']
from mlagents.trainers.trainer_metrics import FIELD_NAMES
assert FIELD_NAMES == field_names
@mock.patch('mlagents.trainers.trainer_metrics.time', mock.MagicMock(return_value=42))
def test_experience_collection_timer(self):
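Because trainer_metrics calls time() through its own module namespace, the tests can pin the clock with mock.patch exactly as shown; with a constant clock every measured interval collapses to zero, making the assertions deterministic. A hedged reconstruction of what such a test body can look like (not the exact file contents):

# Hedged reconstruction of a timer test with a frozen clock.
from unittest import mock
from mlagents.trainers.trainer_metrics import TrainerMetrics

@mock.patch('mlagents.trainers.trainer_metrics.time', mock.MagicMock(return_value=42))
def test_experience_collection_timer():
    trainer_metrics = TrainerMetrics(path='fake', brain_name='fake')
    trainer_metrics.start_experience_collection_timer()
    trainer_metrics.end_experience_collection_timer()
    # With time() pinned to 42, the measured interval is exactly 0.
    assert trainer_metrics.delta_last_experience_collection == 0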
