
Merge remote-tracking branch 'upstream/develop' into develop-trainer-controller-cleanup

Branch: develop-generalizationTraining-TrainerController
Deric Pang, 6 years ago
Current commit: 6eba6940
5 files changed, 30 insertions(+), 20 deletions(-)
1. python/learn.py (12 lines changed)
2. python/unityagents/environment.py (8 lines changed)
3. python/unitytrainers/ppo/trainer.py (2 lines changed)
4. python/unitytrainers/trainer.py (21 lines changed)
5. python/unitytrainers/trainer_controller.py (7 lines changed)

python/learn.py (12 lines changed)


 import os
 import multiprocessing
 import numpy as np
 from unitytrainers.trainer_controller import TrainerController
 from unitytrainers.exception import TrainerError

 TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))
-def run_training(sub_id):
+def run_training(sub_id, use_seed):
-                          load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, seed,
+                          load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, use_seed,
                           docker_target_name, TRAINER_CONFIG_PATH, no_graphics)
     tc.start_learning()

 jobs = []
 for i in range(num_runs):
-    p = multiprocessing.Process(target=run_training, args=(i,))
+    if seed == -1:
+        use_seed = np.random.randint(0, 9999)
+    else:
+        use_seed = seed
+    p = multiprocessing.Process(target=run_training, args=(i, use_seed))
     jobs.append(p)
     p.start()
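
The net effect of the learn.py change is that each parallel run resolves its own seed before the worker process is spawned. Below is a minimal standalone sketch of that scheme; num_runs and base_seed are stand-in values for this example, not the script's actual CLI plumbing.

    import multiprocessing
    import numpy as np

    def run_training(sub_id, use_seed):
        # Stand-in for building a TrainerController and calling start_learning();
        # here we only show which seed each sub-process would receive.
        print("run %d starts with seed %d" % (sub_id, use_seed))

    if __name__ == "__main__":
        num_runs = 3     # hypothetical number of parallel runs
        base_seed = -1   # -1 means "no seed supplied", as in the diff above

        jobs = []
        for i in range(num_runs):
            # A fresh random seed per run unless the user pinned one explicitly.
            use_seed = np.random.randint(0, 9999) if base_seed == -1 else base_seed
            p = multiprocessing.Process(target=run_training, args=(i, use_seed))
            jobs.append(p)
            p.start()
        for p in jobs:
            p.join()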

python/unityagents/environment.py (8 lines changed)


         else:
             [x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
             memory = np.array([x.memories for x in agent_info_list])
+        if any([np.isnan(x.reward) for x in agent_info_list]):
+            logger.warning("An agent had a NaN reward for brain "+b)
+        if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
+            logger.warning("An agent had a NaN observation for brain " + b)
-            vector_observation=np.array([x.stacked_vector_observation for x in agent_info_list]),
+            vector_observation=np.nan_to_num(np.array([x.stacked_vector_observation for x in agent_info_list])),
-            reward=[x.reward for x in agent_info_list],
+            reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
             agents=[x.id for x in agent_info_list],
             local_done=[x.done for x in agent_info_list],
             vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
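
The environment.py change warns about and then masks NaN values coming back from the simulation. A rough self-contained illustration of that sanitisation follows; the shapes, brain name, and helper function are made up for the example, while the real code does this inline when building the per-brain info.

    import logging
    import numpy as np

    logger = logging.getLogger("unityagents")
    logging.basicConfig(level=logging.WARNING)

    def sanitize(observations, rewards, brain_name):
        # Warn once per batch if any observation or reward came back as NaN,
        # then replace NaN observations with 0.0 and NaN rewards with 0.
        observations = np.asarray(observations, dtype=np.float32)
        if np.isnan(observations).any():
            logger.warning("An agent had a NaN observation for brain " + brain_name)
        if any(np.isnan(r) for r in rewards):
            logger.warning("An agent had a NaN reward for brain " + brain_name)
        clean_obs = np.nan_to_num(observations)
        clean_rewards = [0 if np.isnan(r) else r for r in rewards]
        return clean_obs, clean_rewards

    # Two agents, one corrupted observation and one corrupted reward.
    obs = [[0.1, float("nan"), 0.3], [0.4, 0.5, 0.6]]
    rew = [1.0, float("nan")]
    print(sanitize(obs, rew, "ExampleBrain"))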

python/unitytrainers/ppo/trainer.py (2 lines changed)


                     self.training_buffer[agent_id]['visual_obs%d' % i].append(
                         stored_info.visual_observations[i][idx])
                     self.training_buffer[agent_id]['next_visual_obs%d' % i].append(
-                        next_info.visual_observations[i][idx])
+                        next_info.visual_observations[i][next_idx])
                 if self.use_vector_obs:
                     self.training_buffer[agent_id]['vector_obs'].append(stored_info.vector_observations[idx])
                     self.training_buffer[agent_id]['next_vector_in'].append(
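
The single substantive change here indexes the next observation by next_idx instead of idx. A plausible reading, sketched below with invented agent ids and observations, is that the same agent can sit at different positions in the stored info and the next info, so each lookup must use that agent's position in its own batch.

    # Hypothetical agent orderings in two consecutive batches of agent info.
    stored_agents = [101, 102, 103]
    next_agents = [103, 101, 102]           # same agents, different order

    stored_visual_obs = ["obs_101", "obs_102", "obs_103"]
    next_visual_obs = ["next_103", "next_101", "next_102"]

    agent_id = 102
    idx = stored_agents.index(agent_id)     # position in the stored batch
    next_idx = next_agents.index(agent_id)  # position in the next batch

    # Pairing by (idx, next_idx) keeps the experience aligned to one agent;
    # reusing idx for both batches would mix observations from different agents.
    print(stored_visual_obs[idx], "->", next_visual_obs[next_idx])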

python/unitytrainers/trainer.py (21 lines changed)


     @property
     def get_step(self):
         """
-        Returns the number of steps the trainer has performed
+        Returns the number of training steps the trainer has performed
         :return: the step count of the trainer
         """
         raise UnityTrainerException("The get_step property was not implemented.")

         """
         raise UnityTrainerException("The update_model method was not implemented.")
-    def write_summary(self, lesson_num=0):
+    def write_summary(self, global_step, lesson_num=0):
-        :param lesson_num: The lesson the trainer is at.
+        :param global_step: The number of steps the simulation has been going for
+        :param lesson_number: The lesson the trainer is at.
-        if (self.get_step % self.trainer_parameters['summary_freq'] == 0 and self.get_step != 0 and
-                self.is_training and self.get_step <= self.get_max_steps):
+        if global_step % self.trainer_parameters['summary_freq'] == 0 and global_step != 0:
+            is_training = "Training." if self.is_training and self.get_step <= self.get_max_steps else "Not Training."
-                logger.info("{}: {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
-                            .format(self.run_id, self.brain_name, self.get_step,
-                                    mean_reward, np.std(self.stats['cumulative_reward'])))
+                logger.info(" {}: {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}. {}"
+                            .format(self.run_id, self.brain_name, min(self.get_step, self.get_max_steps),
+                                    mean_reward, np.std(self.stats['cumulative_reward']), is_training))
-                logger.info("{}: {}: Step: {}. No episode was completed since last summary."
-                            .format(self.run_id, self.brain_name, self.get_step))
+                logger.info(" {}: {}: Step: {}. No episode was completed since last summary. {}"
+                            .format(self.run_id, self.brain_name, self.get_step, is_training))
             summary = tf.Summary()
             for key in self.stats:
                 if len(self.stats[key]) > 0:
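
After this change the summary cadence is driven by the environment-wide global_step rather than the trainer's own step counter, and the log line reports whether the trainer is still training. Below is a condensed sketch of that gating, with the trainer's internals replaced by made-up module-level values.

    import numpy as np

    # Hypothetical stand-ins for trainer state.
    summary_freq = 1000
    trainer_step = 5200          # what self.get_step would return
    max_steps = 5000             # what self.get_max_steps would return
    currently_training = True    # what self.is_training would hold
    episode_rewards = [1.2, 0.8, 1.5]

    def write_summary(global_step, lesson_num=0):
        # Only write on a multiple of summary_freq, skipping step 0.
        if global_step % summary_freq == 0 and global_step != 0:
            is_training = ("Training." if currently_training and trainer_step <= max_steps
                           else "Not Training.")
            if episode_rewards:
                print(" run_id: ExampleBrain: Step: {}. Mean Reward: {:0.3f}. "
                      "Std of Reward: {:0.3f}. {}".format(
                          min(trainer_step, max_steps),
                          np.mean(episode_rewards), np.std(episode_rewards), is_training))
            else:
                print(" run_id: ExampleBrain: Step: {}. No episode was completed "
                      "since last summary. {}".format(trainer_step, is_training))

    write_summary(6000)   # prints the "Not Training." variant, since 5200 > 5000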

python/unitytrainers/trainer_controller.py (7 lines changed)


         self.worker_id = worker_id
         self.keep_checkpoints = keep_checkpoints
         self.trainers = {}
         if seed == -1:
             seed = np.random.randint(0, 999999)
         self.seed = seed
         np.random.seed(self.seed)
         tf.set_random_seed(self.seed)

                 # Write training statistics to Tensorboard.
                 if self.meta_curriculum is not None:
                     trainer.write_summary(
-                        lesson=self.meta_curriculum
+                        global_step,
+                        lesson_num=self.meta_curriculum
-                    trainer.write_summary()
+                    trainer.write_summary(global_step)
                 if self.train_model \
                         and trainer.get_step <= trainer.get_max_steps:
                     trainer.increment_step_and_update_last_reward()
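
The context lines above show the controller seeding both NumPy and TensorFlow from the single seed it was handed, which is what makes a run repeatable once learn.py has chosen the per-run seed. A minimal sketch of that idea follows; the helper name and sentinel handling are illustrative, and the TensorFlow call is version-guarded since tf.set_random_seed is the TF 1.x spelling.

    import numpy as np

    def seed_everything(seed):
        # Resolve the "no seed given" sentinel the same way the diff does,
        # then seed the libraries whose randomness affects training.
        if seed == -1:
            seed = np.random.randint(0, 999999)
        np.random.seed(seed)
        try:
            import tensorflow as tf
            if hasattr(tf, "set_random_seed"):   # TF 1.x API
                tf.set_random_seed(seed)
            else:                                # TF 2.x renamed it
                tf.random.set_seed(seed)
        except ImportError:
            pass                                 # TensorFlow not installed; NumPy-only seeding
        return seed

    used_seed = seed_everything(-1)
    print("seeding run with", used_seed)
    print(np.random.rand(3))                     # reproducible for the same used_seed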
