
Moved curriculum code out of environment code.

/develop-generalizationTraining-TrainerController
Deric Pang, 7 years ago
Current commit
8380f2f2
9 files changed: 44 insertions and 32 deletions
  1. docs/Training-Curriculum-Learning.md (2 changes)
  2. python/unityagents/__init__.py (1 change)
  3. python/unityagents/environment.py (29 changes)
  4. python/unitytrainers/__init__.py (1 change)
  5. python/unitytrainers/ppo/trainer.py (3 changes)
  6. python/unitytrainers/trainer_controller.py (26 changes)
  7. python/unitytrainers/curriculum.py (4 changes)
  8. python/curricula/push_curriculum.json (10 changes)
  9. python/unitytrainers/curriculum.py (renamed from python/unityagents/curriculum.py, 0 changes)

docs/Training-Curriculum-Learning.md (2 changes)


structure of the curriculum. Within it we can set at what points in the training process
our wall height will change, either based on the percentage of training steps that have
taken place, or on the average reward the agent has received in the recent past.
- Once these are in place, we simply launch ppo.py using the `--curriculum-file` flag to
+ Once these are in place, we simply launch learn.py using the `--curriculum-file` flag to
point to the JSON file, and PPO will train using Curriculum Learning. Of course, we can
then keep track of the current lesson and progress via TensorBoard.
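For reference, here is a minimal sketch of how such a curriculum file maps a lesson number to reset parameters. This is not the shipped `Curriculum` class, just an illustration; the file path assumes the repository layout shown in this commit.

```python
import json

# Load the curriculum definition added in this commit.
with open("python/curricula/push_curriculum.json") as f:
    data = json.load(f)

def config_for_lesson(lesson):
    # Each entry in "parameters" holds one value per lesson; index by lesson number.
    return {name: values[lesson] for name, values in data["parameters"].items()}

print(config_for_lesson(0))  # {'goal_size': 25.0}
print(config_for_lesson(1))  # {'goal_size': 5.0}
```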

python/unityagents/__init__.py (1 change)


from .environment import *
from .brain import *
from .exception import *
- from .curriculum import *

python/unityagents/environment.py (29 changes)


from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
- from .curriculum import Curriculum
from communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\
    EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput,\

class UnityEnvironment(object):
    def __init__(self, file_name=None, worker_id=0,
-                base_port=5005, curriculum=None,
-                seed=0, docker_training=False, no_graphics=False):
+                base_port=5005, seed=0,
+                docker_training=False, no_graphics=False):
        """
        Starts a new unity environment and establishes a connection with the environment.
        Notice: Currently communication between Unity and Python takes place over an open socket without authentication.

        self._num_brains = len(self._brain_names)
        self._num_external_brains = len(self._external_brain_names)
        self._resetParameters = dict(aca_params.environment_parameters.float_parameters)  # TODO
-       self._curriculum = Curriculum(curriculum, self._resetParameters)

-   @property
-   def curriculum(self):
-       return self._curriculum

    @property
    def logfile_path(self):

        # return SocketCommunicator(worker_id, base_port)

-       _new_reset_param = self._curriculum.get_config()
-           self._resetParameters[k] = _new_reset_param[k]
+       # Set reset parameters from trainer.
+       '''_new_reset_param = self._curriculum.get_config()
+           self._resetParameters[k] = _new_reset_param[k]'''

    def __str__(self):
-       Lesson number : {3}
-       Reset Parameters :\n\t\t{4}'''.format(self._academy_name, str(self._num_brains),
-                                             str(self._num_external_brains), self._curriculum.get_lesson_number,
-                                             "\n\t\t".join([str(k) + " -> " + str(self._resetParameters[k])
+       Reset Parameters :\n\t\t{3}'''.format(self._academy_name, str(self._num_brains),
+                                             str(self._num_external_brains),
+                                             "\n\t\t".join([str(k) + " -> " + str(self._resetParameters[k])

-   def reset(self, train_mode=True, config=None, lesson=None) -> AllBrainInfo:
-       if config is None:
-           config = self._curriculum.get_config(lesson)
-       elif config != {}:
+   def reset(self, config, train_mode=True, lesson=None) -> AllBrainInfo:
+       if config != {}:
            logger.info("\nAcademy Reset with parameters : \t{0}"
                        .format(', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
            for k in config:
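The practical effect of this hunk is that `reset` no longer consults an internal curriculum; the caller must pass reset parameters explicitly. A hypothetical usage sketch under the new signature (the environment name and parameter values are illustrative):

```python
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="push_block")  # no curriculum argument anymore
info = env.reset(config={"goal_size": 25.0},    # caller supplies the reset parameters
                 train_mode=True)
```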

python/unitytrainers/__init__.py (1 change)


from .buffer import *
+ from .curriculum import *
from .models import *
from .trainer_controller import *
from .bc.models import *
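Assuming `curriculum.py` defines a top-level `Curriculum` class (as the diffs below suggest), downstream imports follow the file move:

```python
# Illustrative import change for downstream code after the move.
# old: from unityagents import Curriculum
from unitytrainers import Curriculum
```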

python/unitytrainers/ppo/trainer.py (3 changes)


        n_sequences = max(int(self.trainer_parameters['batch_size'] / self.sequence_length), 1)
        value_total, policy_total, forward_total, inverse_total = [], [], [], []
        advantages = self.training_buffer.update_buffer['advantages'].get_batch()
+       print('advantages:', advantages)
+       print('advantages mean:', advantages.mean())
+       print('advantages std:', advantages.std())
        self.training_buffer.update_buffer['advantages'].set(
            (advantages - advantages.mean()) / (advantages.std() + 1e-10))
        num_epoch = self.trainer_parameters['num_epoch']
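The three `print` calls are debug output around the standard advantage normalization that follows them: advantages are shifted to zero mean and scaled to unit variance, with a small epsilon guarding against a zero standard deviation. A standalone sketch of the same computation, with illustrative values:

```python
import numpy as np

advantages = np.array([0.5, -1.2, 2.0, 0.1])  # illustrative advantage estimates
normalized = (advantages - advantages.mean()) / (advantages.std() + 1e-10)
print(normalized.mean(), normalized.std())    # ~0.0 and ~1.0
```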

python/unitytrainers/trainer_controller.py (26 changes)


from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unityagents import UnityEnvironment, UnityEnvironmentException
+ from .curriculum import Curriculum
from unityagents.exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from communicator_objects import UnityRLInitializationInput

class TrainerController(object):
    def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,

        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
-                                   curriculum=self.curriculum_file, seed=self.seed,
-                                   docker_training=self.docker_training,
+                                   seed=self.seed, docker_training=self.docker_training,
+       self.curriculum = Curriculum(curriculum_file, self.env._resetParameters)
+       self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
-       if self.env.curriculum.measure_type == "progress":
+       if self.curriculum.measure_type == "progress":
-       elif self.env.curriculum.measure_type == "reward":
+       elif self.curriculum.measure_type == "reward":
            for brain_name in self.env.external_brain_names:
                progress += self.trainers[brain_name].get_last_reward
            return progress

                               .format(model_path))

    def start_learning(self):
-       self.env.curriculum.set_lesson_number(self.lesson)
+       self.curriculum.set_lesson_number(self.lesson)
        trainer_config = self._load_config()
        self._create_model_path(self.model_path)

            else:
                sess.run(init)
            global_step = 0  # This is only for saving the model
-           self.env.curriculum.increment_lesson(self._get_progress())
-           curr_info = self.env.reset(train_mode=self.fast_simulation)
+           self.curriculum.increment_lesson(self._get_progress())
+           curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
            if self.train_model:
                for brain_name, trainer in self.trainers.items():
                    trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)

-               self.env.curriculum.increment_lesson(self._get_progress())
-               curr_info = self.env.reset(train_mode=self.fast_simulation)
+               self.curriculum.increment_lesson(self._get_progress())
+               curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
                for brain_name, trainer in self.trainers.items():
                    trainer.end_episode()
                # Decide and take an action

                    # Perform gradient descent with experience buffer
                    trainer.update_model()
                    # Write training statistics to Tensorboard.
-                   trainer.write_summary(self.env.curriculum.lesson_number)
+                   trainer.write_summary(self.curriculum.lesson_number)
                if self.train_model and trainer.get_step <= trainer.get_max_steps:
                    trainer.increment_step_and_update_last_reward()
            if self.train_model:
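Taken together, these hunks invert the ownership: the controller constructs the `Curriculum`, advances it based on training progress, and hands the resulting config to every `env.reset`. A simplified sketch of that loop; `get_progress`, `run_episode`, and `max_episodes` are hypothetical stand-ins, not names from the diff:

```python
def training_loop(env, curriculum, fast_simulation, get_progress, run_episode, max_episodes):
    for _ in range(max_episodes):
        # The controller, not the environment, decides when to advance the lesson...
        curriculum.increment_lesson(get_progress())
        # ...and passes the lesson's reset parameters to the environment explicitly.
        info = env.reset(config=curriculum.get_config(), train_mode=fast_simulation)
        run_episode(env, info)
```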

python/unitytrainers/curriculum.py (4 changes)


import json
- from .exception import UnityEnvironmentException
+ from unityagents.exception import UnityEnvironmentException
import logging

    def increment_lesson(self, progress):
        """
-       Increments the lesson number depending on the progree given.
+       Increments the lesson number depending on the progress given.
        :param progress: Measure of progress (either reward or percentage steps completed).
        """
        if self.data is None or progress is None:
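For intuition, a minimal sketch of the threshold check that `increment_lesson` performs, ignoring `min_lesson_length` and signal smoothing, which the real class also handles:

```python
def next_lesson(thresholds, lesson_number, progress):
    # Advance one lesson once progress crosses the current lesson's threshold.
    if lesson_number < len(thresholds) and progress > thresholds[lesson_number]:
        lesson_number += 1
    return lesson_number

print(next_lesson([0.1], 0, 0.05))  # 0: below the threshold
print(next_lesson([0.1], 0, 0.2))   # 1: threshold crossed
```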

python/curricula/push_curriculum.json (10 changes)


+ {
+     "measure" : "progress",
+     "thresholds" : [0.1],
+     "min_lesson_length" : 2,
+     "signal_smoothing" : true,
+     "parameters" :
+     {
+         "goal_size" : [25.0, 5.0]
+     }
+ }
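Read as a schedule: one threshold defines two lessons, so with `measure` set to `progress`, `goal_size` stays at 25.0 until roughly 10% of training steps have elapsed, then drops to 5.0 (whether the comparison is strict is an implementation detail). A quick illustration:

```python
def goal_size_at(progress):
    # Lesson 0 until ~10% of max steps, lesson 1 afterwards.
    return 25.0 if progress < 0.1 else 5.0

assert goal_size_at(0.05) == 25.0
assert goal_size_at(0.5) == 5.0
```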

python/unityagents/curriculum.py → python/unitytrainers/curriculum.py (renamed)
