浏览代码

Merge pull request #1058 from dericp/develop-trainer-controller-cleanup

Fixing trainer controller line lengths and splitting unitytrainers tests.
/develop-generalizationTraining-TrainerController
GitHub 6 年前
当前提交
514cd757
共有 4 个文件被更改,包括 375 次插入294 次删除
  1. 208
      python/unitytrainers/trainer_controller.py
  2. 56
      python/tests/test_buffer.py
  3. 187
      python/tests/test_trainer_controller.py
  4. 218
      python/tests/test_unitytrainers.py

208
python/unitytrainers/trainer_controller.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning
# Launches unitytrainers for each External Brains in a Unity Environment
"""Launches unitytrainers for each External Brains in a Unity Environment."""
import logging
import logging
import numpy as np
import tensorflow as tf
from tensorflow.python.tools import freeze_graph

class TrainerController(object):
def __init__(self, env_path, run_id, save_freq, curriculum_folder, fast_simulation, load, train,
worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
def __init__(self, env_path, run_id, save_freq, curriculum_folder,
fast_simulation, load, train, worker_id, keep_checkpoints,
lesson, seed, docker_target_name, trainer_config_path,
:param curriculum_folder: Folder containing JSON curriculums for the env
:param fast_simulation: Whether to run the game at training speed
:param load: Whether to load the model or randomly initialize
:param train: Whether to train model, or only run inference
:param worker_id: Number to add to communication port (5005). Used for multi-environment
:param keep_checkpoints: How many model checkpoints to keep
:param lesson: Start learning from this lesson
:param curriculum_folder: Folder containing JSON curriculums for the
environment.
:param fast_simulation: Whether to run the game at training speed.
:param load: Whether to load the model or randomly initialize.
:param train: Whether to train model, or only run inference.
:param worker_id: Number to add to communication port (5005).
Used for multi-environment
:param keep_checkpoints: How many model checkpoints to keep.
:param lesson: Start learning from this lesson.
:param docker_target_name: Name of docker volume that will contain all data.
:param trainer_config_path: Fully qualified path to location of trainer configuration file
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
:param docker_target_name: Name of docker volume that will contain all
data.
:param trainer_config_path: Fully qualified path to location of trainer
configuration file.
:param no_graphics: Whether to run the Unity simulator in no-graphics
mode.
# Strip out executable extensions if passed
.replace('.x86', '')) # Strip out executable extensions if passed
.replace('.x86', ''))
# Recognize and use docker volume if one is passed as an argument
if docker_target_name == '':

docker_target_name=docker_target_name,
run_id=run_id)
if env_path is not None:
env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
env_name=env_path)
env_path = '/{docker_target_name}/{env_name}'.format(
docker_target_name=docker_target_name, env_name=env_path)
self.curriculum_folder = '/{docker_target_name}/{curriculum_file}'.format(
self.curriculum_folder = \
'/{docker_target_name}/{curriculum_file}'.format(
self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
self.summaries_dir = '/{docker_target_name}/summaries'.format(
docker_target_name=docker_target_name)
self.logger = logging.getLogger("unityagents")
self.run_id = run_id

self.seed = seed
np.random.seed(self.seed)
tf.set_random_seed(self.seed)
self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
seed=self.seed, docker_training=self.docker_training,
self.env = UnityEnvironment(file_name=env_path,
worker_id=self.worker_id,
seed=self.seed,
docker_training=self.docker_training,
self.env_name = os.path.basename(os.path.normpath(env_path)) # Extract out name of environment
# Extract out name of environment
self.env_name = os.path.basename(os.path.normpath(env_path))
self.meta_curriculum = MetaCurriculum(self.curriculum_folder, self.env._resetParameters)
self.meta_curriculum = MetaCurriculum(self.curriculum_folder,
self.env._resetParameters)
if self.meta_curriculum is not None:
if self.meta_curriculum:
for brain_name in self.meta_curriculum.brains_to_curriculums.keys():
if brain_name not in self.env.external_brain_names:
raise MetaCurriculumError('One of the curriculums '

'whose curriculum it defines.')
def _get_progresses(self):
if self.meta_curriculum is not None:
if self.meta_curriculum:
for brain_name, curriculum in self.meta_curriculum.brains_to_curriculums.items():
for brain_name, curriculum \
in self.meta_curriculum.brains_to_curriculums.items():
progress = self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
progress = (self.trainers[brain_name].get_step /
self.trainers[brain_name].get_max_steps)
brain_names_to_progresses[brain_name] = progress
elif curriculum.measure == "reward":
progress = self.trainers[brain_name].get_last_reward

if scope == '/':
scope = ''
scopes += [scope]
if self.trainers[brain_name].parameters["trainer"] == "imitation":
if self.trainers[brain_name].parameters["trainer"] \
== "imitation":
nodes += [scope + x for x in ["action", "value_estimate", "action_probs", "value_estimate"]]
nodes += [scope + x for x in ["action", "value_estimate",
"action_probs", "value_estimate"]]
nodes += [scope + x for x in ["recurrent_out", "memory_size"]]
nodes += [scope + x for x in ["recurrent_out",
"memory_size"]]
if len(scopes) > 1:
self.logger.info("List of available scopes :")
for scope in scopes:

"""
last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
saver.save(sess, last_checkpoint)
tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
tf.train.write_graph(sess.graph_def, self.model_path,
'raw_graph_def.pb', as_text=False)
self.logger.info("Saved Model")
def _export_graph(self):

target_nodes = ','.join(self._process_graph())
ckpt = tf.train.get_checkpoint_state(self.model_path)
freeze_graph.freeze_graph(input_graph=self.model_path + '/raw_graph_def.pb',
input_binary=True,
input_checkpoint=ckpt.model_checkpoint_path,
output_node_names=target_nodes,
output_graph=self.model_path + '/' + self.env_name + "_" + self.run_id + '.bytes',
clear_devices=True, initializer_nodes="", input_saver="",
restore_op_name="save/restore_all", filename_tensor_name="save/Const:0")
freeze_graph.freeze_graph(
input_graph=self.model_path + '/raw_graph_def.pb',
input_binary=True,
input_checkpoint=ckpt.model_checkpoint_path,
output_node_names=target_nodes,
output_graph=(self.model_path + '/' + self.env_name + "_"
+ self.run_id + '.bytes'),
clear_devices=True, initializer_nodes="", input_saver="",
restore_op_name="save/restore_all",
filename_tensor_name="save/Const:0")
def _initialize_trainers(self, trainer_config, sess):
trainer_parameters_dict = {}

trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in self.env.external_brain_names:
if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
trainer_parameters_dict[brain_name],
self.train_model, self.seed, self.run_id)
self.trainers[brain_name] = BehavioralCloningTrainer(
sess, self.env, brain_name,
trainer_parameters_dict[brain_name], self.train_model,
self.seed, self.run_id)
self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name],
self.train_model, self.seed, self.run_id)
self.trainers[brain_name] = PPOTrainer(
sess, self.env, brain_name,
trainer_parameters_dict[brain_name],
self.train_model, self.seed, self.run_id)
raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
raise UnityEnvironmentException('The trainer config contains '
'an unknown trainer type for '
'brain {}'
.format(brain_name))
def _load_config(self):

return trainer_config
except IOError:
raise UnityEnvironmentException("""Parameter file could not be found here {}.
Will use default Hyper parameters"""
raise UnityEnvironmentException('Parameter file could not be found '
'here {}. Will use default Hyper '
'parameters.'
raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
raise UnityEnvironmentException('There was an error decoding '
'Trainer Config from this path : {}'
.format(self.trainer_config_path))
@staticmethod

os.makedirs(model_path)
except Exception:
raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed."
" Please make sure the permissions are set correctly."
raise UnityEnvironmentException('The folder {} containing the '
'generated model could not be '
'accessed. Please make sure the '
'permissions are set correctly.'
def _increment_lessons_and_reset_env(self):
"""Increments the lessons of curriculums if there is a metacurriculum
and resets the environment.
Returns:
A Data structure corresponding to the initial reset state of the
environment.
"""
if self.meta_curriculum is not None:
self.meta_curriculum.increment_lessons(self._get_progresses())
return self.env.reset(config=self.meta_curriculum.get_config(),
train_mode=self.fast_simulation)
else:
return self.env.reset(train_mode=self.fast_simulation)
# TODO: Should be able to start learning at different lesson numbers for each curriculum.
# TODO: Should be able to start learning at different lesson numbers
# for each curriculum.
if self.meta_curriculum is not None:
self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
trainer_config = self._load_config()

self.logger.info('Loading Model...')
ckpt = tf.train.get_checkpoint_state(self.model_path)
if ckpt is None:
self.logger.info('The model {0} could not be found. Make sure you specified the right '
'--run-id'.format(self.model_path))
self.logger.info('The model {0} could not be found. Make '
'sure you specified the right '
'--run-id'
.format(self.model_path))
if self.meta_curriculum is not None:
self.meta_curriculum.increment_lessons(self._get_progresses())
curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation)
else:
curr_info = self.env.reset(train_mode=self.fast_simulation)
curr_info = self._increment_lessons_and_reset_env()
trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)
trainer.write_tensorboard_text('Hyperparameters',
trainer.parameters)
while any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()]) or not self.train_model:
while any([t.get_step <= t.get_max_steps \
for k, t in self.trainers.items()]) \
or not self.train_model:
if self.meta_curriculum is not None:
self.meta_curriculum.increment_lessons(self._get_progresses())
curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation)
else:
curr_info = self.env.reset(train_mode=self.fast_simulation)
curr_info = self._increment_lessons_and_reset_env()
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
# Decide and take an action

take_action_memories[brain_name],
take_action_text[brain_name],
take_action_value[brain_name],
take_action_outputs[brain_name]) = trainer.take_action(curr_info)
new_info = self.env.step(vector_action=take_action_vector, memory=take_action_memories,
text_action=take_action_text, value=take_action_value)
take_action_outputs[brain_name]) = \
trainer.take_action(curr_info)
new_info = self.env.step(vector_action=take_action_vector,
memory=take_action_memories,
text_action=take_action_text,
value=take_action_value)
trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
trainer.add_experiences(curr_info, new_info,
take_action_outputs[brain_name])
if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
if trainer.is_ready_update() and self.train_model \
and trainer.get_step <= trainer.get_max_steps:
# Perform gradient descent with experience buffer
trainer.update_model()
# Write training statistics to Tensorboard.

lesson=self.meta_curriculum.brains_to_curriculums[brain_name].lesson_num)
lesson_num=self.meta_curriculum
.brains_to_curriculums[brain_name]
.lesson_num)
if self.train_model and trainer.get_step <= trainer.get_max_steps:
if self.train_model \
and trainer.get_step <= trainer.get_max_steps:
global_step += 1
if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
if self.train_model:
global_step += 1
if global_step % self.save_freq == 0 and global_step != 0 \
and self.train_model:
# Save Tensorflow model
self._save_model(sess, steps=global_step, saver=saver)
curr_info = new_info

except KeyboardInterrupt:
print('--------------------------Now saving model-------------------------')
print('--------------------------Now saving model--------------'
'-----------')
self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
self.logger.info('Learning was interrupted. Please wait '
'while the graph is generated.')
self._save_model(sess, steps=global_step, saver=saver)
pass
self.env.close()

56
python/tests/test_buffer.py


import json
import unittest.mock as mock
import yaml
import pytest
import numpy as np
from unitytrainers.trainer_controller import TrainerController
from unitytrainers.buffer import Buffer
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers.curriculum import Curriculum
from unitytrainers.exception import CurriculumError
from unityagents.exception import UnityEnvironmentException
from .mock_communicator import MockCommunicator
def assert_array(a, b):
    """Assert that two arrays have the same shape and equal elements.

    :param a: First array; must expose ``shape`` and ``flatten()``.
    :param b: Second array, compared against ``a``.
    :raises AssertionError: If the shapes differ or any pair of
        corresponding flattened elements is not equal.
    """
    assert a.shape == b.shape
    # Shapes match, so the flattened views have equal length and zip pairs
    # every element exactly once.
    for left, right in zip(a.flatten(), b.flatten()):
        assert left == right
def test_buffer():
    """Exercise ``Buffer`` batching and update-buffer appends.

    Four fake agents each receive nine steps of data. Every value encodes
    ``100 * agent + 10 * step + field`` so the expected batches can be read
    directly from the numbers in the assertions.
    """
    buf = Buffer()
    for agent in range(4):
        for t in range(9):
            base = 100 * agent + 10 * t
            buf[agent]['vector_observation'].append(
                [base + 1, base + 2, base + 3])
            buf[agent]['action'].append([base + 4, base + 5])

    batch = buf[1]['vector_observation'].get_batch(
        batch_size=2, training_length=1, sequential=True)
    assert_array(batch, np.array([[171, 172, 173], [181, 182, 183]]))

    batch = buf[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=True)
    expected_sequential = np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ])
    assert_array(batch, expected_sequential)

    batch = buf[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=False)
    expected_overlapping = np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ])
    assert_array(batch, expected_overlapping)

    # Agent 4 was never filled above; resetting it must leave it empty.
    buf[4].reset_agent()
    assert len(buf[4]) == 0

    buf.append_update_buffer(3, batch_size=None, training_length=2)
    buf.append_update_buffer(2, batch_size=None, training_length=2)
    update_actions = buf.update_buffer['action']
    assert len(update_actions) == 10
    assert np.array(update_actions).shape == (10, 2, 2)

187
python/tests/test_trainer_controller.py


import json
import unittest.mock as mock
import yaml
import pytest
import tensorflow as tf
from unitytrainers.trainer_controller import TrainerController
from unitytrainers.buffer import Buffer
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers.curriculum import Curriculum
from unitytrainers.exception import CurriculumError
from unityagents.exception import UnityEnvironmentException
from .mock_communicator import MockCommunicator
@pytest.fixture
def dummy_start():
    """Return the encoded JSON text describing a fake academy with one brain.

    The payload names a single external brain, ``RealFakeBrain``.
    """
    academy_json = '''{ "AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-3",
"brainParameters": [{
"vectorObservationSize": 3,
"numStackedVectorObservations" : 2,
"vectorActionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"vectorActionDescriptions": ["",""],
"vectorActionSpaceType": 1
}]
}'''
    return academy_json.encode()
@pytest.fixture
def dummy_config():
    """Return a parsed trainer configuration whose ``default`` entry is PPO.

    All hyperparameters are nested under the single top-level key
    ``default``.
    """
    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader is deprecated in PyYAML 5.1 and rejected by PyYAML 6.
    return yaml.safe_load(
        '''
        default:
            trainer: ppo
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        ''')
@pytest.fixture
def dummy_bc_config():
    """Return a parsed trainer configuration using the ``imitation`` trainer.

    All hyperparameters are nested under the single top-level key
    ``default``.
    """
    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader is deprecated in PyYAML 5.1 and rejected by PyYAML 6.
    return yaml.safe_load(
        '''
        default:
            trainer: imitation
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
            use_curiosity: false
            curiosity_strength: 0.0
            curiosity_enc_size: 1
        ''')
@pytest.fixture
def dummy_bad_config():
    """Return a parsed trainer configuration naming an unknown trainer type.

    The ``trainer`` value is ``incorrect_trainer``; all hyperparameters are
    nested under the single top-level key ``default``.
    """
    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader is deprecated in PyYAML 5.1 and rejected by PyYAML 6.
    return yaml.safe_load(
        '''
        default:
            trainer: incorrect_trainer
            brain_to_imitate: ExpertBrain
            batches_per_epoch: 16
            batch_size: 32
            beta: 5.0e-3
            buffer_size: 512
            epsilon: 0.2
            gamma: 0.99
            hidden_units: 128
            lambd: 0.95
            learning_rate: 3.0e-4
            max_steps: 5.0e4
            normalize: true
            num_epoch: 5
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 1000
            use_recurrent: false
            memory_size: 8
        ''')
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_initialization(mock_communicator, mock_launcher):
    """Build a TrainerController on a mocked communicator and check its brain.

    The executable launcher and communicator factory are patched, so no
    Unity process is started.
    """
    fake_communicator = MockCommunicator(
        discrete_action=True, visual_inputs=1)
    mock_communicator.return_value = fake_communicator
    controller = TrainerController(
        ' ', ' ', 1, None, True, True, False, 1,
        1, 1, 1, '', "tests/test_unitytrainers.py", False)
    first_brain = controller.env.brain_names[0]
    assert first_brain == 'RealFakeBrain'
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_load_config(mock_communicator, mock_launcher, dummy_config):
    """Check that ``_load_config`` returns what the patched YAML loader yields.

    ``yaml.load`` and the controller module's ``open`` are both patched, so
    no real configuration file is read.
    """
    open_name = 'unitytrainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        # Set once; the original assigned this return value twice.
        mock_load.return_value = dummy_config
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                               1, 1, 1, '', '', False)
        config = tc._load_config()
        assert len(config) == 1
        assert config['default']['trainer'] == "ppo"
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
                             dummy_bc_config, dummy_bad_config):
    """Check which trainer class ``_initialize_trainers`` builds per config.

    Covers three configurations in turn: PPO, behavioral cloning, and an
    unknown trainer name that must raise ``UnityEnvironmentException``.
    """
    patched_open = 'unitytrainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as yaml_load_stub, \
            mock.patch(patched_open, create=True):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        controller = TrainerController(
            ' ', ' ', 1, None, True, True, False, 1, 1,
            1, 1, '', "tests/test_unitytrainers.py", False)

        # PPO configuration -> exactly one PPOTrainer.
        yaml_load_stub.return_value = dummy_config
        trainer_config = controller._load_config()
        tf.reset_default_graph()
        with tf.Session() as session:
            controller._initialize_trainers(trainer_config, session)
            assert len(controller.trainers) == 1
            ppo_trainer = controller.trainers['RealFakeBrain']
            assert isinstance(ppo_trainer, PPOTrainer)

        # Imitation configuration -> BehavioralCloningTrainer.
        yaml_load_stub.return_value = dummy_bc_config
        trainer_config = controller._load_config()
        tf.reset_default_graph()
        with tf.Session() as session:
            controller._initialize_trainers(trainer_config, session)
            bc_trainer = controller.trainers['RealFakeBrain']
            assert isinstance(bc_trainer, BehavioralCloningTrainer)

        # Unknown trainer name -> UnityEnvironmentException.
        yaml_load_stub.return_value = dummy_bad_config
        trainer_config = controller._load_config()
        tf.reset_default_graph()
        with tf.Session() as session:
            with pytest.raises(UnityEnvironmentException):
                controller._initialize_trainers(trainer_config, session)

218
python/tests/test_unitytrainers.py


import json
import yaml
import unittest.mock as mock
import pytest
from unitytrainers.trainer_controller import TrainerController
from unitytrainers.buffer import Buffer
from unitytrainers.models import *
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers.curriculum import Curriculum
from unitytrainers.exception import CurriculumError
from unityagents.exception import UnityEnvironmentException
from .mock_communicator import MockCommunicator
# Encoded JSON text describing a fake academy with one external brain.
dummy_start = '''{
"AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-3",
"brainParameters": [{
"vectorObservationSize": 3,
"numStackedVectorObservations" : 2,
"vectorActionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"vectorActionDescriptions": ["",""],
"vectorActionSpaceType": 1
}]
}'''.encode()

# The three configurations below are parsed with yaml.safe_load: calling
# yaml.load without an explicit Loader is deprecated in PyYAML 5.1 and
# rejected by PyYAML 6. Every hyperparameter is nested under the single
# top-level key ``default``.

# Trainer configuration selecting the PPO trainer.
dummy_config = yaml.safe_load('''
default:
    trainer: ppo
    batch_size: 32
    beta: 5.0e-3
    buffer_size: 512
    epsilon: 0.2
    gamma: 0.99
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    normalize: true
    num_epoch: 5
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
    memory_size: 8
    use_curiosity: false
    curiosity_strength: 0.0
    curiosity_enc_size: 1
''')

# Trainer configuration selecting the imitation trainer.
dummy_bc_config = yaml.safe_load('''
default:
    trainer: imitation
    brain_to_imitate: ExpertBrain
    batches_per_epoch: 16
    batch_size: 32
    beta: 5.0e-3
    buffer_size: 512
    epsilon: 0.2
    gamma: 0.99
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    normalize: true
    num_epoch: 5
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
    memory_size: 8
    use_curiosity: false
    curiosity_strength: 0.0
    curiosity_enc_size: 1
''')

# Trainer configuration naming an unknown trainer type.
dummy_bad_config = yaml.safe_load('''
default:
    trainer: incorrect_trainer
    brain_to_imitate: ExpertBrain
    batches_per_epoch: 16
    batch_size: 32
    beta: 5.0e-3
    buffer_size: 512
    epsilon: 0.2
    gamma: 0.99
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    normalize: true
    num_epoch: 5
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
    memory_size: 8
''')
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_initialization(mock_communicator, mock_launcher):
    """Build a TrainerController on a mocked communicator and check its brain.

    The executable launcher and communicator factory are patched, so no
    Unity process is started.
    """
    fake_communicator = MockCommunicator(
        discrete_action=True, visual_inputs=1)
    mock_communicator.return_value = fake_communicator
    controller = TrainerController(
        ' ', ' ', 1, None, True, True, False, 1,
        1, 1, 1, '', "tests/test_unitytrainers.py", False)
    first_brain = controller.env.brain_names[0]
    assert first_brain == 'RealFakeBrain'
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_load_config(mock_communicator, mock_launcher):
    """Check that ``_load_config`` returns what the patched YAML loader yields.

    ``yaml.load`` and the controller module's ``open`` are both patched, so
    no real configuration file is read.
    """
    open_name = 'unitytrainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        # Set once; the original assigned this return value twice.
        mock_load.return_value = dummy_config
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                               1, 1, 1, '', '', False)
        config = tc._load_config()
        assert len(config) == 1
        assert config['default']['trainer'] == "ppo"
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_initialize_trainers(mock_communicator, mock_launcher):
    """Check which trainer class ``_initialize_trainers`` builds per config.

    Covers three module-level configurations in turn: PPO, behavioral
    cloning, and an unknown trainer name that must raise
    ``UnityEnvironmentException``.
    """
    patched_open = 'unitytrainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as yaml_load_stub, \
            mock.patch(patched_open, create=True):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        controller = TrainerController(
            ' ', ' ', 1, None, True, True, False, 1,
            1, 1, 1, '', "tests/test_unitytrainers.py", False)

        # PPO configuration -> exactly one PPOTrainer.
        yaml_load_stub.return_value = dummy_config
        trainer_config = controller._load_config()
        tf.reset_default_graph()
        with tf.Session() as session:
            controller._initialize_trainers(trainer_config, session)
            assert len(controller.trainers) == 1
            ppo_trainer = controller.trainers['RealFakeBrain']
            assert isinstance(ppo_trainer, PPOTrainer)

        # Imitation configuration -> BehavioralCloningTrainer.
        yaml_load_stub.return_value = dummy_bc_config
        trainer_config = controller._load_config()
        tf.reset_default_graph()
        with tf.Session() as session:
            controller._initialize_trainers(trainer_config, session)
            bc_trainer = controller.trainers['RealFakeBrain']
            assert isinstance(bc_trainer, BehavioralCloningTrainer)

        # Unknown trainer name -> UnityEnvironmentException.
        yaml_load_stub.return_value = dummy_bad_config
        trainer_config = controller._load_config()
        tf.reset_default_graph()
        with tf.Session() as session:
            with pytest.raises(UnityEnvironmentException):
                controller._initialize_trainers(trainer_config, session)
def assert_array(a, b):
    """Assert that two arrays have the same shape and equal elements.

    :param a: First array; must expose ``shape`` and ``flatten()``.
    :param b: Second array, compared against ``a``.
    :raises AssertionError: If the shapes differ or any pair of
        corresponding flattened elements is not equal.
    """
    assert a.shape == b.shape
    # Shapes match, so the flattened views have equal length and zip pairs
    # every element exactly once.
    for left, right in zip(a.flatten(), b.flatten()):
        assert left == right
def test_buffer():
    """Exercise ``Buffer`` batching and update-buffer appends.

    Four fake agents each receive nine steps of data. Every value encodes
    ``100 * agent + 10 * step + field`` so the expected batches can be read
    directly from the numbers in the assertions.
    """
    buf = Buffer()
    for agent in range(4):
        for t in range(9):
            base = 100 * agent + 10 * t
            buf[agent]['vector_observation'].append(
                [base + 1, base + 2, base + 3])
            buf[agent]['action'].append([base + 4, base + 5])

    batch = buf[1]['vector_observation'].get_batch(
        batch_size=2, training_length=1, sequential=True)
    assert_array(batch, np.array([[171, 172, 173], [181, 182, 183]]))

    batch = buf[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=True)
    expected_sequential = np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ])
    assert_array(batch, expected_sequential)

    batch = buf[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=False)
    expected_overlapping = np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ])
    assert_array(batch, expected_overlapping)

    # Agent 4 was never filled above; resetting it must leave it empty.
    buf[4].reset_agent()
    assert len(buf[4]) == 0

    buf.append_update_buffer(3, batch_size=None, training_length=2)
    buf.append_update_buffer(2, batch_size=None, training_length=2)
    update_actions = buf.update_buffer['action']
    assert len(update_actions) == 10
    assert np.array(update_actions).shape == (10, 2, 2)
# Run pytest when this file is executed directly as a script.
if __name__ == '__main__':
    pytest.main()
正在加载...
取消
保存