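# NOTE: the lines below appear to be residue from the web code viewer this file
# was copied from (translated to English); they are not part of the test module.
#   "You may select at most 25 topics."
#   "Topics must start with a Chinese character, a letter or a digit, may
#    contain hyphens (-), and must not exceed 35 characters."
#   Viewer-reported file size (shown twice): 270 lines, 9.1 KiB.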
import json
|
|
import yaml
|
|
import unittest.mock as mock
|
|
import pytest
|
|
|
|
from unitytrainers.trainer_controller import TrainerController
|
|
from unitytrainers.buffer import Buffer
|
|
from unitytrainers.models import *
|
|
from unitytrainers.ppo.trainer import PPOTrainer
|
|
from unitytrainers.bc.trainer import BehavioralCloningTrainer
|
|
from unitytrainers import Curriculum
|
|
from unityagents import UnityEnvironmentException
|
|
from .mock_communicator import MockCommunicator
|
|
|
|
# UTF-8 encoded JSON payload describing a fake academy with a single external
# brain ("RealFakeBrain"). It is not referenced by the tests visible in this
# module; kept as a module-level fixture.
dummy_start = '''{
  "AcademyName": "RealFakeAcademy",
  "resetParameters": {},
  "brainNames": ["RealFakeBrain"],
  "externalBrainNames": ["RealFakeBrain"],
  "logPath":"RealFakePath",
  "apiNumber":"API-3",
  "brainParameters": [{
      "vectorObservationSize": 3,
      "numStackedVectorObservations" : 2,
      "vectorActionSize": 2,
      "memorySize": 0,
      "cameraResolutions": [],
      "vectorActionDescriptions": ["",""],
      "vectorActionSpaceType": 1
      }]
}'''.encode('utf-8')
|
|
|
|
|
|
# Trainer configuration handed back by the mocked YAML loader in
# test_load_config and test_initialize_trainers; the latter expects it to
# produce a PPOTrainer for "RealFakeBrain".
#
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError on
# PyYAML 6. The document only contains plain scalars, so the parsed result is
# the same.
dummy_config = yaml.safe_load('''
    default:
        trainer: ppo
        batch_size: 32
        beta: 5.0e-3
        buffer_size: 512
        epsilon: 0.2
        gamma: 0.99
        hidden_units: 128
        lambd: 0.95
        learning_rate: 3.0e-4
        max_steps: 5.0e4
        normalize: true
        num_epoch: 5
        num_layers: 2
        time_horizon: 64
        sequence_length: 64
        summary_freq: 1000
        use_recurrent: false
        memory_size: 8
        use_curiosity: false
        curiosity_strength: 0.0
        curiosity_enc_size: 1
    ''')
|
|
|
|
# Trainer configuration for the imitation case; test_initialize_trainers
# expects it to produce a BehavioralCloningTrainer for "RealFakeBrain".
#
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError on
# PyYAML 6. The document only contains plain scalars, so the parsed result is
# the same.
dummy_bc_config = yaml.safe_load('''
    default:
        trainer: imitation
        brain_to_imitate: ExpertBrain
        batches_per_epoch: 16
        batch_size: 32
        beta: 5.0e-3
        buffer_size: 512
        epsilon: 0.2
        gamma: 0.99
        hidden_units: 128
        lambd: 0.95
        learning_rate: 3.0e-4
        max_steps: 5.0e4
        normalize: true
        num_epoch: 5
        num_layers: 2
        time_horizon: 64
        sequence_length: 64
        summary_freq: 1000
        use_recurrent: false
        memory_size: 8
        use_curiosity: false
        curiosity_strength: 0.0
        curiosity_enc_size: 1
    ''')
|
|
|
|
# Trainer configuration with an unknown trainer name; test_initialize_trainers
# expects trainer initialization to raise UnityEnvironmentException for it.
#
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError on
# PyYAML 6. The document only contains plain scalars, so the parsed result is
# the same.
dummy_bad_config = yaml.safe_load('''
    default:
        trainer: incorrect_trainer
        brain_to_imitate: ExpertBrain
        batches_per_epoch: 16
        batch_size: 32
        beta: 5.0e-3
        buffer_size: 512
        epsilon: 0.2
        gamma: 0.99
        hidden_units: 128
        lambd: 0.95
        learning_rate: 3.0e-4
        max_steps: 5.0e4
        normalize: true
        num_epoch: 5
        num_layers: 2
        time_horizon: 64
        sequence_length: 64
        summary_freq: 1000
        use_recurrent: false
        memory_size: 8
    ''')
|
|
|
|
# Curriculum definition returned by the mocked json.load in test_curriculum.
# Written as a plain dict with the same keys and values the JSON document
# parses to: three thresholds and three parameters with four values each.
dummy_curriculum = {
    "measure": "reward",
    "thresholds": [10, 20, 50],
    "min_lesson_length": 3,
    "signal_smoothing": True,
    "parameters": {
        "param1": [0.7, 0.5, 0.3, 0.1],
        "param2": [100, 50, 20, 15],
        "param3": [0.2, 0.3, 0.7, 0.9],
    },
}
|
|
# Curriculum definition that test_curriculum expects Curriculum to reject with
# UnityEnvironmentException. "param2" lists three values while the other
# parameters list four (presumably the reason it is rejected - confirm against
# the Curriculum implementation).
bad_curriculum = {
    "measure": "reward",
    "thresholds": [10, 20, 50],
    "min_lesson_length": 3,
    "signal_smoothing": False,
    "parameters": {
        "param1": [0.7, 0.5, 0.3, 0.1],
        "param2": [100, 50, 20],
        "param3": [0.2, 0.3, 0.7, 0.9],
    },
}
|
|
|
|
|
|
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_initialization(mock_communicator, mock_launcher):
    """
    Build a TrainerController with the environment's launcher and communicator
    patched out, and check that the first brain it reports is 'RealFakeBrain'.
    """
    fake_communicator = MockCommunicator(discrete_action=True, visual_inputs=1)
    mock_communicator.return_value = fake_communicator

    controller = TrainerController(
        ' ', ' ', 1, None, True, True, False, 1,
        1, 1, 1, '', "tests/test_unitytrainers.py", False)

    first_brain = controller.env.brain_names[0]
    assert first_brain == 'RealFakeBrain'
|
|
|
|
|
|
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_load_config(mock_communicator, mock_launcher):
    """
    Check that TrainerController._load_config returns what the YAML loader
    produces: a single 'default' section whose trainer is "ppo".

    Both ``yaml.load`` and the ``open`` name in
    ``unitytrainers.trainer_controller`` are patched, so the test controls the
    parsed result through ``mock_load.return_value``.
    """
    open_name = 'unitytrainers.trainer_controller' + '.open'
    # Patches are entered in the same order as before (yaml.load, then open).
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        # Set once; the original assigned this same value twice.
        mock_load.return_value = dummy_config
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)

        tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                               1, 1, 1, '', '', False)
        config = tc._load_config()

        assert len(config) == 1
        assert config['default']['trainer'] == "ppo"
|
|
|
|
|
|
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_initialize_trainers(mock_communicator, mock_launcher):
    """
    Check which trainer TrainerController._initialize_trainers builds for each
    configuration: PPOTrainer for dummy_config, BehavioralCloningTrainer for
    dummy_bc_config, and UnityEnvironmentException for dummy_bad_config.
    """
    # NOTE(review): `tf` is not imported explicitly in this file; presumably it
    # arrives through `from unitytrainers.models import *` - confirm.
    open_name = 'unitytrainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        controller = TrainerController(
            ' ', ' ', 1, None, True, True, False, 1,
            1, 1, 1, '', "tests/test_unitytrainers.py", False)

        def reload_config(raw_config):
            """Make the mocked loader return raw_config, load it, reset the TF graph."""
            mock_load.return_value = raw_config
            loaded = controller._load_config()
            tf.reset_default_graph()
            return loaded

        # PPO trainer
        ppo_config = reload_config(dummy_config)
        with tf.Session() as session:
            controller._initialize_trainers(ppo_config, session)
            assert len(controller.trainers) == 1
            assert isinstance(controller.trainers['RealFakeBrain'], PPOTrainer)

        # Behavior Cloning trainer
        bc_config = reload_config(dummy_bc_config)
        with tf.Session() as session:
            controller._initialize_trainers(bc_config, session)
            assert isinstance(controller.trainers['RealFakeBrain'],
                              BehavioralCloningTrainer)

        # An unknown trainer name must raise the expected exception
        bad_config = reload_config(dummy_bad_config)
        with tf.Session() as session:
            with pytest.raises(UnityEnvironmentException):
                controller._initialize_trainers(bad_config, session)
|
|
|
|
|
|
def assert_array(a, b):
    """
    Assert that two arrays have the same shape and equal elements.

    :param a: Array under test; must provide ``shape`` and ``flatten()``.
    :param b: Expected array; same requirements as ``a``.
    :raises AssertionError: If the shapes differ or any pair of elements at
        the same flattened position differs.
    """
    assert a.shape == b.shape, \
        "shape mismatch: %s != %s" % (a.shape, b.shape)
    # Shapes match here, so both flattened sequences have the same length and
    # zip() pairs every element.
    for index, (left, right) in enumerate(zip(a.flatten(), b.flatten())):
        assert left == right, \
            "element %d differs: %r != %r" % (index, left, right)
|
|
|
|
|
|
def test_buffer():
    """
    Exercise Buffer: per-agent appends, batch retrieval (sequential and not),
    resetting an agent, and appending agents to the update buffer.
    """
    buf = Buffer()

    # Fill agents 0..3 with nine steps each. Every value encodes the agent in
    # the hundreds, the step in the tens and the field slot in the units.
    for agent in range(4):
        for step in range(9):
            base = 100 * agent + 10 * step
            buf[agent]['vector_observation'].append(
                [base + 1, base + 2, base + 3])
            buf[agent]['action'].append([base + 4, base + 5])

    batch = buf[1]['vector_observation'].get_batch(
        batch_size=2, training_length=1, sequential=True)
    expected = np.array([[171, 172, 173], [181, 182, 183]])
    assert_array(batch, expected)

    batch = buf[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=True)
    expected = np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]],
    ])
    assert_array(batch, expected)

    batch = buf[2]['vector_observation'].get_batch(
        batch_size=2, training_length=3, sequential=False)
    expected = np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]],
    ])
    assert_array(batch, expected)

    # Agent 4 was never filled above; after a reset it must be empty.
    buf[4].reset_agent()
    assert len(buf[4]) == 0

    # Push agents 3 and 2 into the update buffer in sequences of length 2.
    buf.append_update_buffer(3, batch_size=None, training_length=2)
    buf.append_update_buffer(2, batch_size=None, training_length=2)
    update_actions = buf.update_buffer['action']
    assert len(update_actions) == 10
    assert np.array(update_actions).shape == (10, 2, 2)
|
|
|
|
|
|
def test_curriculum():
    """
    Check Curriculum construction errors and lesson progression, with
    ``json.load`` mocked to return the module-level curriculum fixtures.
    """
    # NOTE(review): `open` is patched on this test module's own namespace
    # (`__name__`), not on the module that defines Curriculum - confirm that
    # this patch actually affects how Curriculum reads its file.
    open_name = '%s.open' % __name__
    with mock.patch('json.load') as mock_load, \
            mock.patch(open_name, create=True) as mock_open:
        mock_open.return_value = 0

        # A malformed curriculum must be rejected.
        mock_load.return_value = bad_curriculum
        with pytest.raises(UnityEnvironmentException):
            Curriculum('tests/test_unityagents.py',
                       {"param1": 1, "param2": 1, "param3": 1})

        # A valid curriculum is rejected when a default parameter is missing.
        mock_load.return_value = dummy_curriculum
        with pytest.raises(UnityEnvironmentException):
            Curriculum('tests/test_unityagents.py',
                       {"param1": 1, "param2": 1})

        lessons = Curriculum('tests/test_unityagents.py',
                             {"param1": 1, "param2": 1, "param3": 1})
        # `get_lesson_number` is read as an attribute, as in the original
        # test (presumably a property - confirm).
        assert lessons.get_lesson_number == 0

        lessons.set_lesson_number(1)
        assert lessons.get_lesson_number == 1

        lessons.increment_lesson(10)
        assert lessons.get_lesson_number == 1

        lessons.increment_lesson(30)
        lessons.increment_lesson(30)
        assert lessons.get_lesson_number == 1
        assert lessons.lesson_length == 3

        lessons.increment_lesson(30)
        current_lesson_config = {'param1': 0.3, 'param2': 20, 'param3': 0.7}
        first_lesson_config = {"param1": 0.7, "param2": 100, "param3": 0.2}
        assert lessons.get_config() == current_lesson_config
        assert lessons.get_config(0) == first_lesson_config
        assert lessons.lesson_length == 0
        assert lessons.get_lesson_number == 2
|
|
|
|
|
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    pytest.main()
|