import json import yaml import unittest.mock as mock import pytest from unitytrainers.trainer_controller import TrainerController from unitytrainers.buffer import Buffer from unitytrainers.models import * from unitytrainers.ppo.trainer import PPOTrainer from unitytrainers.bc.trainer import BehavioralCloningTrainer from unitytrainers.curriculum import Curriculum from unitytrainers.exception import CurriculumError from unityagents.exception import UnityEnvironmentException from .mock_communicator import MockCommunicator dummy_start = '''{ "AcademyName": "RealFakeAcademy", "resetParameters": {}, "brainNames": ["RealFakeBrain"], "externalBrainNames": ["RealFakeBrain"], "logPath":"RealFakePath", "apiNumber":"API-3", "brainParameters": [{ "vectorObservationSize": 3, "numStackedVectorObservations" : 2, "vectorActionSize": 2, "memorySize": 0, "cameraResolutions": [], "vectorActionDescriptions": ["",""], "vectorActionSpaceType": 1 }] }'''.encode() dummy_config = yaml.load(''' default: trainer: ppo batch_size: 32 beta: 5.0e-3 buffer_size: 512 epsilon: 0.2 gamma: 0.99 hidden_units: 128 lambd: 0.95 learning_rate: 3.0e-4 max_steps: 5.0e4 normalize: true num_epoch: 5 num_layers: 2 time_horizon: 64 sequence_length: 64 summary_freq: 1000 use_recurrent: false memory_size: 8 use_curiosity: false curiosity_strength: 0.0 curiosity_enc_size: 1 ''') dummy_bc_config = yaml.load(''' default: trainer: imitation brain_to_imitate: ExpertBrain batches_per_epoch: 16 batch_size: 32 beta: 5.0e-3 buffer_size: 512 epsilon: 0.2 gamma: 0.99 hidden_units: 128 lambd: 0.95 learning_rate: 3.0e-4 max_steps: 5.0e4 normalize: true num_epoch: 5 num_layers: 2 time_horizon: 64 sequence_length: 64 summary_freq: 1000 use_recurrent: false memory_size: 8 use_curiosity: false curiosity_strength: 0.0 curiosity_enc_size: 1 ''') dummy_bad_config = yaml.load(''' default: trainer: incorrect_trainer brain_to_imitate: ExpertBrain batches_per_epoch: 16 batch_size: 32 beta: 5.0e-3 buffer_size: 512 epsilon: 0.2 gamma: 0.99 hidden_units: 128 lambd: 0.95 learning_rate: 3.0e-4 max_steps: 5.0e4 normalize: true num_epoch: 5 num_layers: 2 time_horizon: 64 sequence_length: 64 summary_freq: 1000 use_recurrent: false memory_size: 8 ''') dummy_curriculum = json.loads('''{ "measure" : "reward", "thresholds" : [10, 20, 50], "min_lesson_length" : 3, "signal_smoothing" : true, "parameters" : { "param1" : [0.7, 0.5, 0.3, 0.1], "param2" : [100, 50, 20, 15], "param3" : [0.2, 0.3, 0.7, 0.9] } }''') bad_curriculum = json.loads('''{ "measure" : "reward", "thresholds" : [10, 20, 50], "min_lesson_length" : 3, "signal_smoothing" : false, "parameters" : { "param1" : [0.7, 0.5, 0.3, 0.1], "param2" : [100, 50, 20], "param3" : [0.2, 0.3, 0.7, 0.9] } }''') @mock.patch('unityagents.UnityEnvironment.executable_launcher') @mock.patch('unityagents.UnityEnvironment.get_communicator') def test_initialization(mock_communicator, mock_launcher): mock_communicator.return_value = MockCommunicator( discrete_action=True, visual_inputs=1) tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1, 1, 1, '', "tests/test_unitytrainers.py", False) assert(tc.env.brain_names[0] == 'RealFakeBrain') @mock.patch('unityagents.UnityEnvironment.executable_launcher') @mock.patch('unityagents.UnityEnvironment.get_communicator') def test_load_config(mock_communicator, mock_launcher): open_name = 'unitytrainers.trainer_controller' + '.open' with mock.patch('yaml.load') as mock_load: with mock.patch(open_name, create=True) as _: mock_load.return_value = dummy_config mock_communicator.return_value = MockCommunicator( discrete_action=True, visual_inputs=1) mock_load.return_value = dummy_config tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1, 1, 1, '','', False) config = tc._load_config() assert(len(config) == 1) assert(config['default']['trainer'] == "ppo") @mock.patch('unityagents.UnityEnvironment.executable_launcher') @mock.patch('unityagents.UnityEnvironment.get_communicator') def test_initialize_trainers(mock_communicator, mock_launcher): open_name = 'unitytrainers.trainer_controller' + '.open' with mock.patch('yaml.load') as mock_load: with mock.patch(open_name, create=True) as _: mock_communicator.return_value = MockCommunicator( discrete_action=True, visual_inputs=1) tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1, 1, 1, '', "tests/test_unitytrainers.py", False) # Test for PPO trainer mock_load.return_value = dummy_config config = tc._load_config() tf.reset_default_graph() with tf.Session() as sess: tc._initialize_trainers(config, sess) assert(len(tc.trainers) == 1) assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer)) # Test for Behavior Cloning Trainer mock_load.return_value = dummy_bc_config config = tc._load_config() tf.reset_default_graph() with tf.Session() as sess: tc._initialize_trainers(config, sess) assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer)) # Test for proper exception when trainer name is incorrect mock_load.return_value = dummy_bad_config config = tc._load_config() tf.reset_default_graph() with tf.Session() as sess: with pytest.raises(UnityEnvironmentException): tc._initialize_trainers(config, sess) def assert_array(a, b): assert a.shape == b.shape la = list(a.flatten()) lb = list(b.flatten()) for i in range(len(la)): assert la[i] == lb[i] def test_buffer(): b = Buffer() for fake_agent_id in range(4): for step in range(9): b[fake_agent_id]['vector_observation'].append( [100 * fake_agent_id + 10 * step + 1, 100 * fake_agent_id + 10 * step + 2, 100 * fake_agent_id + 10 * step + 3] ) b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4, 100 * fake_agent_id + 10 * step + 5]) a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True) assert_array(a, np.array([[171, 172, 173], [181, 182, 183]])) a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True) assert_array(a, np.array([ [[231, 232, 233], [241, 242, 243], [251, 252, 253]], [[261, 262, 263], [271, 272, 273], [281, 282, 283]] ])) a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False) assert_array(a, np.array([ [[251, 252, 253], [261, 262, 263], [271, 272, 273]], [[261, 262, 263], [271, 272, 273], [281, 282, 283]] ])) b[4].reset_agent() assert len(b[4]) == 0 b.append_update_buffer(3, batch_size=None, training_length=2) b.append_update_buffer(2, batch_size=None, training_length=2) assert len(b.update_buffer['action']) == 10 assert np.array(b.update_buffer['action']).shape == (10, 2, 2) def test_curriculum(): open_name = '%s.open' % __name__ with mock.patch('json.load') as mock_load: with mock.patch(open_name, create=True) as mock_open: mock_open.return_value = 0 mock_load.return_value = bad_curriculum with pytest.raises(CurriculumError): Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1}) mock_load.return_value = dummy_curriculum with pytest.raises(CurriculumError): Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1}) curriculum = Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1}) assert curriculum.get_lesson_number == 0 curriculum.set_lesson_number(1) assert curriculum.get_lesson_number == 1 curriculum.increment_lesson(10) assert curriculum.get_lesson_number == 1 curriculum.increment_lesson(30) curriculum.increment_lesson(30) assert curriculum.get_lesson_number == 1 assert curriculum.lesson_length == 3 curriculum.increment_lesson(30) assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7} assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2} assert curriculum.lesson_length == 0 assert curriculum.get_lesson_number == 2 if __name__ == '__main__': pytest.main()