import json
import unittest.mock as mock
from unittest.mock import *
import tensorflow as tf
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.ppo.trainer import PPOTrainer
from tests.mock_communicator import MockCommunicator
@pytest.fixture
curiosity_enc_size : 1
''' )
@pytest.fixture
def dummy_offline_bc_config_with_override():
    """Return the offline-BC dummy config extended with a brain-specific section.

    The result of ``dummy_offline_bc_config()`` gains one extra top-level
    entry for the test brain whose only setting turns normalization off.
    """
    config = dummy_offline_bc_config()
    # Brain-specific section carrying a single overriding value.
    config[' testbrain '] = {' normalize ': False}
    return config
@pytest.fixture
def dummy_bad_config ( ) :
memory_size : 8
''' )
@pytest.fixture
def basic_trainer_controller(brain_info):
    """Build a TrainerController with fixed test settings and one external brain.

    :param brain_info: Object registered as the single external brain.
    :return: The constructed TrainerController.
    """
    controller_kwargs = dict(
        model_path=' test_model_path ',
        summaries_dir=' test_summaries_dir ',
        run_id=' test_run_id ',
        save_freq=100,
        meta_curriculum=None,
        load=True,
        train=True,
        keep_checkpoints=False,
        lesson=None,
        external_brains={' testbrain ': brain_info},
        training_seed=99,
    )
    return TrainerController(**controller_kwargs)
@mock.patch(' mlagents.envs.UnityEnvironment.executable_launcher ')
@mock.patch(' mlagents.envs.UnityEnvironment.get_communicator ')
def test_initialization(mock_communicator, mock_launcher):
    """Construct a TrainerController over a mocked communicator and check its first brain name."""
    fake_communicator = MockCommunicator(discrete_action=True, visual_inputs=1)
    mock_communicator.return_value = fake_communicator

    # NOTE(review): 14 positional arguments are passed here, whereas
    # basic_trainer_controller in this file passes 11 keyword arguments --
    # confirm which constructor signature is current.
    controller_args = (
        ' ', ' ', 1, None, True, True, False, 1,
        1, 1, 1, ' ', " tests/test_mlagents.trainers.py ", False,
    )
    tc = TrainerController(*controller_args)

    first_brain = tc.env.brain_names[0]
    assert first_brain == ' RealFakeBrain '
@patch(' numpy.random.seed ')
@patch(' tensorflow.set_random_seed ')
def test_initialization_seed(tensorflow_set_seed, numpy_random_seed):
    """Check that constructing a TrainerController seeds both numpy and tensorflow.

    Stacked ``patch`` decorators are applied bottom-up, so the mock for the
    decorator closest to the function is the first positional argument. The
    parameter names follow that order so each name matches the target it
    actually replaces.
    """
    seed = 27
    TrainerController(' ', ' ', ' 1 ', 1, None, True, False, False, None, {}, seed)
    numpy_random_seed.assert_called_with(seed)
    tensorflow_set_seed.assert_called_with(seed)
def assert_bc_trainer_constructed(trainer_cls, input_config, tc, expected_brain_info, expected_config):
    """Assert that ``tc.initialize_trainers`` builds ``trainer_cls`` with the expected arguments.

    ``trainer_cls.__init__`` is temporarily replaced by a checker that compares
    every constructor argument with the expected value, so no real trainer
    set-up code runs.

    :param trainer_cls: Trainer class whose constructor is intercepted.
    :param input_config: Config handed to ``tc.initialize_trainers``.
    :param tc: Controller under test.
    :param expected_brain_info: Brain the constructor must receive.
    :param expected_config: Trainer parameters the constructor must receive.
    :raises AssertionError: On any argument mismatch, or when no instance of
        ``trainer_cls`` is registered for the test brain afterwards.
    """
    def mock_constructor(self, brain, trainer_params, training, load, seed, run_id):
        # Checker installed in place of trainer_cls.__init__.
        assert brain == expected_brain_info
        assert trainer_params == expected_config
        assert training == tc.train_model
        assert load == tc.load_model
        assert seed == tc.seed
        assert run_id == tc.run_id

    constructor_patch = patch.object(trainer_cls, "__init__", mock_constructor)
    with constructor_patch:
        tc.initialize_trainers(input_config)
        assert ' testbrain ' in tc.trainers
        assert isinstance(tc.trainers[' testbrain '], trainer_cls)
def assert_ppo_trainer_constructed(input_config, tc, expected_brain_info,
                                   expected_config, expected_reward_buff_cap=0):
    """Assert that ``tc.initialize_trainers`` builds a PPOTrainer with the expected arguments.

    ``PPOTrainer.__init__`` is temporarily replaced by a checker that compares
    every constructor argument with the expected value.

    :param input_config: Config handed to ``tc.initialize_trainers``.
    :param tc: Controller under test.
    :param expected_brain_info: Brain the constructor must receive.
    :param expected_config: Trainer parameters the constructor must receive.
    :param expected_reward_buff_cap: Reward buffer cap the constructor must receive.
    :raises AssertionError: On any argument mismatch, or when no PPOTrainer is
        registered for the test brain afterwards.
    """
    def mock_constructor(self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id):
        # Checker installed in place of PPOTrainer.__init__.
        assert brain == expected_brain_info
        assert trainer_parameters == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == tc.train_model
        assert load == tc.load_model
        assert seed == tc.seed
        assert run_id == tc.run_id

    constructor_patch = patch.object(PPOTrainer, "__init__", mock_constructor)
    with constructor_patch:
        tc.initialize_trainers(input_config)
        assert ' testbrain ' in tc.trainers
        assert isinstance(tc.trainers[' testbrain '], PPOTrainer)
@patch(' mlagents.envs.BrainInfo ')
def test_initialize_trainer_parameters_uses_defaults(BrainInfoMock):
    """Check that the offline BC trainer is constructed from the default config section.

    The expected parameters are the default section plus the summary path,
    model path and keep_checkpoints value taken from the controller.
    """
    brain = BrainInfoMock()
    tc = basic_trainer_controller(brain)

    full_config = dummy_offline_bc_config()
    # NOTE: this is the same dict object as the default section of
    # full_config, so the additions below are visible through full_config too.
    expected_config = full_config[' default ']
    expected_config.update({
        ' summary_path ': tc.summaries_dir + ' /test_run_id_testbrain ',
        ' model_path ': tc.model_path + ' /testbrain ',
        ' keep_checkpoints ': tc.keep_checkpoints,
    })

    assert_bc_trainer_constructed(OfflineBCTrainer, full_config, tc, brain, expected_config)
@patch(' mlagents.envs.BrainInfo ')
def test_initialize_trainer_parameters_override_defaults(BrainInfoMock):
    """Check that a brain-specific config value overrides the default section.

    The expected parameters are the default section plus the controller's
    summary path, model path and keep_checkpoints value, with ``normalize``
    replaced by the brain-specific override.
    """
    brain_info_mock = BrainInfoMock()
    tc = basic_trainer_controller(brain_info_mock)

    full_config = dummy_offline_bc_config_with_override()
    expected_config = full_config[' default ']
    expected_config[' summary_path '] = tc.summaries_dir + ' /test_run_id_testbrain '
    expected_config[' model_path '] = tc.model_path + ' /testbrain '
    expected_config[' keep_checkpoints '] = tc.keep_checkpoints

    # Override value from specific brain config
    expected_config[' normalize '] = False

    # These closing statements previously sat after test_load_config, where
    # expected_config / full_config / brain_info_mock are not bound, which
    # left this test without any assertion.
    assert_bc_trainer_constructed(OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config)


@mock.patch(' mlagents.envs.UnityEnvironment.executable_launcher ')
@mock.patch(' mlagents.envs.UnityEnvironment.get_communicator ')
def test_load_config(mock_communicator, mock_launcher, dummy_config):
    """Check that ``_load_config`` returns the config produced by the patched ``yaml.load``.

    ``yaml.load`` and the ``open`` name in the trainer_controller module are
    both patched, so no real file is read.
    """
    open_name = ' mlagents.trainers.trainer_controller ' + ' .open '
    with mock.patch(' yaml.load ') as mock_load:
        with mock.patch(open_name, create=True) as _:
            # Set once; the original assigned the same value a second time.
            mock_load.return_value = dummy_config
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            # NOTE(review): 14 positional arguments are passed here, whereas
            # basic_trainer_controller passes 11 keyword arguments -- confirm
            # which constructor signature is current.
            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                                   1, 1, 1, ' ', ' ', False)
            config = tc._load_config()
            assert len(config) == 1
            assert config[' default '][' trainer '] == " ppo "
@mock.patch(' mlagents.envs.UnityEnvironment.executable_launcher ')
@mock.patch(' mlagents.envs.UnityEnvironment.get_communicator ')
def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
                             dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
    """Check trainer selection for PPO, online BC and an invalid trainer config.

    ``yaml.load`` is patched so each ``_load_config`` call returns the fixture
    assigned to ``mock_load.return_value`` just before it.
    """
    open_name = ' mlagents.trainers.trainer_controller ' + ' .open '
    with mock.patch(' yaml.load ') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            # NOTE(review): 14 positional arguments are passed here, whereas
            # basic_trainer_controller passes 11 keyword arguments -- confirm
            # which constructor signature is current.
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, ' ', " tests/test_mlagents.trainers.py ",
                                   False)

            # Test for PPO trainer
            mock_load.return_value = dummy_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert len(tc.trainers) == 1
            assert isinstance(tc.trainers[' RealFakeBrain '], PPOTrainer)

            # The two sections below previously sat inside
            # test_initialize_online_bc_trainer, where mock_load is not bound.

            # Test for Online Behavior Cloning Trainer
            mock_load.return_value = dummy_online_bc_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert isinstance(tc.trainers[' RealFakeBrain '], OnlineBCTrainer)

            # Test for proper exception when trainer name is incorrect
            mock_load.return_value = dummy_bad_config
            config = tc._load_config()
            tf.reset_default_graph()
            with pytest.raises(UnityEnvironmentException):
                tc._initialize_trainers(config)


@patch(' mlagents.envs.BrainInfo ')
def test_initialize_online_bc_trainer(BrainInfoMock):
    """Check that the online BC trainer is constructed from the default config section.

    The expected parameters are the default section plus the summary path,
    model path and keep_checkpoints value taken from the controller.
    """
    brain_info_mock = BrainInfoMock()
    tc = basic_trainer_controller(brain_info_mock)

    full_config = dummy_online_bc_config()
    expected_config = full_config[' default ']
    expected_config[' summary_path '] = tc.summaries_dir + ' /test_run_id_testbrain '
    expected_config[' model_path '] = tc.model_path + ' /testbrain '
    expected_config[' keep_checkpoints '] = tc.keep_checkpoints

    assert_bc_trainer_constructed(OnlineBCTrainer, full_config, tc, brain_info_mock, expected_config)
@mock.patch(' mlagents.envs.UnityEnvironment.executable_launcher ')
@mock.patch(' mlagents.envs.UnityEnvironment.get_communicator ')
def test_initialize_offline_trainers(mock_communicator, mock_launcher, dummy_config,
                                     dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
    """Check that the offline BC config yields an OfflineBCTrainer for the mocked brain.

    ``yaml.load`` is patched so ``_load_config`` returns the offline BC fixture.
    """
    open_name = ' mlagents.trainers.trainer_controller ' + ' .open '
    with mock.patch(' yaml.load ') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, stack=False, visual_inputs=0,
                brain_name=" Ball3DBrain ", vec_obs_size=8)
            # NOTE(review): 14 positional arguments are passed here, whereas
            # basic_trainer_controller passes 11 keyword arguments -- confirm
            # which constructor signature is current.
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, ' ', " tests/test_mlagents.trainers.py ",
                                   False)

            # This section previously sat inside test_initialize_ppo_trainer,
            # where mock_load is not bound, leaving this test with no assertion.

            # Test for Offline Behavior Cloning Trainer
            mock_load.return_value = dummy_offline_bc_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert isinstance(tc.trainers[' Ball3DBrain '], OfflineBCTrainer)


@patch(' mlagents.envs.BrainInfo ')
def test_initialize_ppo_trainer(BrainInfoMock):
    """Check that the PPO trainer is constructed from the default config section.

    The expected parameters are the default section plus the summary path,
    model path and keep_checkpoints value taken from the controller.
    """
    brain_info_mock = BrainInfoMock()
    tc = basic_trainer_controller(brain_info_mock)

    full_config = dummy_config()
    expected_config = full_config[' default ']
    expected_config[' summary_path '] = tc.summaries_dir + ' /test_run_id_testbrain '
    expected_config[' model_path '] = tc.model_path + ' /testbrain '
    expected_config[' keep_checkpoints '] = tc.keep_checkpoints

    assert_ppo_trainer_constructed(full_config, tc, brain_info_mock, expected_config)
@patch(' mlagents.envs.BrainInfo ')
def test_initialize_invalid_trainer_raises_exception(BrainInfoMock):
    """Check that ``initialize_trainers`` raises UnityEnvironmentException for the bad config.

    :raises AssertionError: When ``initialize_trainers`` returns without
        raising UnityEnvironmentException.
    """
    brain_info_mock = BrainInfoMock()
    tc = basic_trainer_controller(brain_info_mock)
    bad_config = dummy_bad_config()

    try:
        tc.initialize_trainers(bad_config)
    except UnityEnvironmentException:
        pass
    else:
        # The previous `assert(1 == 0, "...")` asserted a two-element tuple,
        # which is always truthy, so the test could never fail. Raising
        # explicitly also keeps the check alive under `python -O`.
        raise AssertionError(" Initialize trainers with bad config did not raise an exception. ")
def trainer_controller_with_start_learning_mocks():
    """Build a controller whose collaborators are mocked for ``start_learning`` tests.

    The single trainer is a MagicMock starting at step 0 with a max of 5
    steps. ``take_step`` is a mock whose side effect increments the trainer's
    step and raises KeyboardInterrupt once the step exceeds 10.
    ``initialize_trainers``, ``_export_graph`` and ``_save_model`` are mocks.

    :return: Tuple of (controller, trainer mock).
    """
    trainer_mock = MagicMock()
    trainer_mock.configure_mock(
        get_step=0,
        get_max_steps=5,
        parameters={' some ': ' parameter '},
        write_tensorboard_text=MagicMock(),
    )

    tc = basic_trainer_controller(MagicMock())
    tc.initialize_trainers = MagicMock()
    tc.trainers = {' testbrain ': trainer_mock}
    tc.take_step = MagicMock()

    def take_step_sideeffect(env, curr_info):
        # Look the trainer up at call time, as the controller would see it.
        active_trainer = tc.trainers[' testbrain ']
        active_trainer.get_step += 1
        if active_trainer.get_step > 10:
            raise KeyboardInterrupt

    tc.take_step.side_effect = take_step_sideeffect
    tc._export_graph = MagicMock()
    tc._save_model = MagicMock()
    return tc, trainer_mock
@patch(' tensorflow.reset_default_graph ')
def test_start_learning_trains_forever_if_no_train_model(tf_reset_graph):
    """With ``train_model`` off, expect 11 steps and no graph export or model save.

    The mocked ``take_step`` raises KeyboardInterrupt on its 11th call, which
    is the call count asserted here.
    """
    tc, trainer_mock = trainer_controller_with_start_learning_mocks()
    tc.train_model = False

    trainer_config = dummy_config()
    tf_reset_graph.return_value = None

    env_mock = MagicMock(close=MagicMock(), reset=MagicMock())

    tc.start_learning(env_mock, trainer_config)

    tf_reset_graph.assert_called_once()
    tc.initialize_trainers.assert_called_once_with(trainer_config)
    env_mock.reset.assert_called_once()
    assert tc.take_step.call_count == 11
    tc._export_graph.assert_not_called()
    tc._save_model.assert_not_called()
    env_mock.close.assert_called_once()
@patch(' tensorflow.reset_default_graph ')
def test_start_learning_trains_until_max_steps_then_saves(tf_reset_graph):
    """Expect one step past the trainer's max step count, then a model save at step 6."""
    tc, trainer_mock = trainer_controller_with_start_learning_mocks()

    trainer_config = dummy_config()
    tf_reset_graph.return_value = None

    brain_info_mock = MagicMock()
    env_mock = MagicMock(
        close=MagicMock(),
        reset=MagicMock(return_value=brain_info_mock),
    )

    tc.start_learning(env_mock, trainer_config)

    tf_reset_graph.assert_called_once()
    tc.initialize_trainers.assert_called_once_with(trainer_config)
    env_mock.reset.assert_called_once()
    expected_steps = trainer_mock.get_max_steps + 1
    assert tc.take_step.call_count == expected_steps
    env_mock.close.assert_called_once()
    tc._save_model.assert_called_once_with(steps=6)
def test_start_learning_updates_meta_curriculum_lesson_number():
    """Expect ``start_learning`` to push the controller's lesson number to the meta curriculum."""
    tc, trainer_mock = trainer_controller_with_start_learning_mocks()
    trainer_config = dummy_config()

    brain_info_mock = MagicMock()
    env_mock = MagicMock(
        close=MagicMock(),
        reset=MagicMock(return_value=brain_info_mock),
    )

    meta_curriculum_mock = MagicMock(set_all_curriculums_to_lesson_num=MagicMock())
    tc.meta_curriculum = meta_curriculum_mock
    tc.lesson = 5

    tc.start_learning(env_mock, trainer_config)

    set_lesson = meta_curriculum_mock.set_all_curriculums_to_lesson_num
    set_lesson.assert_called_once_with(tc.lesson)
def trainer_controller_with_take_step_mocks():
    """Build a controller with one mocked trainer for ``take_step`` tests.

    The trainer is a MagicMock starting at step 0 with a max of 5 steps.

    :return: Tuple of (controller, trainer mock).
    """
    trainer_mock = MagicMock()
    trainer_mock.configure_mock(
        get_step=0,
        get_max_steps=5,
        parameters={' some ': ' parameter '},
        write_tensorboard_text=MagicMock(),
    )

    tc = basic_trainer_controller(MagicMock())
    tc.trainers = {' testbrain ': trainer_mock}
    return tc, trainer_mock
def test_take_step_resets_env_on_global_done():
    """Expect ``take_step`` to reset the environment once when ``global_done`` is set."""
    tc, trainer_mock = trainer_controller_with_take_step_mocks()
    brain_info_mock = MagicMock()

    # take_action yields a five-slot result whose entries are all None.
    trainer_mock.configure_mock(
        take_action=MagicMock(return_value=[None] * 5),
        add_experiences=MagicMock(),
        process_experiences=MagicMock(),
        update_policy=MagicMock(),
        write_summary=MagicMock(),
    )
    trainer_mock.trainer.increment_step_and_update_last_reward = MagicMock()

    env_mock = MagicMock(
        step=MagicMock(return_value=MagicMock()),
        close=MagicMock(),
        reset=MagicMock(return_value=brain_info_mock),
        global_done=True,
    )

    tc.take_step(env_mock, brain_info_mock)

    env_mock.reset.assert_called_once()
def test_take_step_adds_experiences_to_trainer_and_trains():
    """Expect ``take_step`` to step the env with the trainer's action and feed the result back.

    With ``global_done`` false and the trainer reporting it is ready to
    update, the test checks the env is not reset, the env is stepped with the
    per-brain action data, and the trainer's experience, policy-update,
    summary and step-increment hooks are each called once.
    """
    tc, trainer_mock = trainer_controller_with_take_step_mocks()
    curr_info_mock = MagicMock()

    trainer_action_output_mock = [
        ' action ',
        ' memory ',
        ' actiontext ',
        ' value ',
        ' output ',
    ]
    action, memory, action_text, value, output = trainer_action_output_mock
    trainer_mock.configure_mock(
        take_action=MagicMock(return_value=trainer_action_output_mock),
        is_ready_update=MagicMock(return_value=True),
    )

    env_step_output_mock = MagicMock()
    env_mock = MagicMock(
        step=MagicMock(return_value=env_step_output_mock),
        close=MagicMock(),
        reset=MagicMock(return_value=curr_info_mock),
        global_done=False,
    )

    tc.take_step(env_mock, curr_info_mock)

    env_mock.reset.assert_not_called()
    trainer_mock.take_action.assert_called_once_with(curr_info_mock)
    env_mock.step.assert_called_once_with(
        vector_action={' testbrain ': action},
        memory={' testbrain ': memory},
        text_action={' testbrain ': action_text},
        value={' testbrain ': value},
    )
    trainer_mock.add_experiences.assert_called_once_with(
        curr_info_mock, env_step_output_mock, output
    )
    trainer_mock.process_experiences.assert_called_once_with(curr_info_mock, env_step_output_mock)
    trainer_mock.update_policy.assert_called_once()
    trainer_mock.write_summary.assert_called_once()
    trainer_mock.increment_step_and_update_last_reward.assert_called_once()