from typing import Dict , Any
from mlagents.trainers.tests.simple_test_envs import (
Simple1D Environment ,
Memory1D Environment ,
Record1D Environment ,
SimpleEnvironment ,
MemoryEnvironment ,
RecordEnvironment ,
)
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
@pytest.mark.parametrize(" use_discrete ", [True, False])
def test_simple_ppo(use_discrete):
    """Pass a SimpleEnvironment and a PPO_CONFIG-derived config to the training check.

    Parametrized over ``use_discrete`` (``True`` and ``False``); the flag is
    forwarded unchanged to the environment constructor.
    """
    # A single environment is built: the one handed to the training check.
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = generate_config(PPO_CONFIG)
    _check_environment_trains(env, config)
@pytest.mark.parametrize(" use_discrete ", [True, False])
def test_2d_ppo(use_discrete):
    """Pass a two-action SimpleEnvironment and a PPO_CONFIG-derived config to the training check.

    Parametrized over ``use_discrete`` (``True`` and ``False``). The
    environment is created with ``action_size=2`` and ``step_size=0.5``.
    """
    env_options = dict(use_discrete=use_discrete, action_size=2, step_size=0.5)
    environment = SimpleEnvironment([BRAIN_NAME], **env_options)
    trainer_config = generate_config(PPO_CONFIG)
    _check_environment_trains(environment, trainer_config)
def test_visual_ppo ( num_visual , use_discrete ) :
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME ] ,
use_discrete = use_discrete ,
num_visual = num_visual ,
@pytest.mark.parametrize ( " num_visual " , [ 1 , 2 ] )
@pytest.mark.parametrize ( " vis_encode_type " , [ " resnet " , " nature_cnn " ] )
def test_visual_advanced_ppo ( vis_encode_type , num_visual ) :
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME ] ,
use_discrete = True ,
num_visual = num_visual ,
@pytest.mark.parametrize ( " use_discrete " , [ True , False ] )
def test_recurrent_ppo ( use_discrete ) :
env = Memory1DEnvironment ( [ BRAIN_NAME ] , use_discrete = use_discrete )
env = MemoryEnvironment ( [ BRAIN_NAME ] , use_discrete = use_discrete )
" max_steps " : 3000 ,
" max_steps " : 4000 ,
" learning_rate " : 1e-3 ,
_check_environment_trains ( env , config )
_check_environment_trains ( env , config , success_threshold = 0.9 )
env = Simple1DEnvironment ( [ BRAIN_NAME ] , use_discrete = use_discrete )
env = SimpleEnvironment ( [ BRAIN_NAME ] , use_discrete = use_discrete )
def test_2d_sac(use_discrete):
    """Pass a two-action SimpleEnvironment and an overridden SAC_CONFIG to the training check.

    The environment is created with ``action_size=2`` and ``step_size=0.5``;
    ``use_discrete`` is forwarded unchanged to its constructor.
    """
    env_options = dict(use_discrete=use_discrete, action_size=2, step_size=0.5)
    environment = SimpleEnvironment([BRAIN_NAME], **env_options)
    # NOTE(review): these keys carry leading/trailing spaces exactly as they
    # appear in this file -- confirm generate_config expects them in this form.
    overrides = {
        " buffer_init_steps ": 2000,
        " max_steps ": 3000,
    }
    trainer_config = generate_config(SAC_CONFIG, overrides)
    _check_environment_trains(environment, trainer_config)
@pytest.mark.parametrize ( " use_discrete " , [ True , False ] )
env = Simple1DEnvironment (
env = SimpleEnvironment (
[ BRAIN_NAME ] ,
use_discrete = use_discrete ,
num_visual = num_visual ,
@pytest.mark.parametrize ( " num_visual " , [ 1 , 2 ] )
@pytest.mark.parametrize ( " vis_encode_type " , [ " resnet " , " nature_cnn " ] )
def test_visual_advanced_sac ( vis_encode_type , num_visual ) :
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME ] ,
use_discrete = True ,
num_visual = num_visual ,
@pytest.mark.parametrize(" use_discrete ", [True, False])
def test_recurrent_sac(use_discrete):
    """Pass a MemoryEnvironment and an overridden SAC_CONFIG to the training check.

    Parametrized over ``use_discrete`` (``True`` and ``False``). The overrides
    set a batch size of 32, turn on ``use_recurrent`` and set max steps to 2000.
    """
    # A single environment is built: the one handed to the training check.
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    # NOTE(review): these keys carry leading/trailing spaces exactly as they
    # appear in this file -- confirm generate_config expects them in this form.
    override_vals = {" batch_size ": 32, " use_recurrent ": True, " max_steps ": 2000}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)
def test_simple_ghost ( use_discrete ) :
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME + " ?team=0 " , BRAIN_NAME + " ?team=1 " ] , use_discrete = use_discrete
)
override_vals = {
@pytest.mark.parametrize ( " use_discrete " , [ True , False ] )
def test_simple_ghost_fails ( use_discrete ) :
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME + " ?team=0 " , BRAIN_NAME + " ?team=1 " ] , use_discrete = use_discrete
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
@pytest.fixture ( scope = " session " )
def simple_record ( tmpdir_factory ) :
def record_demo ( use_discrete , num_visual = 0 , num_vector = 1 ) :
env = Record1D Environment (
env = RecordEnvironment (
[ BRAIN_NAME ] ,
use_discrete = use_discrete ,
num_visual = num_visual ,
@pytest.mark.parametrize ( " trainer_config " , [ PPO_CONFIG , SAC_CONFIG ] )
def test_gail ( simple_record , use_discrete , trainer_config ) :
demo_path = simple_record ( use_discrete )
env = Simple1D Environment ( [ BRAIN_NAME ] , use_discrete = use_discrete , step_size = 0.2 )
env = SimpleEnvironment ( [ BRAIN_NAME ] , use_discrete = use_discrete , step_size = 0.2 )
override_vals = {
" max_steps " : 500 ,
" behavioral_cloning " : { " demo_path " : demo_path , " strength " : 1.0 , " steps " : 1000 } ,
@pytest.mark.parametrize ( " use_discrete " , [ True , False ] )
def test_gail_visual_ppo ( simple_record , use_discrete ) :
demo_path = simple_record ( use_discrete , num_visual = 1 , num_vector = 0 )
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME ] ,
num_visual = 1 ,
num_vector = 0 ,
@pytest.mark.parametrize ( " use_discrete " , [ True , False ] )
def test_gail_visual_sac ( simple_record , use_discrete ) :
demo_path = simple_record ( use_discrete , num_visual = 1 , num_vector = 0 )
env = Simple1D Environment (
env = SimpleEnvironment (
[ BRAIN_NAME ] ,
num_visual = 1 ,
num_vector = 0 ,