|
|
|
|
|
|
from mlagents.trainers.trainer_controller import TrainerController |
|
|
|
from mlagents.trainers.trainer_util import TrainerFactory |
|
|
|
from mlagents.trainers.simple_env_manager import SimpleEnvManager |
|
|
|
from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager |
|
|
|
from mlagents.trainers.sampler_class import SamplerManager |
|
|
|
from mlagents.trainers.demo_loader import write_demo |
|
|
|
from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary |
|
|
|
|
|
|
|
|
|
|
import gym |
|
|
|
from mlagents_envs.gym_to_unity_wrapper import GymToUnityWrapper |
|
|
|
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig |
|
|
|
|
|
|
|
# Name of the single behavior ("brain") that GymToUnityWrapper registers for
# every wrapped gym environment in these tests.
BRAIN_NAME = "1D"
|
|
|
|
|
|
|
|
|
|
resampling_interval=None, |
|
|
|
save_freq=save_freq, |
|
|
|
) |
|
|
|
|
|
|
|
env_manager.close() |
|
|
|
if ( |
|
|
|
success_threshold is not None |
|
|
|
): # For tests where we are just checking setup and not reward |
|
|
|
|
|
|
"gym_name,target_return", |
|
|
|
[ |
|
|
|
pytest.param("CartPole-v0", 150), # optimal 200 |
|
|
|
pytest.param("MountainCar-v0", -199), # solved if more than -200 |
|
|
|
pytest.param("MountainCarContinuous-v0", 0), # optimal 90 |
|
|
|
# pytest.param("MountainCar-v0", -199), # solved if more than -200 |
|
|
|
# pytest.param("MountainCarContinuous-v0", 0), # optimal 90 |
|
|
|
], |
|
|
|
) |
|
|
|
# NOTE(review): this function is truncated/mangled in this chunk: the
# @pytest.mark decorators that should precede it (compare the SAC test below)
# and the skip-guard whose closing paren survives as the stray ")" are
# missing, and lines from the SAC/parallel test variants appear interleaved.
# Confirm against the complete file before editing.
def test_ppo_gym_training(gym_name, target_return, pytestconfig):
# Orphaned ")" — presumably the tail of a dropped `raise pytest.skip(...)`
# guard. TODO confirm against the full file.
)
# Wrap the gym env so the ML-Agents trainer stack can drive it.
env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
# Trainer-config overrides layered on top of the base config.
override_vals = {
"max_steps": 1000000,
"batch_size": 1024,
"buffer_size": 10240,
"num_layers": 2,
"hidden_units": 128,
# NOTE(review): the four keys below duplicate keys above; in a dict
# literal the later value silently wins (max_steps=50000, buffer_size=1000,
# num_layers=1, hidden_units=64) — likely two tests' overrides merged by
# the extraction.
"max_steps": 50000,
"buffer_size": 1000,
"num_layers": 1,
"hidden_units": 64,
"normalize": True,
"curiosity": {"strength": 0.01, "gamma": 0.95, "encoding_size": 256},
"extrinsic": {"strength": 1, "gamma": 0.999},
# NOTE(review): no closing "}" for override_vals is visible before the next
# statement — a line was probably dropped here.
config = generate_config(PPO_CONFIG, override_vals)
_check_environment_trains(env, config, success_threshold=target_return)
# NOTE(review): this SAC config is never used in view — it probably belongs
# to the SAC test; verify against the complete file.
config = generate_config(SAC_CONFIG, override_vals)
# Env factory for a SubprocessEnvManager-based (parallel) variant;
# `worker_id` and `side_channels` are required by the manager's factory API.
def factory(worker_id, side_channels):
return GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
|
|
|
@pytest.mark.gym
@pytest.mark.parametrize(
    "gym_name,target_return",
    [
        pytest.param("CartPole-v0", 150),  # optimal 200
        pytest.param("MountainCar-v0", -199),  # solved if more than -200
        pytest.param("MountainCarContinuous-v0", 0),  # optimal 90
    ],
)
def test_sac_gym_training(gym_name, target_return, pytestconfig):
    """End-to-end SAC training smoke test on a classic-control gym env.

    Skipped unless the session was invoked with ``-m gym`` (these are slow,
    full-training tests). Otherwise the env is wrapped for ML-Agents and
    trained until the mean return reaches ``target_return``.
    """
    # Only run when the `gym` marker was explicitly requested via -m.
    if "gym" not in pytestconfig.getoption(name="-m", skip=False):
        raise pytest.skip(
            # Typo fixed: "Dit" -> "Did".
            "Did not run the gym tests, add the marker gym to run these tests"
        )
    env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
    # Overrides layered on top of the base SAC trainer config.
    override_vals = {
        "max_steps": 1000000,
        "buffer_size": 10240,
        "num_layers": 2,
        "hidden_units": 128,
        "time_horizon": 256,
        "learning_rate_schedule": "linear",
        "curiosity": {"strength": 0.01, "gamma": 0.95, "encoding_size": 256},
        "learning_rate": 3.0e-4,
    }
    # Bug fix: this is the SAC test, so derive the config from SAC_CONFIG —
    # the original passed PPO_CONFIG (the PPO test above uses SAC_CONFIG in a
    # stray line, suggesting the two were swapped).
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=target_return)
|
|
|
manager = SubprocessEnvManager(factory, EngineConfig.default_config(), 30) |
|
|
|
_check_environment_trains( |
|
|
|
env, config, success_threshold=target_return, env_manager=manager |
|
|
|
) |