The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.
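"""End-to-end training tests for hybrid (mixed continuous + discrete) action
spaces, covering PPO and SAC on vector, visual, and recurrent (memory)
variants of the simple test environments."""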

import attr
import pytest

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
)
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import check_environment_trains

BRAIN_NAME = "1D"

PPO_TORCH_CONFIG = ppo_dummy_config()
SAC_TORCH_CONFIG = sac_dummy_config()
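

# PPO on the simple 1D environment with a hybrid action space; each
# action_size is a (continuous_size, discrete_size) tuple, so every
# parametrized case mixes both action types.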
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_ppo(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        batch_size=64,
        buffer_size=1024,
        learning_rate=1e-3,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
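

# PPO with purely visual observations (num_vector=0) and a single
# continuous + single discrete action.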
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("num_visual", [1, 2])
def test_hybrid_visual_ppo(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)
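

# PPO with an LSTM (memory_size=16) on the memory environment, where acting
# correctly depends on recalling an earlier observation.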
@pytest.mark.check_environment_trains
def test_hybrid_recurrent_ppo():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=512,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=5000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
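

# SAC counterpart of test_hybrid_ppo, sweeping the same hybrid action-size
# combinations.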
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_sac(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=256,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
    )
    check_environment_trains(
        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
    )
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("num_visual", [1, 2])
def test_hybrid_visual_sac(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=128,
        learning_rate=3.0e-4,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=3000
    )
    check_environment_trains(env, {BRAIN_NAME: config})
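

# SAC with an LSTM on the memory environment; sequence_length sets how long
# the experience sequences fed to the recurrent network are during training.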
@pytest.mark.check_environment_trains
def test_hybrid_recurrent_sac():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    new_network_settings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=3500,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1212)