The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.
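The file below is one of the toolkit's Python trainer tests: it exercises PPO on lightweight stand-in environments (SimpleEnvironment, MemoryEnvironment) rather than a real Unity build. For orientation, a minimal sketch of driving an actual Unity executable through the low-level mlagents_envs API might look like the following; the build name is hypothetical and the single-behavior loop is a simplifying assumption, not something taken from the test file itself.

from mlagents_envs.environment import UnityEnvironment

# Launch a Unity build (file_name=None would attach to a running Editor instead).
env = UnityEnvironment(file_name="MyEnvironmentBuild")  # hypothetical build name
env.reset()
behavior_name = list(env.behavior_specs)[0]  # assumes a single behavior
spec = env.behavior_specs[behavior_name]

for _ in range(100):
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    # Sample a random action for every agent that requested a decision;
    # random_action() covers continuous, discrete, and hybrid action specs.
    env.set_actions(behavior_name, spec.action_spec.random_action(len(decision_steps)))
    env.step()
env.close()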

import pytest
import attr

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
    RecordEnvironment,
)
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.settings import (
    NetworkSettings,
    SelfPlaySettings,
    BehavioralCloningSettings,
    GAILSettings,
    RewardSignalType,
    EncoderType,
    FrameworkType,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (
    check_environment_trains,
    default_reward_processor,
)

# Behavior name shared by all of the simple test environments below.
BRAIN_NAME = "1D"

# Baseline PPO and SAC trainer configs, pinned to the PyTorch backend.
PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)


def test_hybrid_ppo():
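    # Hybrid action space: one continuous and one discrete action branch.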
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


def test_conthybrid_ppo():
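    # Continuous-only edge case of the hybrid action path (no discrete branch).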
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=0
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


def test_dischybrid_ppo():
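    # Discrete-only edge case of the hybrid action path (no continuous branch).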
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=0, discrete_action_size=1
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual):
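    # Hybrid actions driven by visual observations only (num_vector=0),
    # parametrized over one or two camera inputs.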
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=num_visual,
        num_vector=0,
        continuous_action_size=1,
        discrete_action_size=1,
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})


def test_recurrent_ppo():
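    # Hybrid actions with recurrence: MemoryEnvironment requires information
    # from earlier steps, so the network settings enable memory (memory_size=16).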
    env = MemoryEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
    )
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=100000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)


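# The larger hybrid-action variants below are currently disabled (commented out).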
# def test_3cdhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#
#
# def test_3ddhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)