Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

144 行
5.3 KiB

from unittest.mock import MagicMock
import pytest
import mlagents.trainers.tests.mock_brain as mb
import numpy as np
import os
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.torch.components.bc.module import BCModule
from mlagents.trainers.settings import (
TrainerSettings,
BehavioralCloningSettings,
NetworkSettings,
)
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config = TrainerSettings()
trainer_config.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
bc_module = BCModule(
policy,
settings=bc_settings,
policy_learning_rate=trainer_config.hyperparameters.learning_rate,
default_batch_size=trainer_config.hyperparameters.batch_size,
default_num_epoch=3,
)
return bc_module
# Test default values
def test_bcmodule_defaults():
# See if default values match
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 3
assert bc_module.batch_size == TrainerSettings().hyperparameters.batch_size
# Assign strange values and see if it overrides properly
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
num_epoch=100,
batch_size=10000,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, False)
assert bc_module.num_epoch == 100
assert bc_module.batch_size == 10000
# Test with continuous control env and vector actions
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_constant_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=0,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate == bc_module.current_lr
# Test with constant pretraining learning rate
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_linear_lr_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo",
steps=100,
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
# Should decay by 10/100 * 0.0003 = 0.00003
bc_module.policy.get_current_step = MagicMock(return_value=10)
old_learning_rate = bc_module.current_lr
_ = bc_module.update()
assert old_learning_rate - 0.00003 == pytest.approx(bc_module.current_lr, abs=0.01)
# Test with RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_update(is_sac):
mock_specs = mb.create_mock_3dball_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "test.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control and visual observations
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_dc_visual_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
# Test with discrete control, visual observations and RNN
@pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
def test_bcmodule_rnn_dc_update(is_sac):
mock_specs = mb.create_mock_banana_behavior_specs()
bc_settings = BehavioralCloningSettings(
demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
)
bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
stats = bc_module.update()
for _, item in stats.items():
assert isinstance(item, np.float32)
if __name__ == "__main__":
pytest.main()