您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
73 行
2.3 KiB
73 行
2.3 KiB
import pytest
|
|
from mlagents.trainers.torch.components.reward_providers import (
|
|
ExtrinsicRewardProvider,
|
|
create_reward_provider,
|
|
)
|
|
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
|
|
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
|
|
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
|
|
create_agent_buffer,
|
|
)
|
|
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
|
|
|
|
|
|
# Action specs shared by every parametrized test in this module.
# Continuous variant, built with a size argument of 5.
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
# Discrete variant, built from the tuple (2, 3)
# (presumably one entry per discrete branch -- confirm against ActionSpec).
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))
|
|
|
|
|
|
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_observation_specs_with_shapes([(10,)]), action_spec)
        for action_spec in (ACTIONSPEC_CONTINUOUS, ACTIONSPEC_TWODISCRETE)
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    """Check a directly constructed extrinsic provider's gamma and name.

    Runs once per behavior spec (continuous and two-branch discrete actions).
    The provider must expose the gamma set on its settings and be named
    "Extrinsic".
    """
    expected_gamma = 0.2
    signal_settings = RewardSignalSettings()
    signal_settings.gamma = expected_gamma

    provider = ExtrinsicRewardProvider(behavior_spec, signal_settings)

    assert provider.gamma == expected_gamma
    assert provider.name == "Extrinsic"
|
|
|
|
|
|
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_observation_specs_with_shapes([(10,)]), action_spec)
        for action_spec in (ACTIONSPEC_CONTINUOUS, ACTIONSPEC_TWODISCRETE)
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    """Check that the factory yields a provider named "Extrinsic".

    Calls ``create_reward_provider`` with ``RewardSignalType.EXTRINSIC`` and
    default ``RewardSignalSettings`` for each parametrized behavior spec.
    """
    provider = create_reward_provider(
        RewardSignalType.EXTRINSIC, behavior_spec, RewardSignalSettings()
    )

    assert provider.name == "Extrinsic"
|
|
|
|
|
|
@pytest.mark.parametrize("reward", [2.0, 3.0, 4.0])
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_observation_specs_with_shapes([(10,)]), action_spec)
        for action_spec in (ACTIONSPEC_CONTINUOUS, ACTIONSPEC_TWODISCRETE)
    ],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
    """Check that every evaluated reward equals the one put in the buffer.

    A buffer is created via ``create_agent_buffer(behavior_spec, 1000, reward)``
    (1000 is presumably the number of buffer entries -- confirm against the
    helper). The provider's ``evaluate`` output must then equal ``reward``
    element-wise.
    """
    agent_buffer = create_agent_buffer(behavior_spec, 1000, reward)
    provider = ExtrinsicRewardProvider(behavior_spec, RewardSignalSettings())

    rewards_out = provider.evaluate(agent_buffer)

    # Element-wise comparison; every entry must match the constant reward.
    assert (rewards_out == reward).all()
|