|
|
|
|
|
|
import numpy as np
import pytest

# NOTE(review): the `from ... import (` opener for these two names was missing
# from this chunk; the module path below is inferred from the ML-Agents package
# layout — confirm against the original file.
from mlagents.trainers.torch.components.reward_providers import (
    CuriosityRewardProvider,
    create_reward_provider,
)

from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)
|
|
@pytest.mark.parametrize(
    "behavior_spec",
    # NOTE(review): this chunk contained both pre-SensorType and
    # SensorType-style constructions of the same specs (merge/diff residue);
    # only the SensorType form is kept, matching the `SensorType` import.
    # Confirm against version control.
    [
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    """Smoke-test that a curiosity reward provider can be constructed for the spec."""
    # NOTE(review): the original function body is missing from this chunk;
    # restore it from version control — a placeholder keeps the module importable.
    ...
|
|
|
|
|
|
@pytest.mark.parametrize(
    "behavior_spec",
    # NOTE(review): duplicated pre-SensorType constructions of the same
    # observation shapes (merge/diff residue) removed; SensorType form kept.
    # Confirm against version control.
    [
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
        BehaviorSpec(
            [(10,), (64, 66, 3), (84, 86, 1)],
            [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
            ACTIONSPEC_CONTINUOUS,
        ),
        BehaviorSpec(
            [(10,), (64, 66, 1)],
            [SensorType.OBSERVATION, SensorType.OBSERVATION],
            ACTIONSPEC_TWODISCRETE,
        ),
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    """Smoke-test that the reward-provider factory builds a curiosity provider
    for specs with vector/visual observations and each action-spec kind."""
    # NOTE(review): the original function body is missing from this chunk;
    # restore it from version control — a placeholder keeps the module importable.
    ...
|
|
|
|
|
|
# NOTE(review): the function signature takes `seed`, so the `("seed", SEED)`
# parametrize (present on the sibling tests) was evidently dropped from this
# chunk; restored here — confirm against version control.
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    # NOTE(review): duplicated pre-SensorType constructions of the same
    # observation shapes (merge/diff residue) removed; SensorType form kept.
    [
        BehaviorSpec(
            [(10,), (64, 66, 3), (24, 26, 1)],
            [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
            ACTIONSPEC_CONTINUOUS,
        ),
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
    ],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    """Check that the curiosity reward shrinks as the provider trains on a
    fixed batch (i.e. the forward/inverse models learn the transition)."""
    # NOTE(review): the original function body is missing from this chunk;
    # restore it from version control — a placeholder keeps the module importable.
    ...
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    # NOTE(review): the original chunk stacked two versions of this argument
    # (a doubled `"behavior_spec"` — a syntax error from merge/diff residue);
    # the SensorType-style version is kept, matching the `SensorType` import.
    [BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    """Check the curiosity inverse model's continuous-action prediction
    (deterministic under the given seed)."""
    np.random.seed(seed)
    # NOTE(review): the remainder of the original body is missing from this
    # chunk; restore it from version control.
    ...
|
|
|
|
|
|
# NOTE(review): the function signature takes `seed`, so the `("seed", SEED)`
# parametrize (present on the sibling tests) was evidently dropped from this
# chunk; restored here — confirm against version control.
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    # NOTE(review): duplicated pre-SensorType constructions of the same
    # observation shapes (merge/diff residue) removed; SensorType form kept.
    [
        BehaviorSpec(
            [(10,), (64, 66, 3), (24, 26, 1)],
            [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
            ACTIONSPEC_CONTINUOUS,
        ),
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
        BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
    ],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    """Check the curiosity forward model's next-state prediction for specs
    with vector/visual observations and each action-spec kind."""
    # NOTE(review): the original function body is missing from this chunk;
    # restore it from version control — a placeholder keeps the module importable.
    ...
|
|
|