|
|
|
|
|
|
CuriosityRewardProvider, |
|
|
|
create_reward_provider, |
|
|
|
) |
|
|
|
from mlagents_envs.base_env import BehaviorSpec, ActionType |
|
|
|
from mlagents_envs.base_env import BehaviorSpec, ActionSpec |
|
|
|
from mlagents.trainers.settings import CuriositySettings, RewardSignalType |
|
|
|
from mlagents.trainers.tests.torch.test_reward_providers.utils import ( |
|
|
|
create_agent_buffer, |
|
|
|
|
|
|
SEED = [42] |
|
|
|
|
|
|
|
ACTIONSPEC_CONTINUOUS = ActionSpec(5, ()) |
|
|
|
ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3)) |
|
|
|
ACTIONSPEC_DISCRETE = ActionSpec(0, (2,)) |
|
|
|
|
|
|
|
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5), |
|
|
|
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE), |
|
|
|
], |
|
|
|
) |
|
|
|
def test_construction(behavior_spec: BehaviorSpec) -> None: |
|
|
|
|
|
|
@pytest.mark.parametrize( |
|
|
|
"behavior_spec", |
|
|
|
[ |
|
|
|
BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5), |
|
|
|
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ActionType.CONTINUOUS, 5), |
|
|
|
BehaviorSpec([(10,), (64, 66, 1)], ActionType.DISCRETE, (2, 3)), |
|
|
|
BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS), |
|
|
|
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS), |
|
|
|
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE), |
|
|
|
], |
|
|
|
) |
|
|
|
def test_factory(behavior_spec: BehaviorSpec) -> None: |
|
|
|
|
|
|
@pytest.mark.parametrize( |
|
|
|
"behavior_spec", |
|
|
|
[ |
|
|
|
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ActionType.CONTINUOUS, 5), |
|
|
|
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)), |
|
|
|
BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)), |
|
|
|
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE), |
|
|
|
], |
|
|
|
) |
|
|
|
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None: |
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("seed", SEED) |
|
|
|
@pytest.mark.parametrize( |
|
|
|
"behavior_spec", [BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5)] |
|
|
|
"behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)] |
|
|
|
) |
|
|
|
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None: |
|
|
|
np.random.seed(seed) |
|
|
|
|
|
|
@pytest.mark.parametrize( |
|
|
|
"behavior_spec", |
|
|
|
[ |
|
|
|
BehaviorSpec([(10,), (64, 66, 3)], ActionType.CONTINUOUS, 5), |
|
|
|
BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)), |
|
|
|
BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)), |
|
|
|
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE), |
|
|
|
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE), |
|
|
|
], |
|
|
|
) |
|
|
|
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None: |
|
|
|