
Update second half of tests

Branch: /MLA-1734-demo-provider
Arthur Juliani, 4 years ago
Commit: b8f22fd7
7 files changed, 101 insertions and 46 deletions
  1. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (8 changes)
  2. ml-agents-envs/mlagents_envs/tests/test_steps.py (8 changes)
  3. ml-agents/mlagents/trainers/tests/mock_brain.py (8 changes)
  4. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (45 changes)
  5. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (14 changes)
  6. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (32 changes)
  7. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (32 changes)

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (8 changes)

   n_agents = 10
   shapes = [(3,), (4,)]
   sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
-  behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_discrete((7, 3)))
+  behavior_spec = BehaviorSpec(
+      shapes, sensor_type, ActionSpec.create_discrete((7, 3))
+  )
   ap_list = generate_list_agent_proto(n_agents, shapes)
   decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
   masks = decision_steps.action_mask

   n_agents = 10
   shapes = [(3,), (4,)]
   sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
-  behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_discrete((2, 2, 6)))
+  behavior_spec = BehaviorSpec(
+      shapes, sensor_type, ActionSpec.create_discrete((2, 2, 6))
+  )
   ap_list = generate_list_agent_proto(n_agents, shapes)
   decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
   masks = decision_steps.action_mask
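For readers tracking the API change, here is a minimal sketch (not part of the commit) of the three-argument BehaviorSpec these tests now build. The names come from the diff above; the expected action-mask layout is an assumption about mlagents_envs rather than something this commit asserts.

    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

    shapes = [(3,), (4,)]
    sensor_type = [SensorType.OBSERVATION] * len(shapes)  # one entry per observation
    behavior_spec = BehaviorSpec(
        shapes, sensor_type, ActionSpec.create_discrete((7, 3))
    )
    # Two discrete branches of sizes 7 and 3. The masks the test reads from
    # decision_steps.action_mask are assumed to be a list of boolean arrays,
    # one per branch, each shaped (n_agents, branch_size).
    assert behavior_spec.action_spec.discrete_branches == (7, 3)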

ml-agents-envs/mlagents_envs/tests/test_steps.py (8 changes)

   def test_empty_decision_steps():
       sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
       specs = BehaviorSpec(
-          observation_shapes=[(3, 2), (5,)], sensor_types=sensor_type, action_spec=ActionSpec.create_continuous(3)
+          observation_shapes=[(3, 2), (5,)],
+          sensor_types=sensor_type,
+          action_spec=ActionSpec.create_continuous(3),
       )
       ds = DecisionSteps.empty(specs)
       assert len(ds.obs) == 2

   def test_empty_terminal_steps():
       sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
       specs = BehaviorSpec(
-          observation_shapes=[(3, 2), (5,)], sensor_types=sensor_type, action_spec=ActionSpec.create_continuous(3)
+          observation_shapes=[(3, 2), (5,)],
+          sensor_types=sensor_type,
+          action_spec=ActionSpec.create_continuous(3),
       )
       ts = TerminalSteps.empty(specs)
       assert len(ts.obs) == 2
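A hedged sketch of what these two tests verify, assuming (consistent with mlagents_envs elsewhere) that an empty step object allocates one (0, *shape) array per observation spec:

    from mlagents_envs.base_env import (
        ActionSpec,
        BehaviorSpec,
        DecisionSteps,
        SensorType,
    )

    specs = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        sensor_types=[SensorType.OBSERVATION, SensorType.OBSERVATION],
        action_spec=ActionSpec.create_continuous(3),
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2              # one array per observation shape
    assert ds.obs[0].shape == (0, 3, 2)  # zero agents, shape preserved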

ml-agents/mlagents/trainers/tests/mock_brain.py (8 changes)

       TerminalSteps,
       BehaviorSpec,
       ActionSpec,
+      SensorType,
   )

   obs_list = []
   for _shape in observation_shapes:
       obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
+  sensor_types = [SensorType.OBSERVATION for i in range(len(obs_list))]
   action_mask = None
   if action_spec.is_discrete():
       action_mask = [

   reward = np.array(num_agents * [1.0], dtype=np.float32)
   interrupted = np.array(num_agents * [False], dtype=np.bool)
   agent_id = np.arange(num_agents, dtype=np.int32)
-  behavior_spec = BehaviorSpec(observation_shapes, action_spec)
+  behavior_spec = BehaviorSpec(observation_shapes, sensor_types, action_spec)
   if done:
       return (
           DecisionSteps.empty(behavior_spec),

   else:
       action_spec = ActionSpec.create_continuous(vector_action_space)
   behavior_spec = BehaviorSpec(
-      [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
+      [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)],
+      [SensorType.OBSERVATION],
+      action_spec,
   )
   return behavior_spec
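The pattern the mock now follows is worth calling out: sensor_types must stay in lockstep with observation_shapes, one entry per observation. A minimal sketch using a hypothetical make_spec helper (not a function from this repo):

    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

    def make_spec(observation_shapes, continuous_action_size=2):
        # One SensorType per observation shape keeps the two lists aligned.
        sensor_types = [SensorType.OBSERVATION] * len(observation_shapes)
        return BehaviorSpec(
            observation_shapes,
            sensor_types,
            ActionSpec.create_continuous(continuous_action_size),
        )

    spec = make_spec([(84, 84, 3), (10,)])  # one visual + one vector observation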

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (45 changes)

       CuriosityRewardProvider,
       create_reward_provider,
   )
-  from mlagents_envs.base_env import BehaviorSpec, ActionSpec
+  from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
   from mlagents.trainers.settings import CuriositySettings, RewardSignalType
   from mlagents.trainers.tests.torch.test_reward_providers.utils import (
       create_agent_buffer,

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
       ],
   )
   def test_construction(behavior_spec: BehaviorSpec) -> None:

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec(
+              [(10,), (64, 66, 3), (84, 86, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec(
+              [(10,), (64, 66, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_TWODISCRETE,
+          ),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   def test_factory(behavior_spec: BehaviorSpec) -> None:

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec(
+              [(10,), (64, 66, 3), (24, 26, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

   @pytest.mark.parametrize("seed", SEED)
   @pytest.mark.parametrize(
-      "behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)]
+      "behavior_spec",
+      [BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
   )
   def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
       np.random.seed(seed)

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec(
+              [(10,), (64, 66, 3), (24, 26, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
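A hedged sketch of how one of these parametrized cases drives the provider end to end. The reward-provider import path and the create_agent_buffer signature are assumptions based on the surrounding test files, not guarantees from this commit:

    from mlagents.trainers.settings import CuriositySettings
    from mlagents.trainers.tests.torch.test_reward_providers.utils import (
        create_agent_buffer,
    )
    from mlagents.trainers.torch.components.reward_providers import (
        CuriosityRewardProvider,
    )
    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

    behavior_spec = BehaviorSpec(
        [(10,)], [SensorType.OBSERVATION], ActionSpec.create_continuous(5)
    )
    curiosity_rp = CuriosityRewardProvider(behavior_spec, CuriositySettings())
    buffer = create_agent_buffer(behavior_spec, 5)  # 5 synthetic experiences
    for _ in range(10):
        curiosity_rp.update(buffer)   # intrinsic reward should fall as the model fits
    rewards = curiosity_rp.evaluate(buffer)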

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (14 changes)

       ExtrinsicRewardProvider,
       create_reward_provider,
   )
-  from mlagents_envs.base_env import BehaviorSpec, ActionSpec
+  from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
   from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
   from mlagents.trainers.tests.torch.test_reward_providers.utils import (
       create_agent_buffer,

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
       ],
   )
   def test_construction(behavior_spec: BehaviorSpec) -> None:

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
       ],
   )
   def test_factory(behavior_spec: BehaviorSpec) -> None:

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
       ],
   )
   def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (32 changes)

       GAILRewardProvider,
       create_reward_provider,
   )
-  from mlagents_envs.base_env import BehaviorSpec, ActionSpec
+  from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
   from mlagents.trainers.settings import GAILSettings, RewardSignalType
   from mlagents.trainers.tests.torch.test_reward_providers.utils import (
       create_agent_buffer,

   ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))

-  @pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
+  @pytest.mark.parametrize(
+      "behavior_spec",
+      [BehaviorSpec([(8,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
+  )
   def test_construction(behavior_spec: BehaviorSpec) -> None:
       gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
       gail_rp = GAILRewardProvider(behavior_spec, gail_settings)

-  @pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
+  @pytest.mark.parametrize(
+      "behavior_spec",
+      [BehaviorSpec([(8,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
+  )
   def test_factory(behavior_spec: BehaviorSpec) -> None:
       gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
       gail_rp = create_reward_provider(

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec(
+              [(8,), (24, 26, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec([(50,)], [SensorType.OBSERVATION], ACTIONSPEC_FOURDISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   @pytest.mark.parametrize("use_actions", [False, True])

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec(
+              [(8,), (24, 26, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec([(50,)], [SensorType.OBSERVATION], ACTIONSPEC_FOURDISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   @pytest.mark.parametrize("use_actions", [False, True])

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (32 changes)

       RNDRewardProvider,
       create_reward_provider,
   )
-  from mlagents_envs.base_env import BehaviorSpec, ActionSpec
+  from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
   from mlagents.trainers.settings import RNDSettings, RewardSignalType
   from mlagents.trainers.tests.torch.test_reward_providers.utils import (
       create_agent_buffer,

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
       ],
   )
   def test_construction(behavior_spec: BehaviorSpec) -> None:

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
+          BehaviorSpec(
+              [(10,), (64, 66, 3), (84, 86, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec(
+              [(10,), (64, 66, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_TWODISCRETE,
+          ),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   def test_factory(behavior_spec: BehaviorSpec) -> None:

   @pytest.mark.parametrize(
       "behavior_spec",
       [
-          BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-          BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
-          BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+          BehaviorSpec(
+              [(10,), (64, 66, 3), (24, 26, 1)],
+              [SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
+              ACTIONSPEC_CONTINUOUS,
+          ),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
+          BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
       ],
   )
   def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
