浏览代码

Fix some mis-renamings

/MLA-1734-demo-provider
Arthur Juliani 4 年前
当前提交
7c37c759
共有 13 个文件被更改,包括 116 次插入和 41 次删除
  1. 9
      ml-agents/mlagents/trainers/demo_loader.py
  2. 6
      ml-agents/mlagents/trainers/tests/dummy_config.py
  3. 4
      ml-agents/mlagents/trainers/tests/mock_brain.py
  4. 2
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  5. 4
      ml-agents/mlagents/trainers/tests/test_trajectory.py
  6. 38
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  7. 24
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  8. 22
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  9. 24
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  10. 4
      ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
  11. 4
      ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
  12. 12
      ml-agents/mlagents/trainers/torch/networks.py
  13. 4
      ml-agents/tests/yamato/scripts/run_llapi.py

9
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
if len(behavior_spec.observation_specs) != len(expected_behavior_spec.observation_specs):
if len(behavior_spec.observation_specs) != len(
expected_behavior_spec.observation_specs
):
zip(behavior_spec.observation_specs, expected_behavior_spec.observation_specs)
zip(
behavior_spec.observation_specs,
expected_behavior_spec.observation_specs,
)
):
if demo_obs.shape != policy_obs.shape:
raise RuntimeError(

6
ml-agents/mlagents/trainers/tests/dummy_config.py


def create_observation_specs_with_shapes(
shapes: List[Tuple[int, ...]]
) -> List[ObservationSpec]:
obs_spec: List[ObservationSpec] = []
obs_specs: List[ObservationSpec] = []
obs_spec.append(spec)
return obs_spec
obs_specs.append(spec)
return obs_specs

4
ml-agents/mlagents/trainers/tests/mock_brain.py


) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
observation_spec=behavior_spec.observation_spec,
observation_specs=behavior_spec.observation_specs,
action_spec=behavior_spec.action_spec,
)

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
obs_spec = create_observation_spec_with_shapes(observation_shapes)
obs_spec = create_observation_specs_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
return behavior_spec

2
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
observation_spec=create_observation_spec_with_shapes([(1,)]),
observation_spec=create_observation_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

4
ml-agents/mlagents/trainers/tests/test_trajectory.py


wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
observation_specs=create_observation_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
observation_specs=create_observation_specs_with_shapes(
[(VEC_OBS_SIZE,), (84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

38
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
ACTIONSPEC_CONTINUOUS,

ACTIONSPEC_TWODISCRETE,
),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
[
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
)
],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_observation_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

24
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

22
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


"behavior_spec",
[
BehaviorSpec(
create_observation_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_observation_specs_with_shapes([(8,), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
BehaviorSpec(create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

"behavior_spec",
[
BehaviorSpec(
create_observation_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_observation_specs_with_shapes([(8,), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
BehaviorSpec(create_observation_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_observation_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

24
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
ACTIONSPEC_CONTINUOUS,

ACTIONSPEC_TWODISCRETE,
),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

4
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._state_encoder = NetworkBody(specs.observation_spec, state_encoder_settings)
self._state_encoder = NetworkBody(
specs.observation_spec, state_encoder_settings
)
self._action_flattener = ActionFlattener(self._action_spec)

4
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


unencoded_size = (
self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones
self.encoder = NetworkBody(specs.observation_specs, encoder_settings, unencoded_size)
self.encoder = NetworkBody(
specs.observation_specs, encoder_settings, unencoded_size
)
estimator_input_size = settings.encoding_size
if settings.use_vail:

12
ml-agents/mlagents/trainers/torch/networks.py


):
self.use_lstm = network_settings.memory is not None
super().__init__(
observation_spec, network_settings, action_spec, conditional_sigma, tanh_squash
observation_spec,
network_settings,
action_spec,
conditional_sigma,
tanh_squash,
)
self.stream_names = stream_names
self.value_heads = ValueHeads(stream_names, self.encoding_size)

):
self.use_lstm = network_settings.memory is not None
super().__init__(
observation_spec, network_settings, action_spec, conditional_sigma, tanh_squash
observation_spec,
network_settings,
action_spec,
conditional_sigma,
tanh_squash,
)
self.stream_names = stream_names
self.critic = ValueNetwork(stream_names, observation_spec, network_settings)

4
ml-agents/tests/yamato/scripts/run_llapi.py


print("Number of observations : ", len(group_spec.observation_specs))
# Is there a visual observation ?
vis_obs = any(len(obs_spec.shape) == 3 for obs_spec in group_spec.observation_specs)
vis_obs = any(
len(obs_spec.shape) == 3 for obs_spec in group_spec.observation_specs
)
print("Is there a visual observation ?", vis_obs)
# Examine the state space for the first observation for the first agent

正在加载...
取消
保存