浏览代码

[MLA-1809] catch mismatched observation sizes (#5030)

/develop/gail-srl-hack
GitHub 4 年前
当前提交
638475ad
共有 3 个文件被更改,包括 91 次插入28 次删除
  1. 1
      com.unity.ml-agents/CHANGELOG.md
  2. 76
      ml-agents-envs/mlagents_envs/rpc_utils.py
  3. 42
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py

1
com.unity.ml-agents/CHANGELOG.md


#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
- Made the error message clearer when observations of different shapes are sent to the trainer. (#5030)
- Fixed an issue where queuing InputEvents overwrote data from previous events in the same frame.
## [1.8.0-preview] - 2021-02-17

76
ml-agents-envs/mlagents_envs/rpc_utils.py


return img
def _check_observations_match_spec(
    obs_index: int,
    observation_spec: ObservationSpec,
    agent_info_list: Collection[AgentInfoProto],
) -> None:
    """
    Validate that each agent's observation at ``obs_index`` has the shape
    declared by ``observation_spec``.

    Callers use this to surface a descriptive error instead of the cryptic
    numpy error that a shape mismatch would otherwise produce later.

    :param obs_index: Position of the observation within each agent's
        ``observations`` list.
    :param observation_spec: Spec whose ``shape`` is the expected shape.
    :param agent_info_list: Agents whose observations are checked, in order.
    :raises UnityObservationException: For the first agent whose observation
        shape differs from the spec's shape.
    """
    # Normalize to a tuple once so the per-agent comparison is tuple-to-tuple.
    expected_obs_shape = tuple(observation_spec.shape)
    for agent_info in agent_info_list:
        agent_obs_shape = tuple(agent_info.observations[obs_index].shape)
        if agent_obs_shape == expected_obs_shape:
            # Shapes agree; move on to the next agent.
            continue
        raise UnityObservationException(
            f"Observation at index={obs_index} for agent with "
            f"id={agent_info.id} didn't match the ObservationSpec. "
            f"Expected shape {expected_obs_shape} but got {agent_obs_shape}."
        )
@timed
def _observation_to_np_array(
obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None

@timed
def _process_maybe_compressed_observation(
obs_index: int,
shape: Tuple[int, int, int],
observation_spec: ObservationSpec,
shape = cast(Tuple[int, int, int], observation_spec.shape)
batched_visual = [
_observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
try:
batched_visual = [
_observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
except ValueError:
# Try to get a more useful error message
_check_observations_match_spec(obs_index, observation_spec, agent_info_list)
# If that didn't raise anything, raise the original error
raise
return np.array(batched_visual, dtype=np.float32)

@timed
def _process_rank_one_or_two_observation(
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
obs_index: int,
observation_spec: ObservationSpec,
agent_info_list: Collection[AgentInfoProto],
return np.zeros((0,) + shape, dtype=np.float32)
np_obs = np.array(
[
agent_obs.observations[obs_index].float_data.data
for agent_obs in agent_info_list
],
dtype=np.float32,
).reshape((len(agent_info_list),) + shape)
return np.zeros((0,) + observation_spec.shape, dtype=np.float32)
try:
np_obs = np.array(
[
agent_obs.observations[obs_index].float_data.data
for agent_obs in agent_info_list
],
dtype=np.float32,
).reshape((len(agent_info_list),) + observation_spec.shape)
except ValueError:
# Try to get a more useful error message
_check_observations_match_spec(obs_index, observation_spec, agent_info_list)
# If that didn't raise anything, raise the original error
raise
_raise_on_nan_and_inf(np_obs, "observations")
return np_obs

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, observation_specs in enumerate(behavior_spec.observation_specs):
is_visual = len(observation_specs.shape) == 3
for obs_index, observation_spec in enumerate(behavior_spec.observation_specs):
is_visual = len(observation_spec.shape) == 3
obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
obs_index, obs_shape, decision_agent_info_list
obs_index, observation_spec, decision_agent_info_list
obs_index, obs_shape, terminal_agent_info_list
obs_index, observation_spec, terminal_agent_info_list
obs_index, observation_specs.shape, decision_agent_info_list
obs_index, observation_spec, decision_agent_info_list
obs_index, observation_specs.shape, terminal_agent_info_list
obs_index, observation_spec, terminal_agent_info_list
)
)
decision_rewards = np.array(

42
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


def test_vector_observation():
n_agents = 10
shapes = [(3,), (4,)]
obs_specs = create_observation_specs_with_shapes(shapes)
arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
arr = _process_rank_one_or_two_observation(
obs_index, obs_specs[obs_index], list_proto
)
in_array_1 = np.random.rand(128, 64, 3)
shape = (128, 64, 3)
in_array_1 = np.random.rand(*shape)
in_array_2 = np.random.rand(128, 64, 3)
in_array_2 = np.random.rand(*shape)
in_array_2_mapping = [0, 1, 2]
proto_obs_2 = generate_compressed_proto_obs_with_mapping(
in_array_2, in_array_2_mapping

ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
obs_spec = create_observation_specs_with_shapes([shape])[0]
arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
assert list(arr.shape) == [2, 128, 64, 3]
assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)

ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
shape = (128, 64, 1)
obs_spec = create_observation_specs_with_shapes([shape])[0]
arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
assert list(arr.shape) == [2, 128, 64, 1]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)

ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
shape = (128, 64, 8)
obs_spec = create_observation_specs_with_shapes([shape])[0]
arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
assert list(arr.shape) == [1, 128, 64, 8]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)

ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
shape = (128, 42, 3)
obs_spec = create_observation_specs_with_shapes([shape])[0]
_process_maybe_compressed_observation(0, (128, 42, 3), ap_list)
_process_maybe_compressed_observation(0, obs_spec, ap_list)
def test_batched_step_result_from_proto():

assert decision_steps.obs[1].shape[1] == shapes[1][0]
assert terminal_steps.obs[0].shape[1] == shapes[0][0]
assert terminal_steps.obs[1].shape[1] == shapes[1][0]
def test_mismatch_observations_raise_in_step_result_from_proto():
    """
    steps_from_proto is expected to raise UnityObservationException when one
    agent's observation does not match the shape in the behavior spec.
    """
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    observation_specs = create_observation_specs_with_shapes(obs_shapes)
    behavior_spec = BehaviorSpec(observation_specs, ActionSpec.create_continuous(3))
    agent_protos = generate_list_agent_proto(agent_count, obs_shapes)

    # Tamper with the first agent's first observation: grow both its declared
    # shape and its data by one element so it no longer matches the spec.
    tampered_obs = agent_protos[0].observations[0]
    tampered_obs.shape[0] += 1
    tampered_obs.float_data.data.append(0.42)

    with pytest.raises(UnityObservationException):
        steps_from_proto(agent_protos, behavior_spec)
def test_action_masking_discrete():

正在加载...
取消
保存