[MLA-1809] catch mismatched observation sizes (#5030)

4 年前 · 638475ad
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
+- Clarified the error message raised when observations whose shapes do not match the expected `ObservationSpec` are sent to the trainer. (#5030)
 - Fix an issue where queuing InputEvents overwrote data from previous events in the same frame.

 ## [1.8.0-preview] - 2021-02-17
--- a/ml-agents-envs/mlagents_envs/rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/rpc_utils.py
    return img


+def _check_observations_match_spec(
+    obs_index: int,
+    observation_spec: ObservationSpec,
+    agent_info_list: Collection[AgentInfoProto],
+) -> None:
+    """
+    Check that all the observations match the expected size.
+    This gives a nicer error than a cryptic numpy error later.
+    """
+    expected_obs_shape = tuple(observation_spec.shape)
+    for agent_info in agent_info_list:
+        agent_obs_shape = tuple(agent_info.observations[obs_index].shape)
+        if expected_obs_shape != agent_obs_shape:
+            raise UnityObservationException(
+                f"Observation at index={obs_index} for agent with "
+                f"id={agent_info.id} didn't match the ObservationSpec. "
+                f"Expected shape {expected_obs_shape} but got {agent_obs_shape}."
+            )
+
+
@timed
 def _observation_to_np_array(
    obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
@timed
 def _process_maybe_compressed_observation(
    obs_index: int,
-    shape: Tuple[int, int, int],
+    observation_spec: ObservationSpec,
+    shape = cast(Tuple[int, int, int], observation_spec.shape)
-    batched_visual = [
-        _observation_to_np_array(agent_obs.observations[obs_index], shape)
-        for agent_obs in agent_info_list
-    ]
+    try:
+        batched_visual = [
+            _observation_to_np_array(agent_obs.observations[obs_index], shape)
+            for agent_obs in agent_info_list
+        ]
+    except ValueError:
+        # Try to get a more useful error message
+        _check_observations_match_spec(obs_index, observation_spec, agent_info_list)
+        # If that didn't raise anything, raise the original error
+        raise
    return np.array(batched_visual, dtype=np.float32)



@timed
 def _process_rank_one_or_two_observation(
-    obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
+    obs_index: int,
+    observation_spec: ObservationSpec,
+    agent_info_list: Collection[AgentInfoProto],
-        return np.zeros((0,) + shape, dtype=np.float32)
-    np_obs = np.array(
-        [
-            agent_obs.observations[obs_index].float_data.data
-            for agent_obs in agent_info_list
-        ],
-        dtype=np.float32,
-    ).reshape((len(agent_info_list),) + shape)
+        return np.zeros((0,) + observation_spec.shape, dtype=np.float32)
+    try:
+        np_obs = np.array(
+            [
+                agent_obs.observations[obs_index].float_data.data
+                for agent_obs in agent_info_list
+            ],
+            dtype=np.float32,
+        ).reshape((len(agent_info_list),) + observation_spec.shape)
+    except ValueError:
+        # Try to get a more useful error message
+        _check_observations_match_spec(obs_index, observation_spec, agent_info_list)
+        # If that didn't raise anything, raise the original error
+        raise
    _raise_on_nan_and_inf(np_obs, "observations")
    return np_obs

    ]
    decision_obs_list: List[np.ndarray] = []
    terminal_obs_list: List[np.ndarray] = []
-    for obs_index, observation_specs in enumerate(behavior_spec.observation_specs):
-        is_visual = len(observation_specs.shape) == 3
+    for obs_index, observation_spec in enumerate(behavior_spec.observation_specs):
+        is_visual = len(observation_spec.shape) == 3
-            obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
-                    obs_index, obs_shape, decision_agent_info_list
+                    obs_index, observation_spec, decision_agent_info_list
-                    obs_index, obs_shape, terminal_agent_info_list
+                    obs_index, observation_spec, terminal_agent_info_list
-                    obs_index, observation_specs.shape, decision_agent_info_list
+                    obs_index, observation_spec, decision_agent_info_list
-                    obs_index, observation_specs.shape, terminal_agent_info_list
+                    obs_index, observation_spec, terminal_agent_info_list
                )
            )
    decision_rewards = np.array(
--- a/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
 def test_vector_observation():
    n_agents = 10
    shapes = [(3,), (4,)]
+    obs_specs = create_observation_specs_with_shapes(shapes)
-        arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
+        arr = _process_rank_one_or_two_observation(
+            obs_index, obs_specs[obs_index], list_proto
+        )
-    in_array_1 = np.random.rand(128, 64, 3)
+    shape = (128, 64, 3)
+    in_array_1 = np.random.rand(*shape)
-    in_array_2 = np.random.rand(128, 64, 3)
+    in_array_2 = np.random.rand(*shape)
    in_array_2_mapping = [0, 1, 2]
    proto_obs_2 = generate_compressed_proto_obs_with_mapping(
        in_array_2, in_array_2_mapping
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
-    arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+    arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
    assert list(arr.shape) == [2, 128, 64, 3]
    assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
    assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
-    arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
+    shape = (128, 64, 1)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+    arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
    assert list(arr.shape) == [2, 128, 64, 1]
    assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
    assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]
-    arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
+    shape = (128, 64, 8)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+
+    arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
    assert list(arr.shape) == [1, 128, 64, 8]
    assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)

    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]
+
+    shape = (128, 42, 3)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+
-        _process_maybe_compressed_observation(0, (128, 42, 3), ap_list)
+        _process_maybe_compressed_observation(0, obs_spec, ap_list)


 def test_batched_step_result_from_proto():
    assert decision_steps.obs[1].shape[1] == shapes[1][0]
    assert terminal_steps.obs[0].shape[1] == shapes[0][0]
    assert terminal_steps.obs[1].shape[1] == shapes[1][0]
+
+
+def test_mismatch_observations_raise_in_step_result_from_proto():
+    n_agents = 10
+    shapes = [(3,), (4,)]
+    spec = BehaviorSpec(
+        create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
+    )
+    ap_list = generate_list_agent_proto(n_agents, shapes)
+    # Hack an observation to be larger, we should get an exception
+    ap_list[0].observations[0].shape[0] += 1
+    ap_list[0].observations[0].float_data.data.append(0.42)
+    with pytest.raises(UnityObservationException):
+        steps_from_proto(ap_list, spec)


 def test_action_masking_discrete():