C# and Python checks for infinity and NaN. (#3418)

5 年前 · 47649555
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
        public void SetReward(float reward)
        {
 #if DEBUG
-            if (float.IsNaN(reward))
-            {
-                throw new ArgumentException("NaN reward passed to SetReward.");
-            }
+            Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetReward));
 #endif
            m_CumulativeReward += (reward - m_Reward);
            m_Reward = reward;
        public void AddReward(float increment)
        {
 #if DEBUG
-            if (float.IsNaN(increment))
-            {
-                throw new ArgumentException("NaN reward passed to AddReward.");
-            }
+            Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddReward));
 #endif
            m_Reward += increment;
            m_CumulativeReward += increment;
--- a/com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs
+++ b/com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs
        void AddFloatObs(float obs)
        {
 #if DEBUG
-            if (float.IsNaN(obs))
-            {
-                throw new System.ArgumentException("NaN value passed to observation.");
-            }
+            Utilities.DebugCheckNanAndInfinity(obs, nameof(obs), nameof(AddFloatObs));
 #endif
            m_Observations.Add(obs);
        }
--- a/com.unity.ml-agents/Runtime/Utilities.cs
+++ b/com.unity.ml-agents/Runtime/Utilities.cs
+using System;
 using UnityEngine;
 using System.Collections.Generic;

            }
            return numFloatObservations;
        }
+
+#if DEBUG
+        /// <summary>
+        /// Validates that a float is a finite number, throwing otherwise.
+        /// </summary>
+        /// <param name="value">The float to validate.</param>
+        /// <param name="valueCategory">Label for the value; inserted into the exception message.</param>
+        /// <param name="caller">Name of the calling method; inserted into the exception message.</param>
+        /// <exception cref="ArgumentException">
+        /// Thrown when <paramref name="value"/> is NaN, positive infinity or negative infinity.
+        /// </exception>
+        internal static void DebugCheckNanAndInfinity(float value, string valueCategory, string caller)
+        {
+            if (float.IsNaN(value))
+            {
+                throw new ArgumentException($"NaN {valueCategory} passed to {caller}.");
+            }
+            // float.IsInfinity is true for both positive and negative infinity.
+            if (float.IsInfinity(value))
+            {
+                throw new ArgumentException($"Infinity {valueCategory} passed to {caller}.");
+            }
+        }
+#endif
+
 }
--- a/ml-agents-envs/mlagents_envs/rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/rpc_utils.py
    return np.array(batched_visual, dtype=np.float32)


+def _raise_on_nan_and_inf(data: np.ndarray, source: str) -> np.ndarray:
+    """
+    Raise if the observation or reward data contains NaN or infinite values.
+
+    :param data: Array to validate. An empty array is accepted as-is.
+    :param source: Human-readable name of the data, used in the error message.
+    :return: The unmodified input array when every element is finite.
+    :raises RuntimeError: If any element is NaN or +/-Inf.
+    """
+    if data.size == 0:
+        return data
+
+    # Fast path: a finite mean proves that no element is NaN or Inf, since any
+    # NaN/Inf element propagates into the sum.
+    # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background
+    # The sum may overflow for very large finite values; silence those warnings
+    # here because the non-finite case is resolved exactly below.
+    with np.errstate(over="ignore", invalid="ignore"):
+        mean_is_finite = np.isfinite(np.mean(data))
+    if mean_is_finite:
+        return data
+
+    # Slow path: the mean alone cannot tell a real NaN/Inf element apart from an
+    # overflowed sum (or from +Inf and -Inf cancelling into NaN), so inspect the
+    # elements themselves before raising.
+    if np.isnan(data).any():
+        raise RuntimeError(f"The {source} provided had NaN values.")
+    if np.isinf(data).any():
+        raise RuntimeError(f"The {source} provided had Infinite values.")
+    return data
+
+
@timed
 def _process_vector_observation(
    obs_index: int,
        ],
        dtype=np.float32,
    )
-    # Check for NaNs or infs in the observations
-    # If there's a NaN in the observations, the np.mean() result will be NaN
-    # If there's an Inf (either sign) then the result will be Inf
-    # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background
-    # Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here
-    # This is OK though, worst case it results in an unnecessary (but harmless) nan_to_num call.
-    d = np.mean(np_obs)
-    has_nan = np.isnan(d)
-    has_inf = not np.isfinite(d)
-
-    # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values
-    if has_nan or has_inf:
-        np_obs = np.nan_to_num(np_obs)
-
-    if has_nan:
-        logger.warning(f"An agent had a NaN observation in the environment")
+    _raise_on_nan_and_inf(np_obs, "observations")
    return np_obs


        [agent_info.reward for agent_info in agent_info_list], dtype=np.float32
    )

-    d = np.dot(rewards, rewards)
-    has_nan = np.isnan(d)
-    has_inf = not np.isfinite(d)
-    # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values
-    if has_nan or has_inf:
-        rewards = np.nan_to_num(rewards)
-    if has_nan:
-        logger.warning(f"An agent had a NaN reward in the environment")
+    _raise_on_nan_and_inf(rewards, "rewards")

    done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool)
    max_step = np.array(
--- a/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


 def generate_list_agent_proto(
-    n_agent: int, shape: List[Tuple[int]]
+    n_agent: int,
+    shape: List[Tuple[int]],
+    infinite_rewards: bool = False,
+    nan_observations: bool = False,
-        ap.reward = agent_index
+        ap.reward = float("inf") if infinite_rewards else agent_index
        ap.done = agent_index % 2 == 0
        ap.max_step_reached = agent_index % 2 == 1
        ap.id = agent_index
            obs_proto = ObservationProto()
            obs_proto.shape.extend(list(shape[obs_index]))
            obs_proto.compression_type = NONE
-            obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index]))
+            obs_proto.float_data.data.extend(
+                ([float("nan")] if nan_observations else [0.1])
+                * np.prod(shape[obs_index])
+            )
            obs_proto_list.append(obs_proto)
        ap.observations.extend(obs_proto_list)
        result.append(ap)
    assert not group_spec.is_action_discrete()
    assert group_spec.is_action_continuous()
    assert group_spec.action_size == 6
+
+
+def test_batched_step_result_from_proto_raises_on_infinite():
+    """Agent protos built with infinite rewards must make the conversion raise RuntimeError."""
+    obs_shapes = [(3,), (4,)]
+    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
+    agent_protos = generate_list_agent_proto(10, obs_shapes, infinite_rewards=True)
+    with pytest.raises(RuntimeError):
+        batched_step_result_from_proto(agent_protos, spec)
+
+
+def test_batched_step_result_from_proto_raises_on_nan():
+    """Agent protos built with NaN observations must make the conversion raise RuntimeError."""
+    obs_shapes = [(3,), (4,)]
+    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
+    agent_protos = generate_list_agent_proto(10, obs_shapes, nan_observations=True)
+    with pytest.raises(RuntimeError):
+        batched_step_result_from_proto(agent_protos, spec)