浏览代码

C# and Python checks for infinity and NaN. (#3418)

/asymm-envs
GitHub 5 年前
当前提交
47649555
共有 5 个文件被更改,包括 69 次插入39 次删除
  1. 10
      com.unity.ml-agents/Runtime/Agent.cs
  2. 5
      com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs
  3. 17
      com.unity.ml-agents/Runtime/Utilities.cs
  4. 46
      ml-agents-envs/mlagents_envs/rpc_utils.py
  5. 30
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py

10
com.unity.ml-agents/Runtime/Agent.cs


public void SetReward(float reward)
{
#if DEBUG
if (float.IsNaN(reward))
{
throw new ArgumentException("NaN reward passed to SetReward.");
}
Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetReward));
#endif
m_CumulativeReward += (reward - m_Reward);
m_Reward = reward;

public void AddReward(float increment)
{
#if DEBUG
if (float.IsNaN(increment))
{
throw new ArgumentException("NaN reward passed to AddReward.");
}
Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddReward));
#endif
m_Reward += increment;
m_CumulativeReward += increment;

5
com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs


/// <summary>
/// Appends a single float value to <c>m_Observations</c>.
/// </summary>
/// <param name="obs">The observation value to add.</param>
/// <exception cref="System.ArgumentException">
/// In DEBUG builds, raised by <c>Utilities.DebugCheckNanAndInfinity</c> when
/// <paramref name="obs"/> is NaN or infinite.
/// </exception>
void AddFloatObs(float obs)
{
#if DEBUG
    // One validation path for both NaN and +/-Infinity. A separate inline NaN
    // check here would only duplicate the helper's first branch.
    Utilities.DebugCheckNanAndInfinity(obs, nameof(obs), nameof(AddFloatObs));
#endif
    m_Observations.Add(obs);
}

17
com.unity.ml-agents/Runtime/Utilities.cs


using System;
using UnityEngine;
using System.Collections.Generic;

}
return numFloatObservations;
}
#if DEBUG
/// <summary>
/// Throws if <paramref name="value"/> is NaN or positive/negative infinity.
/// </summary>
/// <param name="value">The value to validate.</param>
/// <param name="valueCategory">Label for the value; interpolated into the exception message.</param>
/// <param name="caller">Name of the calling method; interpolated into the exception message.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="value"/> is NaN or infinite.
/// </exception>
internal static void DebugCheckNanAndInfinity(float value, string valueCategory, string caller)
{
    if (float.IsNaN(value))
    {
        throw new ArgumentException($"NaN {valueCategory} passed to {caller}.");
    }

    // float.IsInfinity covers both PositiveInfinity and NegativeInfinity.
    if (float.IsInfinity(value))
    {
        throw new ArgumentException($"Infinity {valueCategory} passed to {caller}.");
    }
}
#endif
}

46
ml-agents-envs/mlagents_envs/rpc_utils.py


return np.array(batched_visual, dtype=np.float32)
def _raise_on_nan_and_inf(data: np.array, source: str) -> np.array:
# Check for NaNs or Infinite values in the observation or reward data.
# If there's a NaN in the observations, the np.mean() result will be NaN
# If there's an Infinite value (either sign) then the result will be Inf
# See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background
# Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here
# Raise a Runtime error in the case that NaNs or Infinite values make it into the data.
if data.size == 0:
return data
d = np.mean(data)
has_nan = np.isnan(d)
has_inf = not np.isfinite(d)
if has_nan:
raise RuntimeError(f"The {source} provided had NaN values.")
if has_inf:
raise RuntimeError(f"The {source} provided had Infinite values.")
@timed
def _process_vector_observation(
obs_index: int,

],
dtype=np.float32,
)
# Check for NaNs or infs in the observations
# If there's a NaN in the observations, the np.mean() result will be NaN
# If there's an Inf (either sign) then the result will be Inf
# See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background
# Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here
# This is OK though, worst case it results in an unnecessary (but harmless) nan_to_num call.
d = np.mean(np_obs)
has_nan = np.isnan(d)
has_inf = not np.isfinite(d)
# If we have any NaN or Infs, use np.nan_to_num to replace these with finite values
if has_nan or has_inf:
np_obs = np.nan_to_num(np_obs)
if has_nan:
logger.warning(f"An agent had a NaN observation in the environment")
_raise_on_nan_and_inf(np_obs, "observations")
return np_obs

[agent_info.reward for agent_info in agent_info_list], dtype=np.float32
)
d = np.dot(rewards, rewards)
has_nan = np.isnan(d)
has_inf = not np.isfinite(d)
# If we have any NaN or Infs, use np.nan_to_num to replace these with finite values
if has_nan or has_inf:
rewards = np.nan_to_num(rewards)
if has_nan:
logger.warning(f"An agent had a NaN reward in the environment")
_raise_on_nan_and_inf(rewards, "rewards")
done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool)
max_step = np.array(

30
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


def generate_list_agent_proto(
n_agent: int, shape: List[Tuple[int]]
n_agent: int,
shape: List[Tuple[int]],
infinite_rewards: bool = False,
nan_observations: bool = False,
ap.reward = agent_index
ap.reward = float("inf") if infinite_rewards else agent_index
ap.done = agent_index % 2 == 0
ap.max_step_reached = agent_index % 2 == 1
ap.id = agent_index

obs_proto = ObservationProto()
obs_proto.shape.extend(list(shape[obs_index]))
obs_proto.compression_type = NONE
obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index]))
obs_proto.float_data.data.extend(
([float("nan")] if nan_observations else [0.1])
* np.prod(shape[obs_index])
)
obs_proto_list.append(obs_proto)
ap.observations.extend(obs_proto_list)
result.append(ap)

assert not group_spec.is_action_discrete()
assert group_spec.is_action_continuous()
assert group_spec.action_size == 6
def test_batched_step_result_from_proto_raises_on_infinite():
    """Agent protos generated with infinite rewards must trigger a RuntimeError."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
    protos = generate_list_agent_proto(
        agent_count, obs_shapes, infinite_rewards=True
    )
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(protos, spec)
def test_batched_step_result_from_proto_raises_on_nan():
    """Agent protos generated with NaN observations must trigger a RuntimeError."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
    protos = generate_list_agent_proto(
        agent_count, obs_shapes, nan_observations=True
    )
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(protos, spec)
正在加载...
取消
保存