浏览代码

large normalization obs unit test

/fix-walker
Andrew Cohen 4 年前
当前提交
4b094d25
共有 3 个文件被更改,包括 115 次插入7 次删除
  1. 3
      ml-agents/mlagents/trainers/tests/mock_brain.py
  2. 113
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  3. 6
      ml-agents/mlagents/trainers/tf/models.py

3
ml-agents/mlagents/trainers/tests/mock_brain.py


memory=memory,
)
steps_list.append(experience)
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

113
ml-agents/mlagents/trainers/tests/test_nn_policy.py


DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 32
NUM_AGENTS = 12
EPSILON = 1e-7
def create_policy_mock(

assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
def test_large_normalization():
    """Exercise running normalization with large, tightly clustered observations.

    Two batches of scalar observations near 1800 are fed to the policy through
    ``update_normalization``. After each batch the running mean and the running
    variance divided by the step count are compared (within 0.01) against the
    values NumPy computes over all observations seen so far.
    """
    behavior_spec = mb.setup_test_behavior_specs(
        use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
    )
    # Taken from Walker seed 3713 which causes NaN without proper initialization
    first_batch = [
        1800.00036621,
        1799.96972656,
        1800.01245117,
        1800.07214355,
        1800.02758789,
        1799.98303223,
        1799.88647461,
        1799.89575195,
        1800.03479004,
        1800.14025879,
        1800.17675781,
        1800.20581055,
        1800.33740234,
        1800.36450195,
        1800.43457031,
        1800.45544434,
        1800.44604492,
        1800.56713867,
        1800.73901367,
    ]
    second_batch = [
        1799.99975586,
        1799.96679688,
        1799.92980957,
        1799.89550781,
        1799.93774414,
        1799.95300293,
        1799.94067383,
        1799.92993164,
        1799.84057617,
        1799.69873047,
        1799.70605469,
        1799.82849121,
        1799.85095215,
        1799.76977539,
        1799.78283691,
        1799.76708984,
        1799.67163086,
        1799.59191895,
        1799.5135498,
        1799.45556641,
        1799.3717041,
    ]
    settings = TrainerSettings(network_settings=NetworkSettings(normalize=True))
    policy = TFPolicy(0, behavior_spec, settings, "testdir", False)

    def feed_and_read(values):
        # Build a fake trajectory with one step per value, overwrite each
        # step's single vector observation, push the batch through the
        # normalizer, and read back the running statistics.
        trajectory = make_fake_trajectory(
            length=len(values),
            max_step_complete=True,
            observation_shapes=[(1,)],
            action_space=[2],
        )
        for idx, value in enumerate(values):
            trajectory.steps[idx].obs[0] = np.array([value], dtype=np.float32)
        buffer = trajectory.to_agentbuffer()
        policy.update_normalization(buffer["vector_obs"])
        return policy.sess.run(
            [policy.normalization_steps, policy.running_mean, policy.running_variance]
        )

    # Check that the running mean and variance are correct after the first batch
    steps, mean, variance = feed_and_read(first_batch)
    expected_mean = np.mean(first_batch, dtype=np.float32)
    expected_var = np.var(first_batch, dtype=np.float32)
    assert mean[0] == pytest.approx(expected_mean, abs=0.01)
    assert variance[0] / steps == pytest.approx(expected_var, abs=0.01)

    # After the second batch the statistics must cover both batches combined
    steps, mean, variance = feed_and_read(second_batch)
    combined = first_batch + second_batch
    expected_mean = np.mean(combined, dtype=np.float32)
    expected_var = np.var(combined, dtype=np.float32)
    assert mean[0] == pytest.approx(expected_mean, abs=0.01)
    assert variance[0] / steps == pytest.approx(expected_var, abs=0.01)
time_horizon = 6
trajectory = make_fake_trajectory(
length=time_horizon,

assert steps == 6
assert mean[0] == 0.5
# Note: variance is divided by number of steps, and initialized to 1 to avoid
# divide by 0. The right answer is 0.25
assert (variance[0] - 1) / steps == 0.25
# Note: variance is initialized to the variance of the initial trajectory + EPSILON
# (to avoid divide by 0), multiplied by the number of steps. The correct answer is 0.25
assert variance[0] / steps == pytest.approx(0.25, abs=0.01)
# Make another update, this time with all 1's
time_horizon = 10
trajectory = make_fake_trajectory(

assert steps == 16
assert mean[0] == 0.8125
assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
assert variance[0] / steps == pytest.approx(0.152, abs=0.01)
def test_min_visual_size():

6
ml-agents/mlagents/trainers/tf/models.py


# First mean and variance calculated normally
initial_mean, initial_variance = tf.nn.moments(vector_input, axes=[0])
initialize_mean = tf.assign(running_mean, initial_mean)
initialize_variance = tf.assign(running_variance, initial_variance + EPSILON)
# Multiplied by total_new_steps because the running variance is divided by total_new_steps in the normalization
initialize_variance = tf.assign(
running_variance,
(initial_variance + EPSILON) * tf.cast(total_new_steps, dtype=tf.float32),
)
return (
tf.group([initialize_mean, initialize_variance, update_norm_step]),
tf.group([update_mean, update_variance, update_norm_step]),

正在加载...
取消
保存