浏览代码

cleaned up initialization of variance/mean

/fix-walker
Andrew Cohen 4 年前
当前提交
ce9bcefe
共有 2 个文件被更改,包括 17 次插入15 次删除
  1. 12
      ml-agents/mlagents/trainers/policy/tf_policy.py
  2. 20
      ml-agents/mlagents/trainers/tf/models.py

12
ml-agents/mlagents/trainers/policy/tf_policy.py


if self.use_vec_obs and self.normalize:
if self.first_normalization_update:
self.sess.run(
self.init_normalization_op,
feed_dict={self.initial_mean: np.mean(vector_obs, axis=0)},
self.init_normalization_op, feed_dict={self.vector_in: vector_obs}
self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)
else:
self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)
@property
def use_vis_obs(self):

self.normalization_steps: Optional[tf.Variable] = None
self.running_mean: Optional[tf.Variable] = None
self.running_variance: Optional[tf.Variable] = None
self.init_normalization_op: Optional[tf.Operation] = None
self.update_normalization_op: Optional[tf.Operation] = None
self.value: Optional[tf.Tensor] = None
self.all_log_probs: tf.Tensor = None

self.update_normalization_op = normalization_tensors.update_op
self.init_normalization_op = normalization_tensors.init_op
self.normalization_steps = normalization_tensors.steps
self.initial_mean = normalization_tensors.initial_mean
self.running_mean = normalization_tensors.running_mean
self.running_variance = normalization_tensors.running_variance
self.processed_vector_in = ModelUtils.normalize_vector_obs(

20
ml-agents/mlagents/trainers/tf/models.py


init_op: tf.Operation
update_op: tf.Operation
steps: tf.Tensor
initial_mean: tf.Tensor
running_mean: tf.Tensor
running_variance: tf.Tensor

and the update operation.
"""
vec_obs_size = vector_obs.shape[1]
initial_mean = tf.placeholder(
shape=[vec_obs_size], dtype=tf.float32, name="initial_mean"
)
steps = tf.get_variable(
"normalization_steps",
[],

dtype=tf.float32,
initializer=tf.ones_initializer(),
)
update_normalization = ModelUtils.create_normalizer_update(
initialize_normalization, update_normalization = ModelUtils.create_normalizer_update(
initialize_normalization = tf.assign(running_mean, initial_mean)
initial_mean,
running_mean,
running_variance,
)

steps: tf.Tensor,
running_mean: tf.Tensor,
running_variance: tf.Tensor,
) -> tf.Operation:
) -> Tuple[tf.Operation, tf.Operation]:
"""
Creates the initialization and update operations for the normalizer.
:param vector_input: Vector observation to use for updating the running mean and variance.

update_mean = tf.assign(running_mean, new_mean)
update_variance = tf.assign(running_variance, new_variance)
update_norm_step = tf.assign(steps, total_new_steps)
return tf.group([update_mean, update_variance, update_norm_step])
# On the first update, the mean and variance are computed directly from vector_input along axis 0
initial_mean, initial_variance = tf.nn.moments(vector_input, axes=[0])
initialize_mean = tf.assign(running_mean, initial_mean)
initialize_variance = tf.assign(running_variance, initial_variance + EPSILON)
return (
tf.group([initialize_mean, initialize_variance, update_norm_step]),
tf.group([update_mean, update_variance, update_norm_step]),
)
@staticmethod
def create_vector_observation_encoder(

正在加载...
取消
保存