浏览代码

Use the mean of the first trajectory to initialize the normalizer

/fix-walker
Andrew Cohen 4 年前
当前提交
18ff42a6
共有 2 个文件被更改，包括 24 次插入和 3 次删除
  1. 11
      ml-agents/mlagents/trainers/policy/tf_policy.py
  2. 16
      ml-agents/mlagents/trainers/tf/models.py

11
ml-agents/mlagents/trainers/policy/tf_policy.py


self.assign_ops: List[tf.Operation] = []
self.update_dict: Dict[str, tf.Tensor] = {}
self.inference_dict: Dict[str, tf.Tensor] = {}
self.first_normalization_update: bool = False
self.graph = tf.Graph()
self.sess = tf.Session(

self._initialize_tensorflow_references()
self.grads = None
self.update_batch: Optional[tf.Operation] = None

:param vector_obs: The vector observations to add to the running estimate of the distribution.
"""
if self.use_vec_obs and self.normalize:
if self.first_normalization_update:
self.sess.run(
self.init_normalization_op,
feed_dict={self.initial_mean: np.mean(vector_obs, axis=0)},
)
self.first_normalization_update = False
self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)

self.behavior_spec.observation_shapes
)
if self.normalize:
self.first_normalization_update = True
self.init_normalization_op = normalization_tensors.init_op
self.initial_mean = normalization_tensors.initial_mean
self.running_mean = normalization_tensors.running_mean
self.running_variance = normalization_tensors.running_variance
self.processed_vector_in = ModelUtils.normalize_vector_obs(

16
ml-agents/mlagents/trainers/tf/models.py


class NormalizerTensors(NamedTuple):
init_op: tf.Operation
initial_mean: tf.Tensor
running_mean: tf.Tensor
running_variance: tf.Tensor

:return: A NormalizerTensors tuple that holds the initialization operation, the update operation,
number of steps, the initial-mean placeholder, running mean, and running variance.
"""
initial_mean = tf.placeholder(
shape=[vec_obs_size], dtype=tf.float32, name="initial_mean"
)
initializer=tf.constant_initializer(100),
initializer=tf.zeros_initializer(),
)
running_mean = tf.get_variable(
"running_mean",

update_normalization = ModelUtils.create_normalizer_update(
vector_obs, steps, running_mean, running_variance
)
initialize_normalization = tf.assign(running_mean, initial_mean)
update_normalization, steps, running_mean, running_variance
initialize_normalization,
update_normalization,
steps,
initial_mean,
running_mean,
running_variance,
)
@staticmethod

正在加载...
取消
保存