浏览代码

reduce sum to do squashing properly

/develop/tanhsquash
Ervin Teng 5 年前
当前提交
b501f75b
共有 1 个文件被更改,包括 10 次插入和 10 次删除
  1. 20
      ml-agents/mlagents/trainers/ppo/models.py

20
ml-agents/mlagents/trainers/ppo/models.py


# Compute probability of model output.
all_probs = (
-0.5 * tf.square(tf.stop_gradient(_policy_out) - mu) / sigma_sq
-0.5
* tf.reduce_sum(
tf.square(tf.stop_gradient(_policy_out) - mu) / sigma_sq,
axis=1,
keepdims=True,
)
- 0.5 * self.log_sigma_sq
- 0.5 * tf.reduce_sum(self.log_sigma_sq)
# Correct for tanh squash (source: https://arxiv.org/abs/1801.01290)
all_probs -= tf.reduce_sum(
tf.log(1 - self.output_pre ** 2 + EPSILON), axis=1, keepdims=True

self.create_value_heads(self.stream_names, hidden_value)
self.all_old_log_probs = tf.placeholder(
shape=[None, self.act_size[0]], dtype=tf.float32, name="old_probabilities"
shape=[None, 1], dtype=tf.float32, name="old_probabilities"
self.log_probs = tf.reduce_sum(
(tf.identity(self.all_log_probs)), axis=1, keepdims=True
)
self.old_log_probs = tf.reduce_sum(
(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
)
self.log_probs = self.all_log_probs
self.old_log_probs = self.all_old_log_probs
def create_dc_actor_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType

正在加载...
取消
保存