
add bi-forward-loss back

/develop/bisim-sac-transfer
yanchaosun, 4 years ago
Commit c5d9e376
1 changed file with 33 additions and 33 deletions
ml-agents/mlagents/trainers/policy/transfer_policy.py (66 changed lines: 33 added, 33 deleted)


 def create_forward_loss(self, reuse: bool, transfer: bool):
-    # if not transfer:
-    if reuse:
-        encoded_next_state = tf.stop_gradient(self.next_encoder)
-    else:
-        encoded_next_state = self.next_targ_encoder  # gradient of target encoder is already stopped
-    squared_difference = 0.5 * tf.reduce_sum(
-        tf.squared_difference(tf.tanh(self.predict), encoded_next_state), axis=1
-    )
-    self.forward_loss = tf.reduce_mean(
-        tf.dynamic_partition(squared_difference, self.mask, 2)[1]
-    )
-    # else:
-    #     if reuse:
-    #         squared_difference_1 = 0.5 * tf.reduce_sum(
-    #             tf.squared_difference(tf.tanh(self.predict), tf.stop_gradient(self.next_encoder)),
-    #             axis=1
-    #         )
-    #         squared_difference_2 = 0.5 * tf.reduce_sum(
-    #             tf.squared_difference(tf.tanh(tf.stop_gradient(self.predict)), self.next_encoder),
-    #             axis=1
-    #         )
-    #     else:
-    #         squared_difference_1 = 0.5 * tf.reduce_sum(
-    #             tf.squared_difference(tf.tanh(self.predict), self.next_targ_encoder),
-    #             axis=1
-    #         )
-    #         squared_difference_2 = 0.5 * tf.reduce_sum(
-    #             tf.squared_difference(tf.tanh(self.targ_predict), self.next_encoder),
-    #             axis=1
-    #         )
-    #     self.forward_loss = tf.reduce_mean(
-    #         tf.dynamic_partition(0.5 * squared_difference_1 + 0.5 * squared_difference_2, self.mask, 2)[1]
-    #     )
+    if not transfer:
+        if reuse:
+            encoded_next_state = tf.stop_gradient(self.next_encoder)
+        else:
+            encoded_next_state = self.next_targ_encoder  # gradient of target encoder is already stopped
+        squared_difference = 0.5 * tf.reduce_sum(
+            tf.squared_difference(tf.tanh(self.predict), encoded_next_state), axis=1
+        )
+        self.forward_loss = tf.reduce_mean(
+            tf.dynamic_partition(squared_difference, self.mask, 2)[1]
+        )
+    else:
+        if reuse:
+            squared_difference_1 = 0.5 * tf.reduce_sum(
+                tf.squared_difference(tf.tanh(self.predict), tf.stop_gradient(self.next_encoder)),
+                axis=1
+            )
+            squared_difference_2 = 0.5 * tf.reduce_sum(
+                tf.squared_difference(tf.tanh(tf.stop_gradient(self.predict)), self.next_encoder),
+                axis=1
+            )
+        else:
+            squared_difference_1 = 0.5 * tf.reduce_sum(
+                tf.squared_difference(tf.tanh(self.predict), self.next_targ_encoder),
+                axis=1
+            )
+            squared_difference_2 = 0.5 * tf.reduce_sum(
+                tf.squared_difference(tf.tanh(self.targ_predict), self.next_encoder),
+                axis=1
+            )
+        self.forward_loss = tf.reduce_mean(
+            tf.dynamic_partition(0.5 * squared_difference_1 + 0.5 * squared_difference_2, self.mask, 2)[1]
+        )

 def create_reward_model(
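
For readers skimming the diff: the re-enabled else branch is the bidirectional ("bi") forward loss named in the commit message. One term fits the forward prediction to a gradient-stopped encoding of the next state; the mirrored term fits the encoding to a gradient-stopped prediction, so the state encoder and the forward model each receive a training signal without chasing each other. The sketch below restates that computation as a standalone function. It is a minimal illustration, not the transfer_policy API: the name bi_forward_loss, the eager TF2 style, and the toy inputs are assumptions, while the tensor roles mirror self.predict, self.targ_predict, self.next_encoder, self.next_targ_encoder, and self.mask from the diff (tf.math.squared_difference is the current spelling of the tf.squared_difference call above).

import tensorflow as tf


def bi_forward_loss(predict, targ_predict, next_encoder, next_targ_encoder, mask, reuse):
    # Hypothetical standalone restatement of the loss above, not the repo's API.
    if reuse:
        # One shared encoder: alternate tf.stop_gradient so each direction
        # trains only one side of the pair.
        sd_1 = 0.5 * tf.reduce_sum(
            tf.math.squared_difference(tf.tanh(predict), tf.stop_gradient(next_encoder)),
            axis=1,
        )
        sd_2 = 0.5 * tf.reduce_sum(
            tf.math.squared_difference(tf.tanh(tf.stop_gradient(predict)), next_encoder),
            axis=1,
        )
    else:
        # Separate target networks: the targ_* tensors are assumed to carry
        # no gradient, so no explicit stop_gradient is needed here.
        sd_1 = 0.5 * tf.reduce_sum(
            tf.math.squared_difference(tf.tanh(predict), next_targ_encoder),
            axis=1,
        )
        sd_2 = 0.5 * tf.reduce_sum(
            tf.math.squared_difference(tf.tanh(targ_predict), next_encoder),
            axis=1,
        )
    # Average the two directions, then keep only active steps: tf.dynamic_partition
    # splits by the integer mask, and partition [1] holds the mask == 1 entries.
    return tf.reduce_mean(tf.dynamic_partition(0.5 * sd_1 + 0.5 * sd_2, mask, 2)[1])


# Toy usage (shapes and values are assumptions):
batch, dim = 4, 8
loss = bi_forward_loss(
    predict=tf.random.normal((batch, dim)),
    targ_predict=tf.random.normal((batch, dim)),
    next_encoder=tf.random.normal((batch, dim)),
    next_targ_encoder=tf.random.normal((batch, dim)),
    mask=tf.constant([1, 1, 0, 1], dtype=tf.int32),
    reuse=False,
)

The equal 0.5/0.5 weighting of the two directions reproduces the loss in the diff, and tf.dynamic_partition requires an integer mask, which matches how self.mask is used above.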
