|
|
|
|
|
|
action_feature_size=16, |
|
|
|
transfer=False, |
|
|
|
separate_train=False, |
|
|
|
separate_model_train=False, |
|
|
|
var_encoder=False, |
|
|
|
var_predict=True, |
|
|
|
predict_return=True, |
|
|
|
|
|
|
self.current_action, |
|
|
|
forward_layers, |
|
|
|
var_predict=var_predict, |
|
|
|
separate_train=separate_model_train |
|
|
|
) |
|
|
|
|
|
|
|
self.targ_predict, self.targ_predict_distribution = self.create_forward_model( |
|
|
|
|
|
|
var_predict=var_predict, |
|
|
|
reuse=True |
|
|
|
reuse=True, |
|
|
|
separate_train=separate_model_train |
|
|
|
) |
|
|
|
|
|
|
|
self.create_forward_loss(self.reuse_encoder, self.transfer) |
|
|
|
|
|
|
self.create_reward_model( |
|
|
|
self.encoder, self.current_action, forward_layers |
|
|
|
self.encoder, self.current_action, forward_layers, separate_train=separate_model_train |
|
|
|
) |
|
|
|
|
|
|
|
if self.use_bisim: |
|
|
|
|
|
|
forward_layers: int, |
|
|
|
var_predict: bool = False, |
|
|
|
reuse: bool = False, |
|
|
|
separate_train: bool = False |
|
|
|
) -> None: |
|
|
|
""" |
|
|
|
Creates forward model TensorFlow ops for Curiosity module. |
|
|
|
|
|
|
combined_input = tf.concat([encoded_state, encoded_action], axis=1) |
|
|
|
hidden = combined_input |
|
|
|
|
|
|
|
if not self.transfer: |
|
|
|
hidden = tf.stop_gradient(hidden) |
|
|
|
if separate_train: |
|
|
|
hidden = tf.stop_gradient(hidden) |
|
|
|
|
|
|
|
for i in range(forward_layers): |
|
|
|
hidden = tf.layers.dense( |
|
|
|
|
|
|
encoded_state: tf.Tensor, |
|
|
|
encoded_action: tf.Tensor, |
|
|
|
forward_layers: int, |
|
|
|
separate_train: bool = False |
|
|
|
if not self.transfer: |
|
|
|
if separate_train: |
|
|
|
hidden = tf.stop_gradient(hidden) |
|
|
|
for i in range(forward_layers): |
|
|
|
hidden = tf.layers.dense( |
|
|
|