|
|
|
|
|
|
self.dones_holder = tf.placeholder( |
|
|
|
shape=[None], dtype=tf.float32, name="dones_holder" |
|
|
|
) |
|
|
|
# This is just a dummy to get BC to work. PPO has this but SAC doesn't. |
|
|
|
# TODO: Proper input and output specs for models |
|
|
|
self.epsilon = tf.placeholder( |
|
|
|
shape=[None, self.policy.act_size[0]], dtype=tf.float32, name="epsilon" |
|
|
|
) |
|
|
|
|
|
|
|
if self.policy.use_recurrent: |
|
|
|
self.memory_in = self.policy_network.memory_in |
|
|
|