def _create_ppo_optimizer_ops(self):
    self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
    self.grads = self.tf_optimizer.compute_gradients(self.loss)
    # Sensitivity of the policy output to each vector-observation dimension:
    # the squared gradient of the output w.r.t. the input, averaged over the
    # batch (axis 0 is the singleton list axis returned by tf.gradients,
    # axis 1 is the batch axis).
    self.sensitivity = tf.reduce_mean(
        tf.square(tf.gradients(self.policy.output, self.policy.vector_in)),
        axis=1,
    )
    self.update_batch = self.tf_optimizer.minimize(self.loss)
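
# The sensitivity op above is a squared-gradient saliency measure. As a
# minimal NumPy sketch of the same reduction (illustrative only: `grads` is a
# hypothetical array of per-example gradients, shape (batch, obs_dim)):
#
#     import numpy as np
#     grads = np.random.randn(256, 8)  # stand-in for d(output)/d(input)
#     sensitivity = np.mean(np.square(grads), axis=0)  # one value per input dim
#
# Large values mark input dimensions the policy output reacts to most.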

def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
    feed_dict = self._construct_feed_dict(batch, num_sequences)
    stats_needed = self.stats_name_to_update_name
    update_stats = {}
    update_vals = self._execute_model(feed_dict, self.update_dict)
    for stat_name, update_name in stats_needed.items():
        update_stats[stat_name] = update_vals[update_name]
    # Debug output: how many input dimensions received a sensitivity value.
    print(
        len(self._execute_model(feed_dict, {"sensi": self.sensitivity})["sensi"][0])
    )
    return update_stats
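
# `update` returns a mapping from stat names to the values computed during
# this gradient step; callers typically forward it to a stats reporter for
# logging.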

def compute_input_sensitivity(self, batch: AgentBuffer, num_sequences: int) -> None:
    """
    Print every vector-observation dimension with its sensitivity (mean
    squared gradient of the policy output w.r.t. that dimension), sorted
    from most to least sensitive.
    """
    feed_dict = self._construct_feed_dict(batch, num_sequences)
    sens = self._execute_model(feed_dict, {"sensi": self.sensitivity})["sensi"][0]
    for obs, grad in sorted(enumerate(sens), key=lambda x: x[1], reverse=True):
        print(obs, grad)
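
# Example usage, assuming `optimizer` is an instance of this optimizer class
# and `buffer` is a populated AgentBuffer (both names are illustrative):
#
#     optimizer.compute_input_sensitivity(buffer, num_sequences=4)
#
# This prints one "index  sensitivity" pair per vector-observation dimension,
# most sensitive first, which helps identify which inputs the trained policy
# actually relies on.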

def _construct_feed_dict(
    self, mini_batch: AgentBuffer, num_sequences: int