浏览代码

write hist to tb

/sensitivity
Andrew Cohen 4 年前
当前提交
fa35292c
共有 5 个文件被更改，包括 72 次插入和 22 次删除
  1. 8
      ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  2. 14
      ml-agents/mlagents/trainers/policy/nn_policy.py
  3. 10
      ml-agents/mlagents/trainers/ppo/optimizer.py
  4. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  5. 60
      ml-agents/mlagents/trainers/stats.py

8
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


def get_saliency(self, batch: AgentBuffer) -> List[float]:
    """
    Run the policy's saliency op over a batch of experiences and average
    the per-sample saliencies across the batch.

    :param batch: AgentBuffer containing at least "vector_obs" and "actions"
        (plus "actions_pre" / recurrent fields when applicable).
    :return: Mean saliency per input observation dimension (np.mean output;
        an ndarray treated as a list of floats by callers).
    """
    feed_dict: Dict[tf.Tensor, Any] = {}
    feed_dict[self.policy.vector_in] = batch["vector_obs"]
    # Feed pre-tanh actions when the policy exposes them and the batch has
    # them (continuous control); otherwise fall back to the processed
    # actions. Previously self.policy.output was also fed unconditionally
    # before this branch, which redundantly fed both tensors on the
    # "actions_pre" path — that duplicate feed is removed here.
    if self.policy.output_pre is not None and "actions_pre" in batch:
        feed_dict[self.policy.output_pre] = batch["actions_pre"]
    else:
        feed_dict[self.policy.output] = batch["actions"]
    if self.policy.use_recurrent:
        # NOTE(review): grouping assumed from the diff view — confirm that
        # action_masks is meant to be fed only for recurrent policies.
        feed_dict[self.policy.prev_action] = batch["prev_action"]
        feed_dict[self.policy.action_masks] = batch["action_mask"]
    saliencies = self.sess.run(self.policy.saliency, feed_dict)
    return np.mean(saliencies, axis=0)

14
ml-agents/mlagents/trainers/policy/nn_policy.py


self.reparameterize,
self.condition_sigma_on_obs,
)
self.saliency = tf.reduce_mean(
tf.square(tf.gradients(self.output, self.vector_in)), axis=1
)
self.saliency = tf.reduce_mean(
tf.square(tf.gradients(self.output_pre, self.vector_in)), axis=1
)
self.trainable_variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy"
)

self.saliency = tf.reduce_mean(
tf.square(tf.gradients(self.output, self.vector_in)), axis=1
)
self.inference_dict: Dict[str, tf.Tensor] = {
"action": self.output,
"log_probs": self.all_log_probs,

# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self._initialize_graph()
@timed
def evaluate(

10
ml-agents/mlagents/trainers/ppo/optimizer.py


update_stats[stat_name] = update_vals[update_name]
return update_stats
def compute_input_sensitivity(
    self, batch: AgentBuffer, num_sequences: int
) -> Dict[int, "StatsSummary"]:
    """
    Compute per-input-dimension sensitivity (saliency) for a batch.

    :param batch: AgentBuffer of experiences to evaluate.
    :param num_sequences: Number of sequences in the batch (used when
        constructing the feed dict for recurrent policies).
    :return: Mapping from observation dimension index to a StatsSummary
        whose first field carries the gradient magnitude for that
        dimension. (Annotation fixed: values are StatsSummary, not float.)
    """
    feed_dict = self._construct_feed_dict(batch, num_sequences)
    sens = self._execute_model(feed_dict, {"sensi": self.sensitivity})["sensi"][0]
    out = {obs: StatsSummary(grad, 0.0, 0.0) for obs, grad in enumerate(sens)}
    # Debug output: report dimensions in descending order of relevance.
    # NOTE(review): consider routing this through a logger instead of print.
    for obs, grad in sorted(enumerate(sens), reverse=True, key=lambda x: x[1]):
        print(f"Observation {obs} has relevance {grad}")
    return out
def _construct_feed_dict(
self, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:

2
ml-agents/mlagents/trainers/ppo/trainer.py


)
num_epoch = self.hyperparameters.num_epoch
batch_update_stats = defaultdict(list)
for _ in range(num_epoch):
self.update_buffer.shuffle(sequence_length=self.policy.sequence_length)
buffer = self.update_buffer

60
ml-agents/mlagents/trainers/stats.py


elif property_type == StatsPropertyType.SALIENCY:
self._maybe_create_summary_writer(category)
with tf.Session(config=generate_session_config()) as sess:
hist_op = tf.summary.histogram(category, value)
hist = sess.run(hist_op)
# self.summary_writers[category].add_summary(hist, 0)
# self.trajectories += 1
# self.summary_writers[category].flush()
# adapted from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
counts, bin_edges = np.histogram(value, bins=len(value))
hist = tf.HistogramProto()
#print(float(np.min(value)))
#print(float(np.max(value)))
#print(int(np.prod(value.shape)))
#print(float(np.sum(value)))
#print(float(np.sum(value**2)))
#print(bin_edges[1:])
#hist.min = float(np.min(value))
#hist.max = float(np.max(value))
#hist.num = int(np.prod(value.shape))
#hist.sum = float(np.sum(value))
#hist.sum_squares = float(np.sum(value**2))
#value = np.log(value)
value = value / np.sum(value)
value = np.log(value)
value = value - np.min(value)
value = value / np.sum(value)
for obs, grad in sorted(enumerate(value), reverse=True, key=lambda x: x[1]):
print(f"Observation {obs} has relevance {grad}")
hist.min = 0.0
hist.max = float(len(value))#float(np.max(value))
hist.num = len(value)#int(np.prod(value.shape))
hist.sum = float(np.sum(value))
hist.sum_squares = float(np.sum(value**2))
bin_edges = bin_edges[1:]
for edge in range(len(value)):#counts:
#print(edge)
hist.bucket_limit.append(edge+.5)
for c in value:
#print(c)
hist.bucket.append(c)
# Add bin edges and counts
# for edge,i in zip(range(1,len(value)), bin_edges):
# hist.bucket_limit.append(i)
# for c,i in zip(value, counts):
# hist.bucket.append(i)
# Create and write Summary
summary = tf.Summary(value=[tf.Summary.Value(tag="Saliency", histo=hist)])
self.summary_writers[category].add_summary(summary, self.trajectories)
self.summary_writers[category].flush()
#summary = tf.Summary()
#summary.value.add(tag="Saliency", histo=hist)
#self.summary_writers[category].add_summary(summary)#, self.trajectories)
self.trajectories += 1
#self.summary_writers[category].flush()
#with tf.Session(config=generate_session_config()) as sess:
# hist_op = tf.summary.histogram(category, value)
# hist = sess.run(hist_op)
def _dict_to_tensorboard(self, name: str, input_dict: Dict[str, Any]) -> str:
"""
Convert a dict to a Tensorboard-encoded string.

正在加载...
取消
保存