浏览代码

ignore precommit

/sensitivity
Andrew Cohen 5 年前
当前提交
02df39ab
共有 5 个文件被更改,包括 29 次插入和 12 次删除
  1. 7
      ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  2. 4
      ml-agents/mlagents/trainers/policy/nn_policy.py
  3. 7
      ml-agents/mlagents/trainers/ppo/optimizer.py
  4. 12
      ml-agents/mlagents/trainers/ppo/trainer.py
  5. 11
      ml-agents/mlagents/trainers/stats.py

7
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


default_num_epoch=3,
)
def get_saliency(self, batch: AgentBuffer) -> np.ndarray:
    """
    Compute the mean input saliency of the policy for a batch of experiences.

    Feeds the batch's vector observations and taken actions into the graph and
    evaluates ``self.policy.saliency`` (the mean squared gradient of the policy
    output w.r.t. the vector input, built in nn_policy.py), then averages the
    result over the batch.

    :param batch: AgentBuffer holding "vector_obs" and "actions" for the batch.
    :return: 1-D array with one mean saliency value per vector-observation
        dimension. NOTE(review): the original annotation said ``List[float]``,
        but ``np.mean(..., axis=0)`` returns an ndarray; annotation corrected.
    """
    feed_dict: Dict[tf.Tensor, Any] = {}
    feed_dict[self.policy.vector_in] = batch["vector_obs"]
    feed_dict[self.policy.output] = batch["actions"]
    # Run only the saliency op; one row of saliencies per batch element.
    saliencies = self.sess.run(self.policy.saliency, feed_dict)
    # Average over the batch dimension.
    return np.mean(saliencies, axis=0)
def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:

4
ml-agents/mlagents/trainers/policy/nn_policy.py


tf.GraphKeys.TRAINABLE_VARIABLES, scope="lstm"
) # LSTMs need to be root scope for Barracuda export
self.saliency = tf.reduce_mean(
tf.square(tf.gradients(self.output, self.vector_in)), axis=1
)
self.inference_dict: Dict[str, tf.Tensor] = {
"action": self.output,
"log_probs": self.all_log_probs,

7
ml-agents/mlagents/trainers/ppo/optimizer.py


def _create_ppo_optimizer_ops(self):
    """
    Build the TF1 graph ops for the PPO update.

    Creates the optimizer, the gradients of the loss (kept in ``self.grads``
    so they can be inspected/reported), an input-sensitivity op, and the
    ``self.update_batch`` op that applies the gradients.
    """
    self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
    self.grads = self.tf_optimizer.compute_gradients(self.loss)
    # Input sensitivity: mean squared gradient of the policy output w.r.t.
    # the vector observations. Mirrors the `saliency` op in nn_policy.py;
    # axis=1 averages over the batch, leaving one value per input dimension.
    self.sensitivity = tf.reduce_mean(
        tf.square(tf.gradients(self.policy.output, self.policy.vector_in)), axis=1
    )
    # Re-use the gradients computed above. The original called
    # minimize(self.loss), which is compute_gradients + apply_gradients and
    # therefore built a second, redundant gradient subgraph in the TF1 graph.
    self.update_batch = self.tf_optimizer.apply_gradients(self.grads)
@timed

) -> Dict[int, float]:
feed_dict = self._construct_feed_dict(batch, num_sequences)
sens = self._execute_model(feed_dict, {"sensi": self.sensitivity})["sensi"][0]
out = dict((obs, StatsSummary(grad, 0.0, 0.0)) for obs, grad in enumerate(sens))
out = {obs: StatsSummary(grad, 0.0, 0.0) for obs, grad in enumerate(sens)}
print("Observation {} has relevance {}".format(obs, grad))
print(f"Observation {obs} has relevance {grad}")
return out
def _construct_feed_dict(

12
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.stats import CSVWriter
from mlagents.trainers.stats import StatsPropertyType
logger = get_logger(__name__)

self.load = load
self.seed = seed
self.policy: NNPolicy = None # type: ignore
self.csv_writer = CSVWriter("sensitivity")
def _process_trajectory(self, trajectory: Trajectory) -> None:
"""

trajectory.next_obs,
trajectory.done_reached and not trajectory.interrupted,
)
saliencies = self.optimizer.get_saliency(agent_buffer_trajectory)
self._stats_reporter.add_property(StatsPropertyType.SALIENCY, saliencies)
for name, v in value_estimates.items():
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
self._stats_reporter.add_stat(

)
num_epoch = self.hyperparameters.num_epoch
batch_update_stats = defaultdict(list)
sensitivities = self.optimizer.compute_input_sensitivity(
self.update_buffer, self.policy.sequence_length
)
self.csv_writer.write_stats("sensitivity", sensitivities, self.step)
for _ in range(num_epoch):
self.update_buffer.shuffle(sequence_length=self.policy.sequence_length)

11
ml-agents/mlagents/trainers/stats.py


class StatsPropertyType(Enum):
    """
    Kinds of non-scalar properties that can be handed to a StatsWriter via
    add_property. The string values are the categories/tags used when the
    property is written out, so they must stay stable.
    """

    # Trainer hyperparameter dump, written once as text.
    HYPERPARAMETERS = "hyperparameters"
    # Self-play metadata.
    SELF_PLAY = "selfplay"
    # Per-observation saliency values (see TFOptimizer.get_saliency).
    SALIENCY = "saliency"
class StatsWriter(abc.ABC):

self.summary_writers: Dict[str, tf.summary.FileWriter] = {}
self.base_dir: str = base_dir
self._clear_past_data = clear_past_data
self.trajectories = 0
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

text = self._dict_to_tensorboard("Hyperparameters", value)
self._maybe_create_summary_writer(category)
self.summary_writers[category].add_summary(text, 0)
elif property_type == StatsPropertyType.SALIENCY:
self._maybe_create_summary_writer(category)
with tf.Session(config=generate_session_config()) as sess:
hist_op = tf.summary.histogram(category, value)
hist = sess.run(hist_op)
# self.summary_writers[category].add_summary(hist, 0)
# self.trajectories += 1
# self.summary_writers[category].flush()
def _dict_to_tensorboard(self, name: str, input_dict: Dict[str, Any]) -> str:
"""

正在加载...
取消
保存