Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents/mlagents/trainers/stats.py
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
/ml-agents/mlagents/trainers/policy/tf_policy.py
/ml-agents/mlagents/trainers/ppo/optimizer.py

3 commits

Author        SHA1      Message                                   Commit date
Andrew Cohen  e55ecd61  'clean up' for Scott                      4 years ago
Andrew Cohen  06e4356c  Merge branch 'master' into sensitivity    4 years ago
Andrew Cohen  fa35292c  write hist to tb                          4 years ago
Showing 5 changed files, with 56 additions and 2 deletions
  1. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (13 changed lines)
  2. ml-agents/mlagents/trainers/policy/tf_policy.py (3 changed lines)
  3. ml-agents/mlagents/trainers/ppo/optimizer.py (3 changed lines)
  4. ml-agents/mlagents/trainers/ppo/trainer.py (7 changed lines)
  5. ml-agents/mlagents/trainers/stats.py (32 changed lines)

ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (13 changed lines)


                default_num_epoch=3,
            )

    def get_saliency(self, batch: AgentBuffer) -> List[float]:
        feed_dict: Dict[tf.Tensor, Any] = {}
        feed_dict[self.policy.vector_in] = batch["vector_obs"]
        if self.policy.output_pre is not None and "actions_pre" in batch:
            feed_dict[self.policy.output_pre] = batch["actions_pre"]
        else:
            feed_dict[self.policy.output] = batch["actions"]
        if self.policy.use_recurrent:
            feed_dict[self.policy.prev_action] = batch["prev_action"]
            feed_dict[self.policy.action_masks] = batch["action_mask"]
        # Evaluate the policy's saliency tensor for this batch, then average per observation dimension.
        saliencies = self.sess.run(self.policy.saliency, feed_dict)
        return np.mean(saliencies, axis=0)

    def get_trajectory_value_estimates(
        self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
    ) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
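
In get_saliency, the fetched saliencies have one row per sample in the batch, and np.mean(..., axis=0) collapses them into a single relevance score per observation dimension. A small NumPy sketch of that reduction (the numbers below are made up for illustration):

import numpy as np

# Hypothetical saliency output: a batch of 4 samples over 3 observation dimensions.
saliencies = np.array([
    [0.10, 0.40, 0.05],
    [0.20, 0.30, 0.10],
    [0.15, 0.35, 0.05],
    [0.05, 0.45, 0.00],
])

# Averaging over axis 0 (the batch) leaves one score per observation dimension.
per_obs_saliency = np.mean(saliencies, axis=0)
print(per_obs_saliency)  # [0.125 0.375 0.05 ]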

ml-agents/mlagents/trainers/policy/tf_policy.py (3 changed lines)


                )
            else:
                self._create_dc_actor(encoded)
            # Saliency: squared gradient of the policy output with respect to the vector observations.
            self.saliency = tf.reduce_mean(
                tf.square(tf.gradients(self.output, self.vector_in)), axis=1
            )
            self.trainable_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy"
            )
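
The self.saliency tensor above is a gradient-based saliency: the squared gradient of the policy's output with respect to the vector observation input. A minimal standalone sketch of that pattern; the toy linear model and the tf.compat.v1 imports are illustrative assumptions, not part of this merge request:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

# Toy graph: a fixed linear "policy" mapping 4 observation values to 2 outputs.
vector_in = tf.placeholder(tf.float32, shape=[None, 4], name="vector_in")
weights = tf.constant([[1.0, 0.0], [0.0, 2.0], [0.5, 0.5], [0.0, 0.0]])
output = tf.matmul(vector_in, weights)

# Gradient of the (summed) output w.r.t. each input element, squared so sign does not matter.
grads = tf.gradients(output, vector_in)[0]   # shape: [batch, 4]
saliency = tf.square(grads)

with tf.Session() as sess:
    batch = np.random.rand(3, 4).astype(np.float32)
    per_sample = sess.run(saliency, feed_dict={vector_in: batch})
    # Average across the batch to get one relevance score per observation dimension.
    print(per_sample.mean(axis=0))           # -> [1. 4. 1. 0.] for this linear toy model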

ml-agents/mlagents/trainers/ppo/optimizer.py (3 changed lines)


from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.stats import StatsSummary


class PPOOptimizer(TFOptimizer):
    def __init__(self, policy: TFPolicy, trainer_params: TrainerSettings):

        """
        # Create the graph here to give more granular control of the TF graph to the Optimizer.
        policy.create_tf_graph()
        with policy.graph.as_default():
            with tf.variable_scope("optimizer/"):
                super().__init__(policy, trainer_params)
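
The comment in this hunk explains the ordering: the policy graph is built first so the optimizer can then add its own ops under a dedicated variable scope, keeping optimizer variables separable from policy variables. A generic TF 1.x sketch of that scoping pattern (the variable names and shapes are illustrative, not taken from this merge request):

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

graph = tf.Graph()
with graph.as_default():
    # Policy-side variables live under their own scope.
    with tf.variable_scope("policy"):
        policy_w = tf.get_variable("w", shape=[4, 2])
    # Optimizer-side variables are grouped under a separate scope.
    with tf.variable_scope("optimizer"):
        step = tf.get_variable("step", shape=[], initializer=tf.zeros_initializer())

# Scoped collection lookups can now address each group of variables independently.
policy_vars = graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="policy")
optimizer_vars = graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="optimizer")
print([v.name for v in policy_vars])     # ['policy/w:0']
print([v.name for v in optimizer_vars])  # ['optimizer/step:0']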

ml-agents/mlagents/trainers/ppo/trainer.py (7 changed lines)


from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.stats import StatsPropertyType

logger = get_logger(__name__)

            trajectory.next_obs,
            trajectory.done_reached and not trajectory.interrupted,
        )
        # Compute per-observation saliency for this trajectory and report it to the stats writers.
        saliencies = self.optimizer.get_saliency(agent_buffer_trajectory)
        self._stats_reporter.add_property(StatsPropertyType.SALIENCY, saliencies)
        for name, v in value_estimates.items():
            agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
            self._stats_reporter.add_stat(

        )
        num_epoch = self.hyperparameters.num_epoch
        batch_update_stats = defaultdict(list)
        for _ in range(num_epoch):
            self.update_buffer.shuffle(sequence_length=self.policy.sequence_length)
            buffer = self.update_buffer
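
The second hunk is the top of PPO's update loop: the update buffer is reshuffled once per epoch before minibatches are drawn from it. A framework-free sketch of that epoch/shuffle/minibatch pattern (the buffer contents, batch size, and the toy "loss" are invented for illustration):

import numpy as np
from collections import defaultdict

def update(buffer: np.ndarray, num_epoch: int, batch_size: int) -> dict:
    batch_update_stats = defaultdict(list)
    for _ in range(num_epoch):
        # Reshuffle once per epoch, then sweep the buffer in minibatches.
        np.random.shuffle(buffer)
        for start in range(0, len(buffer), batch_size):
            minibatch = buffer[start : start + batch_size]
            # A real trainer would run an optimizer step here; we only log a toy "loss".
            batch_update_stats["loss"].append(float(np.mean(minibatch)))
    return {name: float(np.mean(vals)) for name, vals in batch_update_stats.items()}

stats = update(np.arange(32, dtype=np.float32), num_epoch=3, batch_size=8)
print(stats)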

ml-agents/mlagents/trainers/stats.py (32 changed lines)


class StatsPropertyType(Enum):
    HYPERPARAMETERS = "hyperparameters"
    SELF_PLAY = "selfplay"
    SALIENCY = "saliency"


class StatsWriter(abc.ABC):

        self.summary_writers: Dict[str, tf.summary.FileWriter] = {}
        self.base_dir: str = base_dir
        self._clear_past_data = clear_past_data
        self.trajectories = 0

    def write_stats(
        self, category: str, values: Dict[str, StatsSummary], step: int

            self._maybe_create_summary_writer(category)
            if summary is not None:
                self.summary_writers[category].add_summary(summary, 0)
        elif property_type == StatsPropertyType.SALIENCY:
            self._maybe_create_summary_writer(category)
            # adapted from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
            counts, bin_edges = np.histogram(value, bins=len(value))
            hist = tf.HistogramProto()
            # Normalize the saliency vector, take the log, shift it to be non-negative,
            # and renormalize so the relative relevance of each dimension is easier to compare.
            value = value / np.sum(value)
            value = np.log(value)
            value = value - np.min(value)
            value = value / np.sum(value)
            # for obs, grad in sorted(enumerate(value), reverse=True, key=lambda x: x[1]):
            #     print(f"Observation {obs} has relevance {grad}")
            hist.min = 0.0
            hist.max = float(len(value))
            hist.num = len(value)
            hist.sum = float(np.sum(value))
            hist.sum_squares = float(np.sum(value ** 2))
            bin_edges = bin_edges[1:]
            # One histogram bucket per observation index, with limits at 0.5, 1.5, ...
            for edge in range(len(value)):
                hist.bucket_limit.append(edge + 0.5)
            for c in value:
                hist.bucket.append(c)
            # Create and write Summary
            summary = tf.Summary(value=[tf.Summary.Value(tag="Saliency", histo=hist)])
            self.summary_writers[category].add_summary(summary, self.trajectories)
            self.summary_writers[category].flush()
            self.trajectories += 1

    def _dict_to_tensorboard(
        self, name: str, input_dict: Dict[str, Any]
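
The SALIENCY branch above writes a plain vector to TensorBoard by hand-building a tf.HistogramProto with one bucket per observation index. A self-contained sketch of that manual-histogram technique; the helper name, log directory, and example values here are assumptions, not part of this change:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

def write_vector_as_histogram(writer: tf.summary.FileWriter, tag: str,
                              values: np.ndarray, step: int) -> None:
    # One bucket per element: bucket i holds values[i], with its limit at i + 0.5.
    hist = tf.HistogramProto()
    hist.min = 0.0
    hist.max = float(len(values))
    hist.num = len(values)
    hist.sum = float(np.sum(values))
    hist.sum_squares = float(np.sum(values ** 2))
    for i, v in enumerate(values):
        hist.bucket_limit.append(i + 0.5)
        hist.bucket.append(float(v))
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
    writer.add_summary(summary, step)
    writer.flush()

# Example usage with a hypothetical log directory and saliency vector.
writer = tf.summary.FileWriter("./tb_example")
write_vector_as_histogram(writer, "Saliency", np.array([0.1, 0.5, 0.4]), step=0)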
