Browse Source

Make entropy reporting same as TF

/develop/debugtorchfood
Ervin Teng 4 years ago
Current commit
be159ad3
2 changed files with 11 additions and 6 deletions
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (10 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (7 changes)

ml-agents/mlagents/trainers/policy/torch_policy.py (10 changes)

             actions = actions[:, :, 0]
         else:
             actions = actions[:, 0, :]
-        return (actions, all_logs if all_log_probs else log_probs, entropies, memories)
+        # Use the sum of entropy across actions, not the mean
+        entropy_sum = torch.sum(entropies, dim=1)
+        return (
+            actions,
+            all_logs if all_log_probs else log_probs,
+            entropy_sum,
+            memories,
+        )

     def evaluate_actions(
         self,

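The change in torch_policy.py sums the per-action entropies along dim 1 before returning them, so downstream code sees one entropy value per sample instead of one per action branch, which is what the commit title describes as matching the TF trainer. A minimal sketch of the effect, assuming entropies is shaped [batch_size, num_action_branches] as in the multi-discrete case (the numbers below are made up for illustration):

import torch

# Hypothetical per-branch entropies for a batch of 2 samples and 3 action branches.
entropies = torch.tensor([[0.5, 0.7, 0.2],
                          [0.4, 0.6, 0.3]])

# After this commit: one entropy value per sample, summed across branches.
entropy_sum = torch.sum(entropies, dim=1)
print(entropy_sum)  # tensor([1.4000, 1.3000])
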
ml-agents/mlagents/trainers/ppo/optimizer_torch.py (7 changes)

             ModelUtils.list_to_tensor(batch["action_probs"]),
             loss_masks,
         )
+        # Use the sum of entropy across actions, not the mean
+        entropy_sum = torch.sum(entropy, dim=1)
-            - decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
+            - decay_bet * ModelUtils.masked_mean(entropy_sum, loss_masks)
         )
         # Set optimizer learning rate

         self.optimizer.step()
         update_stats = {
-            "Losses/Policy Loss": policy_loss.item(),
+            "Losses/Policy Loss": torch.abs(policy_loss).item(),
             "Losses/Value Loss": value_loss.item(),
             "Policy/Learning Rate": decay_lr,
             "Policy/Epsilon": decay_eps,

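In optimizer_torch.py the summed entropy feeds the entropy bonus term of the PPO loss, and the policy loss is logged as an absolute value. Below is a self-contained sketch of those two pieces: masked_mean is a simplified stand-in for ModelUtils.masked_mean, the loss combination (policy loss + 0.5 * value loss) is an assumption about surrounding code the diff does not show, and all numeric values are made up:

import torch

# Simplified stand-in for ModelUtils.masked_mean: mean over steps whose mask is 1.
def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
    return (tensor * masks).sum() / torch.clamp(masks.sum(), min=1.0)

# Made-up stand-ins for the real batch quantities.
policy_loss = torch.tensor(0.12)
value_loss = torch.tensor(0.30)
entropy = torch.rand(4, 3)       # per-branch entropies, [batch, branches]
loss_masks = torch.ones(4)       # 1.0 for real steps, 0.0 for padding
decay_bet = 5.0e-3               # decayed entropy coefficient (beta)

# Entropy bonus uses the per-sample sum across action branches.
entropy_sum = torch.sum(entropy, dim=1)
loss = (
    policy_loss
    + 0.5 * value_loss
    - decay_bet * masked_mean(entropy_sum, loss_masks)
)

# Reporting: the policy loss stat is logged as an absolute value.
update_stats = {
    "Losses/Policy Loss": torch.abs(policy_loss).item(),
    "Losses/Value Loss": value_loss.item(),
}

Reporting the sum rather than a per-branch mean logs the total entropy of the factored action distribution, which is the quantity the commit title describes as matching the TF trainer.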