浏览代码

[add-fire] Fix masked mean for 2d tensors (#4364)

/develop/add-fire
GitHub 4 年前
当前提交
6de31a03
共有 4 个文件被更改,包括 14 次插入和 4 次删除
  1. 4
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  2. 4
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  3. 6
      ml-agents/mlagents/trainers/tests/torch/test_utils.py
  4. 4
      ml-agents/mlagents/trainers/torch/utils.py

4
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


torch.clamp(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * advantage
)
policy_loss = -1 * ModelUtils.masked_mean(
- torch.min(p_opt_a, p_opt_b).flatten(), loss_masks
+ torch.min(p_opt_a, p_opt_b), loss_masks
)
return policy_loss

loss = (
policy_loss
+ 0.5 * value_loss
- - decay_bet * ModelUtils.masked_mean(entropy.flatten(), loss_masks)
+ - decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate

4
ml-agents/mlagents/trainers/sac/optimizer_torch.py


memories = None
next_memories = None
# Q network memories are 0'ed out, since we don't have them during inference.
- q_memories = torch.zeros_like(next_memories)
+ q_memories = (
+ torch.zeros_like(next_memories) if next_memories is not None else None
+ )
vis_obs: List[torch.Tensor] = []
next_vis_obs: List[torch.Tensor] = []

6
ml-agents/mlagents/trainers/tests/torch/test_utils.py


masks = torch.tensor([False, False, False, False, False])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 0.0
+ # Make sure it works with 2d arrays of shape (mask_length, N)
+ test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
+ masks = torch.tensor([False, False, True, True, True])
+ mean = ModelUtils.masked_mean(test_input, masks=masks)
+ assert mean == 4.0

4
ml-agents/mlagents/trainers/torch/utils.py


:param tensor: Tensor which needs mean computation.
:param masks: Boolean tensor of masks with same dimension as tensor.
"""
- return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)
+ return (tensor.T * masks).sum() / torch.clamp(
+ (torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
+ )
正在加载...
取消
保存