
Fix for memories

/develop/add-fire
Arthur Juliani, 5 years ago
Current commit
29223931
2 files changed: 6 insertions, 60 deletions
  1. ml-agents/mlagents/trainers/models_torch.py (60 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (6 changes)

ml-agents/mlagents/trainers/models_torch.py (60 changes)


 from enum import Enum
-from typing import Callable, List, NamedTuple
+from typing import Callable, NamedTuple
 import numpy as np
 import torch
 from torch import nn

         conv_3 = torch.relu(self.conv3(conv_2))
         hidden = self.dense(conv_3.reshape([-1, self.final_flat]))
         return hidden
 
 
-class DiscreteActionMask(nn.Module):
-    def __init__(self, action_size):
-        super(DiscreteActionMask, self).__init__()
-        self.action_size = action_size
-
-    @staticmethod
-    def break_into_branches(
-        concatenated_logits: torch.Tensor, action_size: List[int]
-    ) -> List[torch.Tensor]:
-        """
-        Takes a concatenated set of logits that represent multiple discrete action branches
-        and breaks it up into one Tensor per branch.
-        :param concatenated_logits: Tensor that represents the concatenated action branches
-        :param action_size: List of ints containing the number of possible actions for each branch.
-        :return: A List of Tensors containing one tensor per branch.
-        """
-        action_idx = [0] + list(np.cumsum(action_size))
-        branched_logits = [
-            concatenated_logits[:, action_idx[i] : action_idx[i + 1]]
-            for i in range(len(action_size))
-        ]
-        return branched_logits
-
-    def forward(self, branches_logits, action_masks):
-        branch_masks = self.break_into_branches(action_masks, self.action_size)
-        raw_probs = [
-            torch.mul(
-                torch.softmax(branches_logits[k], dim=-1) + EPSILON, branch_masks[k]
-            )
-            for k in range(len(self.action_size))
-        ]
-        normalized_probs = [
-            torch.div(raw_probs[k], torch.sum(raw_probs[k], dim=1, keepdims=True))
-            for k in range(len(self.action_size))
-        ]
-        output = torch.cat(
-            [
-                torch.multinomial(torch.log(normalized_probs[k] + EPSILON), 1)
-                for k in range(len(self.action_size))
-            ],
-            dim=1,
-        )
-        return (
-            output,
-            torch.cat(
-                [normalized_probs[k] for k in range(len(self.action_size))], dim=1
-            ),
-            torch.cat(
-                [
-                    torch.log(normalized_probs[k] + EPSILON)
-                    for k in range(len(self.action_size))
-                ],
-                axis=1,
-            ),
-        )
-
-
 class GlobalSteps(nn.Module):
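For context, the removed DiscreteActionMask did two things: break_into_branches split the flat logit tensor into one tensor per action branch, and forward masked and renormalized each branch's softmax before sampling one action per branch. Below is a minimal, self-contained sketch of that behavior; the branch sizes, batch size, mask values, and EPSILON constant are made up for illustration, and it feeds torch.multinomial the renormalized probabilities rather than their logarithm, since multinomial expects non-negative weights.

import numpy as np
import torch

EPSILON = 1e-7  # stand-in for the module-level EPSILON constant

def break_into_branches(concatenated_logits, action_size):
    # Split a (batch, sum(action_size)) tensor into one tensor per branch.
    action_idx = [0] + list(np.cumsum(action_size))
    return [
        concatenated_logits[:, action_idx[i] : action_idx[i + 1]]
        for i in range(len(action_size))
    ]

action_size = [3, 2]                       # two branches with 3 and 2 actions
logits = torch.randn(4, sum(action_size))  # batch of 4 agents
# Disallow action 2 of branch 0 for every agent (1 = allowed, 0 = masked).
masks = torch.tensor([1.0, 1.0, 0.0, 1.0, 1.0]).repeat(4, 1)

branch_logits = break_into_branches(logits, action_size)
branch_masks = break_into_branches(masks, action_size)

# Zero out masked actions in each branch's softmax, then renormalize.
raw_probs = [
    (torch.softmax(branch_logits[k], dim=-1) + EPSILON) * branch_masks[k]
    for k in range(len(action_size))
]
probs = [p / p.sum(dim=1, keepdim=True) for p in raw_probs]

# Sample one action per branch and concatenate into a (batch, num_branches) tensor.
actions = torch.cat(
    [torch.multinomial(probs[k], 1) for k in range(len(action_size))], dim=1
)
print(actions.shape)  # torch.Size([4, 2])

Because masked entries are multiplied to exactly zero before renormalization, a masked action can never be drawn.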

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (6 changes)


         vec_obs = [torch.Tensor(vec_obs)]
         act_masks = torch.Tensor(np.array(batch["action_mask"]))
         actions = [torch.Tensor(np.array(batch["actions"]))]
-        memories = torch.Tensor(np.array(batch["memory"])).unsqueeze(0)
+        memories = [
+            torch.Tensor(np.array(batch["memory"][i]))
+            for i in range(0, len(batch["memory"]), self.policy.sequence_length)
+        ]
+        memories = torch.stack(memories).unsqueeze(0)
         if self.policy.use_vis_obs:
             vis_obs = []
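The replacement lines above keep one memory vector per sequence rather than one per step: a recurrent network only needs the initial state of each sequence, so only the memory at the start of every sequence_length-step window is kept, and torch.stack plus unsqueeze(0) yields a (1, num_sequences, memory_size) tensor. A toy sketch under assumed sizes (batch_memory stands in for batch["memory"], sequence_length for self.policy.sequence_length; memory_size is invented):

import numpy as np
import torch

sequence_length = 4   # stand-in for self.policy.sequence_length
memory_size = 8       # illustrative recurrent state width
num_steps = 12        # 3 sequences of 4 steps each
batch_memory = [
    np.random.rand(memory_size).astype(np.float32) for _ in range(num_steps)
]

# Keep only every sequence_length-th entry: the initial memory of each sequence.
memories = [
    torch.Tensor(np.array(batch_memory[i]))
    for i in range(0, len(batch_memory), sequence_length)
]
memories = torch.stack(memories).unsqueeze(0)
print(memories.shape)  # torch.Size([1, 3, 8])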
