
Add conditional sigma for distribution

Branch: /develop/add-fire
Arthur Juliani, 5 years ago
Commit 61d671d8
4 changed files with 16 additions and 8 deletions
  1. ml-agents/mlagents/trainers/distributions_torch.py (20 changes)
  2. ml-agents/mlagents/trainers/models_torch.py (2 changes)
  3. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (1 change)
  4. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)

ml-agents/mlagents/trainers/distributions_torch.py (20 changes)


```diff
 class GaussianDistribution(nn.Module):
-    def __init__(self, hidden_size, num_outputs, **kwargs):
+    def __init__(self, hidden_size, num_outputs, conditional_sigma=False, **kwargs):
+        self.conditional_sigma = conditional_sigma
-        # self.log_sigma_sq = nn.Linear(hidden_size, num_outputs)
-        self.log_sigma = nn.Parameter(torch.zeros(1, num_outputs, requires_grad=True))
-        # nn.init.xavier_uniform(self.log_sigma_sq.weight, gain=0.01)
+        if conditional_sigma:
+            self.log_sigma = nn.Linear(hidden_size, num_outputs)
+            nn.init.xavier_uniform(self.log_sigma.weight, gain=0.01)
+        else:
+            self.log_sigma = nn.Parameter(
+                torch.zeros(1, num_outputs, requires_grad=True)
+            )

-        # log_sig = torch.tanh(self.log_sigma_sq(inputs)) * 3.0
-        return [distributions.normal.Normal(loc=mu, scale=torch.exp(self.log_sigma))]
+        if self.conditional_sigma:
+            log_sigma = self.log_sigma(inputs)
+        else:
+            log_sigma = self.log_sigma
+        return [distributions.normal.Normal(loc=mu, scale=torch.exp(log_sigma))]
 class MultiCategoricalDistribution(nn.Module):
```
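The effect of the change: with `conditional_sigma=True`, the log standard deviation of the action distribution is predicted from the hidden state through a linear head, instead of being a single learned parameter shared across all states. Below is a minimal, self-contained sketch of the resulting module; the `mu` head and the constructor boilerplate are assumptions, since the hunk only shows the log-sigma handling.

```python
import torch
from torch import distributions, nn


class GaussianDistribution(nn.Module):
    def __init__(self, hidden_size, num_outputs, conditional_sigma=False):
        super().__init__()
        self.conditional_sigma = conditional_sigma
        # Assumed mean head; not shown in the hunk above.
        self.mu = nn.Linear(hidden_size, num_outputs)
        if conditional_sigma:
            # State-dependent sigma: log-sigma is a function of the inputs.
            self.log_sigma = nn.Linear(hidden_size, num_outputs)
            # The hunk uses the deprecated nn.init.xavier_uniform;
            # the in-place variant below does the same thing.
            nn.init.xavier_uniform_(self.log_sigma.weight, gain=0.01)
        else:
            # State-independent sigma: one learned parameter per output.
            self.log_sigma = nn.Parameter(
                torch.zeros(1, num_outputs, requires_grad=True)
            )

    def forward(self, inputs):
        mu = self.mu(inputs)
        if self.conditional_sigma:
            log_sigma = self.log_sigma(inputs)
        else:
            log_sigma = self.log_sigma
        return [distributions.normal.Normal(loc=mu, scale=torch.exp(log_sigma))]
```

Both branches bind the same attribute name, so `forward` only has to check the flag. `scale=torch.exp(log_sigma)` keeps sigma strictly positive in either mode, and the small `gain=0.01` on the conditional head makes its initial output near zero, so training starts with sigma roughly `exp(0) = 1`, matching the zero-initialized parameter in the unconditional branch.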

ml-agents/mlagents/trainers/models_torch.py (2 changes)


```diff
         self.layers = [nn.Linear(input_size, hidden_size)]
         for _ in range(num_layers - 1):
             self.layers.append(nn.Linear(hidden_size, hidden_size))
-            self.layers.append(nn.ReLU())
+            self.layers.append(nn.Tanh())
         self.layers = nn.ModuleList(self.layers)

     def forward(self, inputs):
```
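This hunk swaps the hidden activation of the MLP body from ReLU to Tanh, a common choice for continuous-control PPO since it keeps hidden features bounded. A compact sketch of the stack being built, assuming the hunk's plain Linear-plus-activation layout (the builder function and its name are illustrative):

```python
from torch import nn


def build_body(input_size: int, hidden_size: int, num_layers: int) -> nn.Module:
    # Mirrors the hunk: an input Linear, then (num_layers - 1) hidden
    # Linear blocks, each followed by the activation this commit
    # changes from ReLU to Tanh.
    layers = [nn.Linear(input_size, hidden_size)]
    for _ in range(num_layers - 1):
        layers.append(nn.Linear(hidden_size, hidden_size))
        layers.append(nn.Tanh())  # was nn.ReLU() before this commit
    return nn.Sequential(*layers)
```

The real module keeps an `nn.ModuleList` and applies the layers in `forward`; `nn.Sequential` is used here only to keep the sketch self-contained and runnable.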

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (1 change)


```python
    visual_obs = batch["visual_obs"]
else:
    visual_obs = []
next_obs = [torch.Tensor(next_obs[0])]
value_estimates, mean_value = self.policy.critic(vector_obs, visual_obs)
```
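For context, this hunk sits in the value-bootstrapping path: the critic consumes observations as lists of tensors, one entry per observation stream, so the trajectory's final observation has to be wrapped the same way before it can be fed through. A hedged sketch of just that conversion; the names mirror the hunk, while the shape is made up:

```python
import numpy as np
import torch

# One vector-observation stream for the final step of a trajectory.
next_obs = [np.zeros(8, dtype=np.float32)]
# The hunk's conversion: a list holding a single torch tensor,
# matching the list-of-tensors format the critic expects.
next_obs = [torch.Tensor(next_obs[0])]
```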

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)


```python
    lambd=self.trainer_parameters["lambd"],
)
local_return = local_advantage + local_value_estimates
# This is later use as target for the different value estimates
agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
```
