浏览代码

_

/exp-robot
vincentpierre 4 年前
当前提交
bf16bad6
共有 2 个文件被更改，包括 4 次插入和 11 次删除
  1. 10
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  2. 5
      ml-agents/mlagents/trainers/torch/networks.py

10
ml-agents/mlagents/trainers/sac/optimizer_torch.py


policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks)
entropy_loss = self.sac_entropy_loss(log_probs, masks)
# Compute surrogate loss for predicting cube position:
l_1 = self.value_network.q1_network.network_body.get_surrogate_loss(current_obs)

surrogate_loss_p = self.policy.actor_critic.network_body.get_surrogate_loss(current_obs) * 0.05
surrogate_loss_p = (
self.policy.actor_critic.network_body.get_surrogate_loss(current_obs) * 0.05
)
total_value_loss = q1_loss + q2_loss + value_loss

5
ml-agents/mlagents/trainers/torch/networks.py


normalize=self.normalize,
)
self.linear_encoder = LinearEncoder(
total_enc_size, network_settings.num_layers, self.h_size

actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
retrun_target = False
retrun_target=False,
) -> Tuple[torch.Tensor, torch.Tensor]:
encodes = []
for idx, processor in enumerate(self.processors):

loss = torch.sum((prediction - target) ** 2, dim=1)
loss = torch.mean(loss)
return loss
class ValueNetwork(nn.Module):

正在加载...
取消
保存