Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents/mlagents/trainers/sac/optimizer_torch.py
/ml-agents/mlagents/trainers/torch/model_serialization.py
/ml-agents/mlagents/trainers/torch/networks.py
/Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs.meta

4 commits

Author         SHA1      Message            Commit date
vincentpierre  d2d71116  adding meta files  4 years ago
vincentpierre  6a61eb05  -                  4 years ago
vincentpierre  bf16bad6  _                  4 years ago
vincentpierre  9fbc2e0e  _                  4 years ago
Showing 6 changed files with 79 insertions and 26 deletions
  1. ml-agents/mlagents/trainers/sac/optimizer_torch.py (18 changes)
  2. ml-agents/mlagents/trainers/torch/model_serialization.py (22 changes)
  3. ml-agents/mlagents/trainers/torch/networks.py (49 changes)
  4. Project/Assets/ML-Agents/Examples/Arena-Sequence.meta (8 changes)
  5. Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs.meta (8 changes)

ml-agents/mlagents/trainers/sac/optimizer_torch.py (18 changes)


  policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks)
  entropy_loss = self.sac_entropy_loss(log_probs, masks)
+ # Compute surrogate loss for predicting cube position:
+ l_1 = self.value_network.q1_network.network_body.get_surrogate_loss(current_obs)
+ l_2 = self.value_network.q2_network.network_body.get_surrogate_loss(current_obs)
+ l_v = self.target_network.network_body.get_surrogate_loss(current_obs)
+ surrogate_loss_v = (l_1 + l_2 + l_v) * 0.05
+ surrogate_loss_p = (
+     self.policy.actor_critic.network_body.get_surrogate_loss(current_obs) * 0.05
+ )
+ surrogate_loss = surrogate_loss_v + surrogate_loss_p
- policy_loss.backward()
+ (policy_loss + surrogate_loss_p).backward()
- total_value_loss.backward()
+ (total_value_loss + surrogate_loss_v).backward()
  self.value_optimizer.step()
  ModelUtils.update_learning_rate(self.entropy_optimizer, decay_lr)

  "Losses/Value Loss": value_loss.item(),
  "Losses/Q1 Loss": q1_loss.item(),
  "Losses/Q2 Loss": q2_loss.item(),
+ "Losses/Surrogate Loss": surrogate_loss.item(),
  "Policy/Discrete Entropy Coeff": torch.mean(
      torch.exp(self._log_ent_coef.discrete)
  ).item(),
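
The hunk above folds an auxiliary "surrogate" regression loss into both the policy and value updates, weighted by 0.05, and backpropagates each combined loss in a single call. Below is a minimal, self-contained sketch of that pattern; the module, tensor shapes, and the stand-in main loss are hypothetical, not the ML-Agents classes.

import torch
import torch.nn as nn

class BodyWithAuxHead(nn.Module):
    def __init__(self, obs_size: int = 32, hidden: int = 64, target_size: int = 9):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(obs_size, hidden), nn.ReLU())
        # Hypothetical auxiliary head: regress a 9-dim target from the encoding.
        self.surrogate_predictor = nn.Linear(hidden, target_size)

    def get_surrogate_loss(self, obs: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        pred = self.surrogate_predictor(self.encoder(obs))
        # Mean over the batch of the squared error summed over the 9 dims.
        return torch.mean(torch.sum((pred - target) ** 2, dim=1))

body = BodyWithAuxHead()
opt = torch.optim.Adam(body.parameters(), lr=3e-4)
obs, target = torch.randn(8, 32), torch.randn(8, 9)

main_loss = body.encoder(obs).pow(2).mean()  # stand-in for the actual RL loss
surrogate = 0.05 * body.get_surrogate_loss(obs, target)  # same 0.05 weight as the diff
opt.zero_grad()
(main_loss + surrogate).backward()  # one backward pass over the combined objective
opt.step()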

ml-agents/mlagents/trainers/torch/model_serialization.py (22 changes)


  self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
  self.output_names = ["version_number", "memory_size"]
- if self.policy.behavior_spec.action_spec.continuous_size > 0:
+ if True:
      self.output_names += [
          "continuous_actions",
          "continuous_action_output_shape",

  self.output_names += ["discrete_actions", "discrete_action_output_shape"]
  self.dynamic_axes.update({"discrete_actions": {0: "batch"}})
- if (
-     self.policy.behavior_spec.action_spec.continuous_size == 0
-     or self.policy.behavior_spec.action_spec.discrete_size == 0
- ):
-     self.output_names += [
-         "action",
-         "is_continuous_control",
-         "action_output_shape",
-     ]
-     self.dynamic_axes.update({"action": {0: "batch"}})
+ # if (
+ #     self.policy.behavior_spec.action_spec.continuous_size == 0
+ #     or self.policy.behavior_spec.action_spec.discrete_size == 0
+ # ):
+ #     self.output_names += [
+ #         "action",
+ #         "is_continuous_control",
+ #         "action_output_shape",
+ #     ]
+ #     self.dynamic_axes.update({"action": {0: "batch"}})
  def export_policy_model(self, output_filepath: str) -> None:
      """

ml-agents/mlagents/trainers/torch/networks.py (49 changes)


  normalize=self.normalize,
  )
- total_enc_size = sum(self.embedding_sizes) + encoded_act_size
+ total_enc_size = sum(self.embedding_sizes) + encoded_act_size - 9
+ self.surrogate_predictor = torch.nn.Linear(self.h_size, 9)
  self.linear_encoder = LinearEncoder(
      total_enc_size, network_settings.num_layers, self.h_size
  )

  actions: Optional[torch.Tensor] = None,
  memories: Optional[torch.Tensor] = None,
  sequence_length: int = 1,
+ return_target=False,

- processed_obs = processor(obs_input)
- encodes.append(processed_obs)
+ if obs_input.shape[1] == 9:
+     target = obs_input
+     if return_target:
+         return target
+ else:
+     processed_obs = processor(obs_input)
+     encodes.append(processed_obs)
  if len(encodes) == 0:
      raise Exception("No valid inputs to network.")

  encoding = encoding.reshape([-1, self.m_size // 2])
  return encoding, memories
+ def get_surrogate_loss(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+     prediction, _ = self.forward(inputs)
+     prediction = self.surrogate_predictor(prediction)
+     target = self.forward(inputs, return_target=True)
+     loss = torch.sum((prediction - target) ** 2, dim=1)
+     loss = torch.mean(loss)
+     return loss
+ def get_prediction(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+     prediction, _ = self.forward(inputs)
+     prediction = self.surrogate_predictor(prediction)
+     return prediction

  class ValueNetwork(nn.Module):
      def __init__(

  action_out_deprecated,
  ) = self.action_model.get_action_out(encoding, masks)
  export_out = [self.version_number, self.memory_size_vector]
- if self.action_spec.continuous_size > 0:
-     export_out += [cont_action_out, self.continuous_act_size_vector]
+ if True:
+     # export_out += [cont_action_out, self.continuous_act_size_vector]
+     export_out += [self.network_body.get_prediction(inputs), torch.nn.Parameter(
+         torch.Tensor([int(9)]), requires_grad=False
+     )]
- # Only export deprecated nodes with non-hybrid action spec
- if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
-     export_out += [
-         action_out_deprecated,
-         self.is_continuous_int_deprecated,
-         self.act_size_vector_deprecated,
-     ]
+ # # Only export deprecated nodes with non-hybrid action spec
+ # if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
+ #     export_out += [
+ #         action_out_deprecated,
+ #         self.is_continuous_int_deprecated,
+ #         self.act_size_vector_deprecated,
+ #     ]
  return tuple(export_out)
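
The `- 9` in total_enc_size and the branch on `obs_input.shape[1] == 9` implement a hold-one-out scheme: the 9-dim observation (the cube position) is withheld from the encoder's input and used only as the regression target for surrogate_predictor. A standalone sketch of that split, with hypothetical sizes and module names rather than the real NetworkBody:

from typing import List, Optional
import torch
import torch.nn as nn

class SplitTargetEncoder(nn.Module):
    def __init__(self, enc_in: int = 16, hidden: int = 64, target_size: int = 9):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(enc_in, hidden), nn.ReLU())
        self.surrogate_predictor = nn.Linear(hidden, target_size)

    def forward(self, inputs: List[torch.Tensor], return_target: bool = False):
        target: Optional[torch.Tensor] = None
        encodes = []
        for obs in inputs:
            if obs.shape[1] == 9:  # the 9-dim obs is the target, not an encoder input
                target = obs
            else:
                encodes.append(obs)
        if return_target:
            return target
        return self.encoder(torch.cat(encodes, dim=1))

enc = SplitTargetEncoder()
inputs = [torch.randn(8, 16), torch.randn(8, 9)]
pred = enc.surrogate_predictor(enc(inputs))
target = enc(inputs, return_target=True)
loss = torch.mean(torch.sum((pred - target) ** 2, dim=1))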

Project/Assets/ML-Agents/Examples/Arena-Sequence.meta (8 changes)


+ fileFormatVersion: 2
+ guid: ab46f01a215b74b588a0a3c180a88813
+ folderAsset: yes
+ DefaultImporter:
+   externalObjects: {}
+   userData:
+   assetBundleName:
+   assetBundleVariant:

Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs.meta (8 changes)


+ fileFormatVersion: 2
+ guid: af7cee3bddc2e4ed595824b3c6d542b6
+ folderAsset: yes
+ DefaultImporter:
+   externalObjects: {}
+   userData:
+   assetBundleName:
+   assetBundleVariant: