
Add clip to export and make optional in policy

/develop/torch-clip-scale
Ervin Teng, 4 years ago
Current commit: 78f88c15
2 files changed, 10 insertions, 1 deletion
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)
  2. ml-agents/mlagents/trainers/torch/networks.py (5 changes)

ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)


             conditional_sigma=self.condition_sigma_on_obs,
             tanh_squash=tanh_squash,
         )
+        self._clip_action = not tanh_squash
         # Save the m_size needed for export
         self._export_m_size = self.m_size
         # m_size needed for training is determined by network, not trainer settings
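
The new _clip_action flag records that clipping is only needed when the distribution does not already squash its samples. A minimal sketch of the two regimes, assuming a Gaussian policy head (illustrative code, not the ML-Agents source):

    import torch

    # With tanh_squash, the sample is passed through tanh and is already in
    # (-1, 1); without it, the raw Gaussian sample is unbounded and must be
    # clamped to [-3, 3] and rescaled into [-1, 1].
    raw_action = torch.normal(torch.zeros(3), torch.ones(3))
    tanh_squash = False  # mirrors the trainer setting
    if tanh_squash:
        action = torch.tanh(raw_action)
    else:
        action = torch.clamp(raw_action, -3, 3) / 3
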

             vec_obs, vis_obs, masks=masks, memories=memories
         )
-        clipped_action = torch.clamp(action, -3, 3) / 3
+        if self._clip_action:
+            clipped_action = torch.clamp(action, -3, 3) / 3
+        else:
+            clipped_action = action
         run_out["pre_action"] = ModelUtils.to_numpy(action)
         run_out["action"] = ModelUtils.to_numpy(clipped_action)
         # Todo - make pre_action difference
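
With this change, "pre_action" always stores the raw network output while "action" stores the optionally clipped version. A small hypothetical helper with example values, showing the same conditional path (post_process is illustrative and not part of the codebase):

    import torch

    def post_process(action: torch.Tensor, clip_action: bool) -> torch.Tensor:
        # Mirror of the policy's conditional: clamp-and-rescale only when
        # the distribution's output is unbounded.
        if clip_action:
            return torch.clamp(action, -3, 3) / 3
        return action

    a = torch.tensor([-4.0, 0.3, 2.0])
    print(post_process(a, clip_action=True))   # tensor([-1.0000, 0.1000, 0.6667])
    print(post_process(a, clip_action=False))  # tensor([-4.0000, 0.3000, 2.0000])
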

ml-agents/mlagents/trainers/torch/networks.py (5 changes)


             self.distribution = MultiCategoricalDistribution(
                 self.encoding_size, self.action_spec.discrete_branches
             )
+        # During training, clipping is done in TorchPolicy, but we need to clip before ONNX
+        # export as well.
+        self._clip_action_on_export = not tanh_squash

     @property
     def memory_size(self) -> int:

             action_out = torch.stack(action_list, dim=-1)
         else:
             action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
+        if self._clip_action_on_export:
+            action_out = torch.clamp(action_out, -3, 3) / 3
         return (
             action_out,
             self.version_number,
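
Clipping inside the actor's own forward pass matters because ONNX export traces that pass: anything done afterwards in TorchPolicy would simply be absent from the exported graph. A self-contained sketch of the idea, assuming a toy module (ActorStub and its shapes are hypothetical, not the ML-Agents network):

    import torch

    class ActorStub(torch.nn.Module):
        def __init__(self, clip_action_on_export: bool = True):
            super().__init__()
            self.clip_action_on_export = clip_action_on_export
            self.net = torch.nn.Linear(4, 2)

        def forward(self, obs: torch.Tensor) -> torch.Tensor:
            action_out = self.net(obs)
            if self.clip_action_on_export:
                # Baked into the traced graph, so inference runtimes apply
                # the same clamp-and-rescale as the training-time policy.
                action_out = torch.clamp(action_out, -3, 3) / 3
            return action_out

    torch.onnx.export(ActorStub(), torch.zeros(1, 4), "actor_stub.onnx")
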
