
Add clip to export and make optional in policy

/develop/torch-clip-scale
Ervin Teng, 4 years ago
Current commit: 78f88c15
2 files changed, 10 insertions, 1 deletion
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)
  2. ml-agents/mlagents/trainers/torch/networks.py (5 changes)

ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)


             conditional_sigma=self.condition_sigma_on_obs,
             tanh_squash=tanh_squash,
         )
+        self._clip_action = not tanh_squash
         # Save the m_size needed for export
         self._export_m_size = self.m_size
         # m_size needed for training is determined by network, not trainer settings
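
The new _clip_action flag records that clipping is only needed when the distribution does not already squash its samples. A minimal sketch of the two regimes, assuming a Gaussian policy head (illustrative code, not the ML-Agents source):

    import torch

    # With tanh_squash, the sample is passed through tanh and is already in
    # (-1, 1); without it, the raw Gaussian sample is unbounded and must be
    # clamped to [-3, 3] and rescaled into [-1, 1].
    raw_action = torch.normal(torch.zeros(3), torch.ones(3))
    tanh_squash = False  # mirrors the trainer setting
    if tanh_squash:
        action = torch.tanh(raw_action)
    else:
        action = torch.clamp(raw_action, -3, 3) / 3
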

             vec_obs, vis_obs, masks=masks, memories=memories
         )
-        clipped_action = torch.clamp(action, -3, 3) / 3
+        if self._clip_action:
+            clipped_action = torch.clamp(action, -3, 3) / 3
+        else:
+            clipped_action = action
         run_out["pre_action"] = ModelUtils.to_numpy(action)
         run_out["action"] = ModelUtils.to_numpy(clipped_action)
         # Todo - make pre_action difference
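
With this change, "pre_action" always stores the raw network output while "action" stores the optionally clipped version. A small hypothetical helper with example values, showing the same conditional path (post_process is illustrative and not part of the codebase):

    import torch

    def post_process(action: torch.Tensor, clip_action: bool) -> torch.Tensor:
        # Mirror of the policy's conditional: clamp-and-rescale only when
        # the distribution's output is unbounded.
        if clip_action:
            return torch.clamp(action, -3, 3) / 3
        return action

    a = torch.tensor([-4.0, 0.3, 2.0])
    print(post_process(a, clip_action=True))   # tensor([-1.0000, 0.1000, 0.6667])
    print(post_process(a, clip_action=False))  # tensor([-4.0000, 0.3000, 2.0000])
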

ml-agents/mlagents/trainers/torch/networks.py (5 changes)


             self.distribution = MultiCategoricalDistribution(
                 self.encoding_size, self.action_spec.discrete_branches
             )
+        # During training, clipping is done in TorchPolicy, but we need to clip before ONNX
+        # export as well.
+        self._clip_action_on_export = not tanh_squash

     @property
     def memory_size(self) -> int:

             action_out = torch.stack(action_list, dim=-1)
         else:
             action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
+        if self._clip_action_on_export:
+            action_out = torch.clamp(action_out, -3, 3) / 3
         return (
             action_out,
             self.version_number,
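
Clipping inside the actor's own forward pass matters because ONNX export traces that pass: anything done afterwards in TorchPolicy would simply be absent from the exported graph. A self-contained sketch of the idea, assuming a toy module (ActorStub and its shapes are hypothetical, not the ML-Agents network):

    import torch

    class ActorStub(torch.nn.Module):
        def __init__(self, clip_action_on_export: bool = True):
            super().__init__()
            self.clip_action_on_export = clip_action_on_export
            self.net = torch.nn.Linear(4, 2)

        def forward(self, obs: torch.Tensor) -> torch.Tensor:
            action_out = self.net(obs)
            if self.clip_action_on_export:
                # Baked into the traced graph, so inference runtimes apply
                # the same clamp-and-rescale as the training-time policy.
                action_out = torch.clamp(action_out, -3, 3) / 3
            return action_out

    torch.onnx.export(ActorStub(), torch.zeros(1, 4), "actor_stub.onnx")
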
