Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents/mlagents/trainers/sac/optimizer_torch.py
/ml-agents/mlagents/trainers/torch/model_serialization.py
/ml-agents/mlagents/trainers/torch/networks.py
/Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs.meta

4 commits

Author         SHA1      Message            Commit date
vincentpierre  d2d71116  adding meta files  4 years ago
vincentpierre  6a61eb05  -                  4 years ago
vincentpierre  bf16bad6  _                  4 years ago
vincentpierre  9fbc2e0e  _                  4 years ago
Showing 6 changed files with 79 insertions and 26 deletions
  1. ml-agents/mlagents/trainers/sac/optimizer_torch.py (18 changes)
  2. ml-agents/mlagents/trainers/torch/model_serialization.py (22 changes)
  3. ml-agents/mlagents/trainers/torch/networks.py (49 changes)
  4. Project/Assets/ML-Agents/Examples/Arena-Sequence.meta (8 changes)
  5. Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs.meta (8 changes)

ml-agents/mlagents/trainers/sac/optimizer_torch.py (18 changes)


  policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks)
  entropy_loss = self.sac_entropy_loss(log_probs, masks)
+ # Compute surrogate loss for predicting cube position:
+ l_1 = self.value_network.q1_network.network_body.get_surrogate_loss(current_obs)
+ l_2 = self.value_network.q2_network.network_body.get_surrogate_loss(current_obs)
+ l_v = self.target_network.network_body.get_surrogate_loss(current_obs)
+ surrogate_loss_v = (l_1 + l_2 + l_v) * 0.05
+ surrogate_loss_p = (
+     self.policy.actor_critic.network_body.get_surrogate_loss(current_obs) * 0.05
+ )
+ surrogate_loss = surrogate_loss_v + surrogate_loss_p
- policy_loss.backward()
+ (policy_loss + surrogate_loss_p).backward()
- total_value_loss.backward()
+ (total_value_loss + surrogate_loss_v).backward()
  self.value_optimizer.step()
  ModelUtils.update_learning_rate(self.entropy_optimizer, decay_lr)

  "Losses/Value Loss": value_loss.item(),
  "Losses/Q1 Loss": q1_loss.item(),
  "Losses/Q2 Loss": q2_loss.item(),
+ "Losses/Surrogate Loss": surrogate_loss.item(),
  "Policy/Discrete Entropy Coeff": torch.mean(
      torch.exp(self._log_ent_coef.discrete)
  ).item(),
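
The hunk above folds an auxiliary "surrogate" regression loss into both the policy and value updates, weighted by 0.05, and backpropagates each combined loss in a single call. Below is a minimal, self-contained sketch of that pattern; the module, tensor shapes, and the stand-in main loss are hypothetical, not the ML-Agents classes.

import torch
import torch.nn as nn

class BodyWithAuxHead(nn.Module):
    def __init__(self, obs_size: int = 32, hidden: int = 64, target_size: int = 9):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(obs_size, hidden), nn.ReLU())
        # Hypothetical auxiliary head: regress a 9-dim target from the encoding.
        self.surrogate_predictor = nn.Linear(hidden, target_size)

    def get_surrogate_loss(self, obs: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        pred = self.surrogate_predictor(self.encoder(obs))
        # Mean over the batch of the squared error summed over the 9 dims.
        return torch.mean(torch.sum((pred - target) ** 2, dim=1))

body = BodyWithAuxHead()
opt = torch.optim.Adam(body.parameters(), lr=3e-4)
obs, target = torch.randn(8, 32), torch.randn(8, 9)

main_loss = body.encoder(obs).pow(2).mean()  # stand-in for the actual RL loss
surrogate = 0.05 * body.get_surrogate_loss(obs, target)  # same 0.05 weight as the diff
opt.zero_grad()
(main_loss + surrogate).backward()  # one backward pass over the combined objective
opt.step()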

ml-agents/mlagents/trainers/torch/model_serialization.py (22 changes)


  self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
  self.output_names = ["version_number", "memory_size"]
- if self.policy.behavior_spec.action_spec.continuous_size > 0:
+ if True:
      self.output_names += [
          "continuous_actions",
          "continuous_action_output_shape",

  self.output_names += ["discrete_actions", "discrete_action_output_shape"]
  self.dynamic_axes.update({"discrete_actions": {0: "batch"}})
- if (
-     self.policy.behavior_spec.action_spec.continuous_size == 0
-     or self.policy.behavior_spec.action_spec.discrete_size == 0
- ):
-     self.output_names += [
-         "action",
-         "is_continuous_control",
-         "action_output_shape",
-     ]
-     self.dynamic_axes.update({"action": {0: "batch"}})
+ # if (
+ #     self.policy.behavior_spec.action_spec.continuous_size == 0
+ #     or self.policy.behavior_spec.action_spec.discrete_size == 0
+ # ):
+ #     self.output_names += [
+ #         "action",
+ #         "is_continuous_control",
+ #         "action_output_shape",
+ #     ]
+ #     self.dynamic_axes.update({"action": {0: "batch"}})
  def export_policy_model(self, output_filepath: str) -> None:
      """

ml-agents/mlagents/trainers/torch/networks.py (49 changes)


  normalize=self.normalize,
  )
- total_enc_size = sum(self.embedding_sizes) + encoded_act_size
+ total_enc_size = sum(self.embedding_sizes) + encoded_act_size - 9
+ self.surrogate_predictor = torch.nn.Linear(self.h_size, 9)
  self.linear_encoder = LinearEncoder(
      total_enc_size, network_settings.num_layers, self.h_size
  )

  actions: Optional[torch.Tensor] = None,
  memories: Optional[torch.Tensor] = None,
  sequence_length: int = 1,
+ return_target=False,

- processed_obs = processor(obs_input)
- encodes.append(processed_obs)
+ if obs_input.shape[1] == 9:
+     target = obs_input
+     if return_target:
+         return target
+ else:
+     processed_obs = processor(obs_input)
+     encodes.append(processed_obs)
  if len(encodes) == 0:
      raise Exception("No valid inputs to network.")

  encoding = encoding.reshape([-1, self.m_size // 2])
  return encoding, memories
+ def get_surrogate_loss(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+     prediction, _ = self.forward(inputs)
+     prediction = self.surrogate_predictor(prediction)
+     target = self.forward(inputs, return_target=True)
+     loss = torch.sum((prediction - target) ** 2, dim=1)
+     loss = torch.mean(loss)
+     return loss
+ def get_prediction(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+     prediction, _ = self.forward(inputs)
+     prediction = self.surrogate_predictor(prediction)
+     return prediction

  class ValueNetwork(nn.Module):
      def __init__(

  action_out_deprecated,
  ) = self.action_model.get_action_out(encoding, masks)
  export_out = [self.version_number, self.memory_size_vector]
- if self.action_spec.continuous_size > 0:
-     export_out += [cont_action_out, self.continuous_act_size_vector]
+ if True:
+     # export_out += [cont_action_out, self.continuous_act_size_vector]
+     export_out += [self.network_body.get_prediction(inputs), torch.nn.Parameter(
+         torch.Tensor([int(9)]), requires_grad=False
+     )]
- # Only export deprecated nodes with non-hybrid action spec
- if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
-     export_out += [
-         action_out_deprecated,
-         self.is_continuous_int_deprecated,
-         self.act_size_vector_deprecated,
-     ]
+ # # Only export deprecated nodes with non-hybrid action spec
+ # if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
+ #     export_out += [
+ #         action_out_deprecated,
+ #         self.is_continuous_int_deprecated,
+ #         self.act_size_vector_deprecated,
+ #     ]
  return tuple(export_out)
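
The `- 9` in total_enc_size and the branch on `obs_input.shape[1] == 9` implement a hold-one-out scheme: the 9-dim observation (the cube position) is withheld from the encoder's input and used only as the regression target for surrogate_predictor. A standalone sketch of that split, with hypothetical sizes and module names rather than the real NetworkBody:

from typing import List, Optional
import torch
import torch.nn as nn

class SplitTargetEncoder(nn.Module):
    def __init__(self, enc_in: int = 16, hidden: int = 64, target_size: int = 9):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(enc_in, hidden), nn.ReLU())
        self.surrogate_predictor = nn.Linear(hidden, target_size)

    def forward(self, inputs: List[torch.Tensor], return_target: bool = False):
        target: Optional[torch.Tensor] = None
        encodes = []
        for obs in inputs:
            if obs.shape[1] == 9:  # the 9-dim obs is the target, not an encoder input
                target = obs
            else:
                encodes.append(obs)
        if return_target:
            return target
        return self.encoder(torch.cat(encodes, dim=1))

enc = SplitTargetEncoder()
inputs = [torch.randn(8, 16), torch.randn(8, 9)]
pred = enc.surrogate_predictor(enc(inputs))
target = enc(inputs, return_target=True)
loss = torch.mean(torch.sum((pred - target) ** 2, dim=1))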

Project/Assets/ML-Agents/Examples/Arena-Sequence.meta (8 changes)


+ fileFormatVersion: 2
+ guid: ab46f01a215b74b588a0a3c180a88813
+ folderAsset: yes
+ DefaultImporter:
+   externalObjects: {}
+   userData:
+   assetBundleName:
+   assetBundleVariant:

Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs.meta (8 changes)


+ fileFormatVersion: 2
+ guid: af7cee3bddc2e4ed595824b3c6d542b6
+ folderAsset: yes
+ DefaultImporter:
+   externalObjects: {}
+   userData:
+   assetBundleName:
+   assetBundleVariant: