Replace tensor.detach().cpu().numpy() calls with the ModelUtils.to_numpy utility method

4 年前 · 108fac9a
--- a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
+++ b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
        )

        for name, estimate in value_estimates.items():
-            value_estimates[name] = estimate.detach().cpu().numpy()
-            next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
+            value_estimates[name] = ModelUtils.to_numpy(estimate)
+            next_value_estimate[name] = ModelUtils.to_numpy(next_value_estimate[name])

        if done:
            for k in next_value_estimate:
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
            action, log_probs, entropy, value_heads, memories = self.sample_actions(
                vec_obs, vis_obs, masks=masks, memories=memories
            )
-        run_out["action"] = action.detach().cpu().numpy()
-        run_out["pre_action"] = action.detach().cpu().numpy()
+        run_out["action"] = ModelUtils.to_numpy(action)
+        run_out["pre_action"] = ModelUtils.to_numpy(action)
-        run_out["log_probs"] = log_probs.detach().cpu().numpy()
-        run_out["entropy"] = entropy.detach().cpu().numpy()
+        run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
+        run_out["entropy"] = ModelUtils.to_numpy(entropy)
-            name: t.detach().cpu().numpy() for name, t in value_heads.items()
+            name: ModelUtils.to_numpy(t) for name, t in value_heads.items()
-            run_out["memory_out"] = memories.detach().cpu().numpy().squeeze(0)
+            run_out["memory_out"] = ModelUtils.to_numpy(memories).squeeze(0)
        return run_out

    def get_action(
--- a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py

        self.optimizer.step()
        update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
-            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
+            "Losses/Policy Loss": abs(ModelUtils.to_numpy(policy_loss)),
+            "Losses/Value Loss": ModelUtils.to_numpy(value_loss),
            "Policy/Learning Rate": decay_lr,
            "Policy/Epsilon": decay_eps,
            "Policy/Beta": decay_bet,
--- a/ml-agents/mlagents/trainers/sac/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/sac/optimizer_torch.py
        # Update target network
        self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
        update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
-            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
-            "Losses/Q1 Loss": q1_loss.detach().cpu().numpy(),
-            "Losses/Q2 Loss": q2_loss.detach().cpu().numpy(),
-            "Policy/Entropy Coeff": torch.exp(self._log_ent_coef)
-            .detach()
-            .cpu()
-            .numpy(),
+            "Losses/Policy Loss": abs(ModelUtils.to_numpy(policy_loss)),
+            "Losses/Value Loss": ModelUtils.to_numpy(value_loss),
+            "Losses/Q1 Loss": ModelUtils.to_numpy(q1_loss),
+            "Losses/Q2 Loss": ModelUtils.to_numpy(q2_loss),
+            "Policy/Entropy Coeff": ModelUtils.to_numpy(torch.exp(self._log_ent_coef)),
            "Policy/Learning Rate": decay_lr,
        }

--- a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
 from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
 )
+from mlagents.trainers.torch.utils import ModelUtils

 SEED = [42]

    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(200):
        curiosity_rp.update(buffer)
-    prediction = curiosity_rp._network.predict_action(buffer)[0].detach()
+    prediction = ModelUtils.to_numpy(curiosity_rp._network.predict_action(buffer)[0])
    target = buffer["actions"][0]
    error = float(torch.mean((prediction - target) ** 2))
    assert error < 0.001
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_next_state(buffer)[0]
    target = curiosity_rp._network.get_next_state(buffer)[0]
-    error = float(torch.mean((prediction - target) ** 2).detach())
+    error = float(ModelUtils.to_numpy(torch.mean((prediction - target) ** 2)))
    assert error < 0.001
--- a/ml-agents/mlagents/trainers/torch/components/bc/module.py
+++ b/ml-agents/mlagents/trainers/torch/components/bc/module.py
        bc_loss.backward()

        self.optimizer.step()
-        run_out = {"loss": bc_loss.detach().cpu().numpy()}
+        run_out = {"loss": ModelUtils.to_numpy(bc_loss)}
        return run_out
--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py

    def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
        with torch.no_grad():
-            rewards = self._network.compute_reward(mini_batch).detach().cpu().numpy()
+            rewards = ModelUtils.to_numpy(self._network.compute_reward(mini_batch))
        rewards = np.minimum(rewards, 1.0 / self.strength)
        return rewards * self._has_updated_once

        loss.backward()
        self.optimizer.step()
        return {
-            "Losses/Curiosity Forward Loss": forward_loss.detach().cpu().numpy(),
-            "Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
+            "Losses/Curiosity Forward Loss": ModelUtils.to_numpy(forward_loss),
+            "Losses/Curiosity Inverse Loss": ModelUtils.to_numpy(inverse_loss),
        }


--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
            estimates, _ = self._discriminator_network.compute_estimate(
                mini_batch, use_vail_noise=False
            )
-            return (
+            return ModelUtils.to_numpy(
-                .detach()
-                .cpu()
-                .numpy()
            )

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
        expert_estimate, expert_mu = self.compute_estimate(
            expert_batch, use_vail_noise=True
        )
-        stats_dict["Policy/GAIL Policy Estimate"] = (
-            policy_estimate.mean().detach().cpu().numpy()
+        stats_dict["Policy/GAIL Policy Estimate"] = ModelUtils.to_numpy(
+            policy_estimate.mean()
-        stats_dict["Policy/GAIL Expert Estimate"] = (
-            expert_estimate.mean().detach().cpu().numpy()
+        stats_dict["Policy/GAIL Expert Estimate"] = ModelUtils.to_numpy(
+            expert_estimate.mean()
-        stats_dict["Losses/GAIL Loss"] = discriminator_loss.detach().cpu().numpy()
+        stats_dict["Losses/GAIL Loss"] = ModelUtils.to_numpy(discriminator_loss)
        total_loss += discriminator_loss
        if self._settings.use_vail:
            # KL divergence loss (encourage latent representation to be normal)
                    torch.tensor(0.0),
                )
            total_loss += vail_loss
-            stats_dict["Policy/GAIL Beta"] = self._beta.detach().cpu().numpy()
-            stats_dict["Losses/GAIL KL Loss"] = kl_loss.detach().cpu().numpy()
+            stats_dict["Policy/GAIL Beta"] = ModelUtils.to_numpy(self._beta)
+            stats_dict["Losses/GAIL KL Loss"] = ModelUtils.to_numpy(kl_loss)
        if self.gradient_penalty_weight > 0.0:
            total_loss += (
                self.gradient_penalty_weight
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
        return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)

    @staticmethod
+    def to_numpy(tensor: torch.Tensor) -> np.ndarray:
+        """
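+        Converts a Torch Tensor to a numpy array. The Tensor is first detached from
+        the autograd graph, so Tensors that require gradients (e.g. losses) are
+        accepted. If the Tensor is on the GPU, it will be brought to the CPU.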
+        """
+        return tensor.detach().cpu().numpy()
+
+    @staticmethod
    def break_into_branches(
        concatenated_logits: torch.Tensor, action_size: List[int]
    ) -> List[torch.Tensor]: