Fix for discrete actions (#4181)

5 年前 · 0d80d87a
--- a/ml-agents/mlagents/trainers/distributions_torch.py
+++ b/ml-agents/mlagents/trainers/distributions_torch.py
        return torch.multinomial(self.probs, 1)

    def pdf(self, value):
-        return torch.diag(self.probs.T[value.flatten()])
+        return torch.diag(self.probs.T[value.flatten().long()])

    def log_prob(self, value):
        return torch.log(self.pdf(value))
--- a/ml-agents/mlagents/trainers/policy/policy.py
+++ b/ml-agents/mlagents/trainers/policy/policy.py
            self.num_branches = len(self.brain.vector_action_space_size)
        self.previous_action_dict: Dict[str, np.array] = {}
        self.memory_dict: Dict[str, np.ndarray] = {}
-        self.normalize = trainer_settings
+        self.normalize = trainer_settings.network_settings.normalize
        self.use_recurrent = trainer_settings.network_settings.memory is not None
        self.model_path = trainer_settings.init_path

--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py

        actions = self.actor_critic.sample_action(dists)
        log_probs, entropies = self.actor_critic.get_probs_and_entropy(actions, dists)
-        if self.act_type == "continuous":
-            actions.squeeze_(-1)
+        actions = torch.squeeze(actions)

        return actions, log_probs, entropies, value_heads, memories

        fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
        fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
        fake_masks = torch.ones([1] + self.actor_critic.act_size)
-        fake_memories = torch.zeros([1] + [self.m_size])
+        # fake_memories = torch.zeros([1] + [self.m_size])
        export_path = "./model-" + str(step) + ".onnx"
        output_names = ["action", "action_probs"]
        input_names = ["vector_observation", "action_mask"]
--- a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
        if self.policy.use_continuous_act:
            actions = torch.as_tensor(batch["actions"]).unsqueeze(-1)
        else:
-            actions = torch.as_tensor(batch["actions"])
+            actions = torch.as_tensor(batch["actions"], dtype=torch.long)

        memories = [
            torch.as_tensor(batch["memory"][i])