Discrete actions run; continuous actions pass

4 年前 · b36fcf16
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
-from typing import Any, Dict, List, Tuple, Optional
+from typing import Any, Dict, List, Tuple, Optional, Union
 import numpy as np
 from mlagents.torch_utils import torch, default_device
 import copy
        memories: Optional[torch.Tensor] = None,
        seq_len: int = 1,
        all_log_probs: bool = False,
-    ) -> Tuple[List[torch.Tensor], torch.Tensor, torch.Tensor, torch.Tensor]:
+    ) -> Tuple[List[torch.Tensor], Union[torch.Tensor, List[torch.Tensor]], torch.Tensor, torch.Tensor]:
        """
        :param vec_obs: List of vector observations.
        :param vis_obs: List of visual observations.
                vec_obs, vis_obs, masks=masks, memories=memories
            )
        run_out["action"] = action.to_numpy_dict()
-        run_out["pre_action"] = action.to_numpy_dict()["continuous_action"] # Todo - make pre_action difference
+        run_out["pre_action"] = action.to_numpy_dict()["continuous_action"] if self.use_continuous_act else None# Todo - make pre_action difference
        run_out["log_probs"] = log_probs.to_numpy_dict()
        run_out["entropy"] = ModelUtils.to_numpy(entropy)
        run_out["learning_rate"] = 0.0
--- a/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
 SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)


-@pytest.mark.parametrize("use_discrete", [False])
+@pytest.mark.parametrize("use_discrete", [True, False])
 def test_simple_ppo(use_discrete):
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = attr.evolve(PPO_TORCH_CONFIG)
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
        if self.continuous is not None:
            action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous)
        if self.discrete is not None:
-            action_arrays_dict["discrete_action"] = ModelUtils.to_numpy(self.discrete)
+            action_arrays_dict["discrete_action"] = np.array([ModelUtils.to_numpy(_disc) for _disc in self.discrete])
        return action_arrays_dict

    def to_tensor_list(self) -> List[torch.Tensor]:
        if self.continuous is not None:
            log_prob_arrays_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
        if self.discrete is not None:
-            log_prob_arrays_dict["discrete_log_probs"] = ModelUtils.to_numpy(self.discrete)
+            log_prob_arrays_dict["discrete_log_probs"] = np.array([ModelUtils.to_numpy(_disc) for _disc in self.discrete])
        return log_prob_arrays_dict

    def to_tensor_list(self) -> List[torch.Tensor]:
--- a/ml-agents/mlagents/trainers/trajectory.py
+++ b/ml-agents/mlagents/trainers/trajectory.py
            agent_buffer_trajectory["done"].append(exp.done)
            # Add the outputs of the last eval
            if exp.action_pre is not None:
-                actions_pre = exp.action_pre
-                agent_buffer_trajectory["actions_pre"].append(actions_pre)
+                agent_buffer_trajectory["actions_pre"].append(exp.action_pre)

            # Adds the log prob and action of continuous/discrete separately 
            for act_type, act_array in exp.action.items():