|
|
|
|
|
|
from typing import List, Optional, Tuple, NamedTuple, Dict

from mlagents.torch_utils import torch, nn
import numpy as np

from mlagents.trainers.torch.encoders import (
    SimpleVisualEncoder,
    ResNetVisualEncoder,
    NatureVisualEncoder,
    SmallVisualEncoder,
    VectorInput,
)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
from mlagents_envs.base_env import ActionSpec

class AgentAction(NamedTuple):
    """
    A NamedTuple containing the tensor for continuous actions and list of tensors for
    discrete actions. Utility functions provide numpy <=> tensor conversions to be
    sent as actions to the environment manager as well as used by the optimizers.
    :param continuous_tensor: Torch tensor corresponding to continuous actions
    :param discrete_list: List of Torch tensors each corresponding to discrete actions
    """

    continuous_tensor: torch.Tensor
    discrete_list: Optional[List[torch.Tensor]]

    @property
    def discrete_tensor(self):
        """
        Returns the discrete action list as a stacked tensor.
        """
        return torch.stack(self.discrete_list, dim=-1)

    def to_numpy_dict(self) -> Dict[str, np.ndarray]:
        """
        Returns a Dict of np arrays with an entry corresponding to the continuous action
        and an entry corresponding to the discrete action. "continuous_action" and
        "discrete_action" are added to the agent's buffer individually to maintain a flat buffer.
        """
        array_dict: Dict[str, np.ndarray] = {}
        if self.continuous_tensor is not None:
            array_dict["continuous_action"] = ModelUtils.to_numpy(
                self.continuous_tensor
            )
        if self.discrete_list is not None:
            array_dict["discrete_action"] = ModelUtils.to_numpy(
                self.discrete_tensor[:, 0, :]
            )
        return array_dict

    @staticmethod
    def from_dict(buff: Dict[str, np.ndarray]) -> "AgentAction":
        """
        A static method that accesses continuous and discrete action fields in an AgentBuffer
        and constructs the corresponding AgentAction from the retrieved np arrays.
        """
        continuous: torch.Tensor = None
        discrete: List[torch.Tensor] = None  # type: ignore
        if "continuous_action" in buff:
            continuous = ModelUtils.list_to_tensor(buff["continuous_action"])
        if "discrete_action" in buff:
            discrete_tensor = ModelUtils.list_to_tensor(
                buff["discrete_action"], dtype=torch.long
            )
            discrete = [
                discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])
            ]
        return AgentAction(continuous, discrete)
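# Illustrative usage sketch (not part of the original module): round-tripping an
# AgentAction through the flat buffer format. The shapes assume a batch of 2, three
# continuous actions, and a single discrete branch whose samples have shape [2, 1].
#
#     action = AgentAction(
#         continuous_tensor=torch.randn(2, 3),
#         discrete_list=[torch.zeros(2, 1, dtype=torch.long)],
#     )
#     arrays = action.to_numpy_dict()   # {"continuous_action": ..., "discrete_action": ...}
#     restored = AgentAction.from_dict(arrays)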
|
|
|
|
|
|
|
|
|
|
|
class ActionLogProbs(NamedTuple):
    """
    A NamedTuple containing the tensor for continuous log probs and list of tensors for
    discrete log probs of individual actions as well as all the log probs for an entire branch.
    Utility functions provide numpy <=> tensor conversions to be used by the optimizers.
    :param continuous_tensor: Torch tensor corresponding to log probs of continuous actions
    :param discrete_list: List of Torch tensors each corresponding to log probs of the discrete
    actions that were sampled.
    :param all_discrete_list: List of Torch tensors each corresponding to all log probs of
    a discrete action branch, even the discrete actions that were not sampled. Each Tensor in
    all_discrete_list holds the log probabilities of one discrete branch.
    """

    continuous_tensor: torch.Tensor
    discrete_list: Optional[List[torch.Tensor]]
    all_discrete_list: Optional[List[torch.Tensor]]

    @property
    def discrete_tensor(self):
        """
        Returns the discrete log probs list as a stacked tensor.
        """
        return torch.stack(self.discrete_list, dim=-1)

    @property
    def all_discrete_tensor(self):
        """
        Returns the discrete log probs of each branch as a tensor.
        """
        return torch.cat(self.all_discrete_list, dim=1)

    def to_numpy_dict(self) -> Dict[str, np.ndarray]:
        """
        Returns a Dict of np arrays with an entry corresponding to the continuous log probs
        and an entry corresponding to the discrete log probs. "continuous_log_probs" and
        "discrete_log_probs" are added to the agent's buffer individually to maintain a flat buffer.
        """
        array_dict: Dict[str, np.ndarray] = {}
        if self.continuous_tensor is not None:
            array_dict["continuous_log_probs"] = ModelUtils.to_numpy(
                self.continuous_tensor
            )
        if self.discrete_list is not None:
            array_dict["discrete_log_probs"] = ModelUtils.to_numpy(self.discrete_tensor)
        return array_dict

    def _to_tensor_list(self) -> List[torch.Tensor]:
        """
        Returns the tensors in the ActionLogProbs as a flat List of torch Tensors. This
        is private and serves as a utility for self.flatten()
        """
        tensor_list: List[torch.Tensor] = []
        if self.continuous_tensor is not None:
            tensor_list.append(self.continuous_tensor)
        if self.discrete_list is not None:
            tensor_list.append(self.discrete_tensor)
        return tensor_list

    def flatten(self) -> torch.Tensor:
        """
        A utility method that returns all log probs in ActionLogProbs as a flattened tensor.
        This is useful for algorithms like PPO which can treat all log probs in the same way.
        """
        return torch.cat(self._to_tensor_list(), dim=1)

    @staticmethod
    def from_dict(buff: Dict[str, np.ndarray]) -> "ActionLogProbs":
        """
        A static method that accesses continuous and discrete log probs fields in an AgentBuffer
        and constructs the corresponding ActionLogProbs from the retrieved np arrays.
        """
        continuous: torch.Tensor = None
        discrete: List[torch.Tensor] = None  # type: ignore

        if "continuous_log_probs" in buff:
            continuous = ModelUtils.list_to_tensor(buff["continuous_log_probs"])
        if "discrete_log_probs" in buff:
            discrete_tensor = ModelUtils.list_to_tensor(buff["discrete_log_probs"])
            discrete = [
                discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])
            ]
        return ActionLogProbs(continuous, discrete, None)
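# Illustrative usage sketch (not part of the original module): `flatten()` concatenates
# the continuous log probs with the stacked log probs of the sampled discrete actions
# along dim=1, so an optimizer such as PPO can form a single probability ratio. The
# names `old_log_probs_batch` and `new_log_probs` below are hypothetical:
#
#     old_log_probs = ActionLogProbs.from_dict(old_log_probs_batch).flatten()
#     ratio = torch.exp(new_log_probs.flatten() - old_log_probs)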
|
|
|
|
|
|
|
|
|
|
|
class ModelUtils:
    # Minimum supported input height/width (in pixels) for each visual encoder type.
    # If an encoder is refactored, adjust these values as well.
    MIN_RESOLUTION_FOR_ENCODER = {
        EncoderType.NATURE_CNN: 36,
        EncoderType.RESNET: 15,
    }

    class ActionFlattener:
        def __init__(self, action_spec: ActionSpec):
            self._specs = action_spec

        @property
        def flattened_size(self) -> int:
            return self._specs.continuous_size + sum(self._specs.discrete_branches)

        def forward(self, action: AgentAction) -> torch.Tensor:
            action_list: List[torch.Tensor] = []
            if self._specs.continuous_size > 0:
                action_list.append(action.continuous_tensor)
            if self._specs.discrete_size > 0:
                flat_discrete = torch.cat(
                    ModelUtils.actions_to_onehot(
                        torch.as_tensor(action.discrete_tensor, dtype=torch.long),
                        self._specs.discrete_branches,
                    ),
                    dim=1,
                )
                action_list.append(flat_discrete)
            return torch.cat(action_list, dim=1)
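    # Illustrative usage sketch (not part of the original module): for an ActionSpec
    # with, say, 2 continuous actions and discrete branches of sizes (3, 2),
    # `flattened_size` is 2 + 3 + 2 = 7, and `forward` concatenates the continuous
    # tensor with the one-hot encoding of each sampled discrete branch:
    #
    #     flattener = ModelUtils.ActionFlattener(action_spec)  # hypothetical ActionSpec instance
    #     flat = flattener.forward(agent_action)  # shape: [batch, flattened_size]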
|
|
|
|
|
|
|
    @staticmethod
    def update_learning_rate(optim: torch.optim.Optimizer, lr: float) -> None:
|
|
|