ActionTuple default is now np.array, not None

4 年前 · 453a2bba
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py
        )


-class ActionTuple(NamedTuple):
+class ActionTuple:
-    A NamedTuple whose fields correspond to actions of different types.
-    Continuous and discrete actions are numpy arrays.
+    An object whose fields correspond to actions of different types.
+    Continuous and discrete actions are numpy arrays of type float32 and
+    int32, respectively, and are cast to those dtypes on construction.
+    Their shapes are (n_agents, continuous_size) and (n_agents, discrete_size),
+    respectively.
-    continuous: np.ndarray  # dims (n_agents, continuous_size)
-    discrete: np.ndarray  # dims (n_agents, discrete_size)
+    def __init__(self, continuous: np.ndarray, discrete: np.ndarray):
+        if continuous.dtype != np.float32:
+            continuous = continuous.astype(np.float32, copy=False)
+        self._continuous = continuous
+        if discrete.dtype != np.int32:
+            discrete = discrete.astype(np.int32, copy=False)
+        self._discrete = discrete
+
+    @property
+    def continuous(self) -> np.ndarray:
+        return self._continuous
+
+    @property
+    def discrete(self) -> np.ndarray:
+        return self._discrete


 class ActionSpec(NamedTuple):
        for a number of agents.
        :param n_agents: The number of agents that will have actions generated
        """
-        continuous: np.ndarray = None
-        discrete: np.ndarray = None
-        if self.continuous_size > 0:
-            continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
-
-        if self.discrete_size > 0:
-            discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
+        continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
+        discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
        return ActionTuple(continuous, discrete)

    def random_action(self, n_agents: int) -> ActionTuple:
        :param n_agents: The number of agents that will have actions generated
        """
-        continuous: np.ndarray = None
-        discrete: np.ndarray = None
-        if self.continuous_size > 0:
-            continuous = np.random.uniform(
-                low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
-            ).astype(np.float32)
-
+        continuous = np.random.uniform(
+            low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
+        )
+        discrete = np.array([])
        if self.discrete_size > 0:
            discrete = np.column_stack(
                [
        for the correct number of agents and ensures the type.
        """
        _expected_shape = (n_agents, self.continuous_size)
-        if self.continuous_size > 0 and actions.continuous.shape != _expected_shape:
+        if actions.continuous.shape != _expected_shape:
-        if actions.continuous.dtype != np.float32:
-            actions.continuous = actions.continuous.astype(np.float32)
-
-        if self.discrete_size > 0 and actions.discrete.shape != _expected_shape:
+        if actions.discrete.shape != _expected_shape:
-        if actions.discrete.dtype != np.int32:
-            actions.discrete = actions.discrete.astype(np.int32)
        return actions

    @staticmethod
--- a/ml-agents-envs/mlagents_envs/tests/test_steps.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_steps.py
    specs = ActionSpec.create_continuous(action_len)
    zero_action = specs.empty_action(4).continuous
    assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
+    print(specs.random_action(4))
+    print(random_action)
    assert random_action.dtype == np.float32
    assert random_action.shape == (4, action_len)
    assert np.min(random_action) >= -1
--- a/ml-agents/mlagents/trainers/env_manager.py
+++ b/ml-agents/mlagents/trainers/env_manager.py
    def action_buffers_from_numpy_dict(
        action_dict: Dict[str, np.ndarray]
    ) -> ActionTuple:
-        continuous: np.ndarray = None
-        discrete: np.ndarray = None
+        continuous: np.ndarray = np.array([], dtype=np.float32)
+        discrete: np.ndarray = np.array([], dtype=np.int32)
        if "continuous_action" in action_dict:
            continuous = action_dict["continuous_action"]
        if "discrete_action" in action_dict:
--- a/ml-agents/mlagents/trainers/policy/policy.py
+++ b/ml-agents/mlagents/trainers/policy/policy.py
        :return: Dict of action type to np.ndarray
        """
        act_dict: Dict[str, np.ndarray] = {}
-        action_buffer = self.behavior_spec.action_spec.empty_action(num_agents)
-        if action_buffer.continuous is not None:
-            act_dict["continuous_action"] = action_buffer.continuous
-        if action_buffer.discrete is not None:
-            act_dict["discrete_action"] = action_buffer.discrete
+        action_tuple = self.behavior_spec.action_spec.empty_action(num_agents)
+        if self.behavior_spec.action_spec.continuous_size > 0:
+            act_dict["continuous_action"] = action_tuple.continuous
+        if self.behavior_spec.action_spec.discrete_size > 0:
+            act_dict["discrete_action"] = action_tuple.discrete
        return act_dict

    def save_previous_action(
--- a/ml-agents/mlagents/trainers/tests/simple_test_envs.py
+++ b/ml-agents/mlagents/trainers/tests/simple_test_envs.py
        else:
            action_spec = ActionSpec.create_continuous(action_size)
        self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
+        self.action_spec = action_spec
        self.action_size = action_size
        self.names = brain_names
        self.positions: Dict[str, List[float]] = {}
    def _take_action(self, name: str) -> bool:
        deltas = []
        _act = self.action[name]
-        if _act.discrete is not None:
+        if self.action_spec.discrete_size > 0:
-        if _act.continuous is not None:
+        if self.action_spec.continuous_size > 0:
            for _cont in _act.continuous[0]:
                deltas.append(_cont)
        for i, _delta in enumerate(deltas):
            for name in self.names:
                if self.discrete:
                    self.action[name] = ActionTuple(
-                        [[]], np.array([[1]] if self.goal[name] > 0 else [[0]])
+                        np.array([], dtype=np.float32),
+                        np.array(
+                            [[1]] if self.goal[name] > 0 else [[0]], dtype=np.int32
+                        ),
-                        np.array([[float(self.goal[name])]]), [[]]
+                        np.array([[float(self.goal[name])]], dtype=np.float32),
+                        np.array([], dtype=np.int32),
                    )
            self.step()