Added a random action creator on the BehaviorSpecs (#4030)

* Added a random action creator on the BehaviorSpecs * Bumping numpy version * Bumping numpy version * Not using np.random.Generator as it seems to still be under development
5 年前 · 0d75f3da
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py
            return None

    def create_empty_action(self, n_agents: int) -> np.ndarray:
+        """
+        Generates a numpy array corresponding to an empty action (all zeros)
+        for a number of agents.
+        :param n_agents: The number of agents that will have actions generated
+        """
+
+    def create_random_action(self, n_agents: int) -> np.ndarray:
+        """
+        Generates a numpy array corresponding to a random action (either discrete
+        or continuous) for a number of agents.
+        Continuous actions are float32 values sampled uniformly between -1 and 1.
+        Discrete actions are int32 values with one column per action index, each
+        sampled from 0 (inclusive) to the size of that branch (exclusive).
+        Sampling goes through the global np.random state.
+        :param n_agents: The number of agents that will have actions generated
+        :return: An array with n_agents rows and action_size columns. Falls
+        through and returns None when the action type is neither continuous
+        nor discrete.
+        """
+        if self.is_action_continuous():
+            action = np.random.uniform(
+                low=-1.0, high=1.0, size=(n_agents, self.action_size)
+            ).astype(np.float32)
+            return action
+        elif self.is_action_discrete():
+            branch_size = self.discrete_action_branches
+            columns = [
+                np.random.randint(
+                    0,
+                    branch_size[i],  # type: ignore
+                    size=(n_agents),
+                    dtype=np.int32,
+                )
+                for i in range(self.action_size)
+            ]
+            if not columns:
+                # np.column_stack raises ValueError on an empty list, so a spec
+                # with no discrete branches gets an explicit (n_agents, 0) array.
+                return np.zeros((n_agents, 0), dtype=np.int32)
+            return np.column_stack(columns)


 class BehaviorMapping(Mapping):
--- a/ml-agents-envs/mlagents_envs/tests/test_steps.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_steps.py
    assert specs.action_size == 1
    assert specs.create_empty_action(5).shape == (5, 1)
    assert specs.create_empty_action(5).dtype == np.int32
+
+
+def test_action_generator():
+    """
+    Checks create_empty_action and create_random_action on a continuous and
+    on a discrete BehaviorSpec: dtype, shape and value bounds of the result.
+    """
+    n_agents = 4
+
+    # Continuous: a single vector of 30 float actions per agent
+    continuous_size = 30
+    continuous_spec = BehaviorSpec(
+        observation_shapes=[(5,)],
+        action_type=ActionType.CONTINUOUS,
+        action_shape=continuous_size,
+    )
+    empty = continuous_spec.create_empty_action(n_agents)
+    expected_empty = np.zeros((n_agents, continuous_size), dtype=np.float32)
+    assert np.array_equal(empty, expected_empty)
+    sampled = continuous_spec.create_random_action(n_agents)
+    assert sampled.dtype == np.float32
+    assert sampled.shape == (n_agents, continuous_size)
+    assert np.min(sampled) >= -1
+    assert np.max(sampled) <= 1
+
+    # Discrete: three branches of sizes 10, 20 and 30
+    branches = (10, 20, 30)
+    n_branches = len(branches)
+    discrete_spec = BehaviorSpec(
+        observation_shapes=[(5,)],
+        action_type=ActionType.DISCRETE,
+        action_shape=branches,
+    )
+    empty = discrete_spec.create_empty_action(n_agents)
+    expected_empty = np.zeros((n_agents, n_branches), dtype=np.int32)
+    assert np.array_equal(empty, expected_empty)
+
+    sampled = discrete_spec.create_random_action(n_agents)
+    assert sampled.dtype == np.int32
+    assert sampled.shape == (n_agents, n_branches)
+    assert np.min(sampled) >= 0
+    # Each column holds the choices for one branch and must stay below its size
+    for column, limit in zip(sampled.T, branches):
+        assert np.max(column) < limit