Add test env for hybrid actions, clean up BehaviorSpec (#4522)

4 年前 · 827525f9
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py
 BehaviorName = str


+class HybridAction(NamedTuple):
+    """
+    Pairs the continuous and the discrete part of an action, each stored as
+    its own numpy array, so that both can be passed around as a single value.
+    """
+
+    # Continuous part; BehaviorSpec.create_empty_action builds it as a float32
+    # array of shape (n_agents, continuous_action_size).
+    continuous: np.ndarray
+    # Discrete part; BehaviorSpec.create_empty_action builds it as an int32
+    # array of shape (n_agents, discrete_action_size).
+    discrete: np.ndarray
+
+
 class DecisionStep(NamedTuple):
    """
    Contains the data a single Agent collected since the last
 class ActionType(Enum):
    # Enumerates the kinds of action a behavior can use.
    # NOTE(review): HYBRID presumably means a behavior that has both a
    # continuous and a discrete part (cf. HybridAction) -- confirm.
    DISCRETE = 0
    CONTINUOUS = 1
+    HYBRID = 2


 class BehaviorSpec(NamedTuple):
    def discrete_action_branches(self) -> Optional[Tuple[int, ...]]:
        return self.discrete_action_shape  # type: ignore

-    def create_empty_action(self, n_agents: int) -> np.ndarray:
-        return np.zeros((n_agents, self.action_size), dtype=np.float32)
+    def create_empty_action(self, n_agents: int) -> HybridAction:
+        """
+        Builds an all-zero action for a batch of agents.
+        :param n_agents: Number of agents, used as the first dimension of
+        both arrays.
+        :return: A HybridAction whose continuous array is float32 zeros of
+        shape (n_agents, self.continuous_action_size) and whose discrete
+        array is int32 zeros of shape (n_agents, self.discrete_action_size).
+        """
+        return HybridAction(
+            np.zeros((n_agents, self.continuous_action_size), dtype=np.float32),
+            np.zeros((n_agents, self.discrete_action_size), dtype=np.int32),
+        )

    def create_random_action(self, n_agents: int) -> np.ndarray:
        continuous_action = np.random.uniform(
                for i in range(self.discrete_action_size)
            ]
        )
-        return np.concatenate(discrete_action, continuous_action)
+        return HybridAction(continuous_action, discrete_action)


 class BehaviorSpec(NamedTuple):
        """

    @abstractmethod
-    def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
+    def set_actions(
+        self, behavior_name: BehaviorName, action: Union[HybridAction, np.ndarray]
+    ) -> None:
        """
        Sets the action for all of the agents in the simulation for the next
        step. The Actions must be in the same order as the order received in

    @abstractmethod
    def set_action_for_agent(
-        self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
+        self,
+        behavior_name: BehaviorName,
+        agent_id: AgentId,
+        action: Union[HybridAction, np.ndarray],
    ) -> None:
        """
        Sets the action for one of the agents in the simulation for the next
--- a/ml-agents/mlagents/trainers/tests/simple_test_envs.py
+++ b/ml-agents/mlagents/trainers/tests/simple_test_envs.py
    BaseEnv,
    BehaviorSpec,
    DecisionSteps,
+    HybridBehaviorSpec,
+    BehaviorName,
+    HybridAction,
 )
 from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
 from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (

    def close(self):
        # No-op: this implementation does nothing when it is closed.
        pass
+
+
+class HybridEnvironment(SimpleEnvironment):
+    """
+    Test environment for hybrid (continuous + discrete) actions.
+
+    It owns two SimpleEnvironment instances built from the same arguments,
+    differing only in the second constructor argument (False for
+    continuous_env, True for discrete_env; presumably the "use discrete
+    actions" flag), and forwards each half of a HybridAction to the matching
+    one.
+    """
+
+    # NOTE(review): SimpleEnvironment.__init__ is never invoked on this
+    # instance itself, so any attribute it would normally set is missing
+    # here. Confirm that _make_obs_spec() and the inherited methods do not
+    # rely on such attributes.
+    def __init__(
+        self,
+        brain_names,
+        step_size=STEP_SIZE,
+        num_visual=0,
+        num_vector=1,
+        vis_obs_size=VIS_OBS_SIZE,
+        vec_obs_size=OBS_SIZE,
+        action_size=1,
+    ):
+        """
+        Creates the two wrapped environments and the hybrid behavior spec.
+        All arguments are passed unchanged to both wrapped SimpleEnvironment
+        instances; action_size is additionally used to size both parts of
+        the hybrid behavior spec.
+        """
+        self.continuous_env = SimpleEnvironment(
+            brain_names,
+            False,
+            step_size,
+            num_visual,
+            num_vector,
+            vis_obs_size,
+            vec_obs_size,
+            action_size,
+        )
+        self.discrete_env = SimpleEnvironment(
+            brain_names,
+            True,
+            step_size,
+            num_visual,
+            num_vector,
+            vis_obs_size,
+            vec_obs_size,
+            action_size,
+        )
+        # The spec is built with action_size as its second argument and a
+        # tuple of action_size twos as its third (presumably the continuous
+        # size and one two-way discrete branch per action -- confirm against
+        # HybridBehaviorSpec).
+        self.behavior_spec = HybridBehaviorSpec(
+            self._make_obs_spec(), action_size, tuple(2 for _ in range(action_size))
+        )
+        self.continuous_action = {}
+        self.discrete_action = {}
+
+    def step(self) -> None:
+        """
+        Steps the continuous environment, then the discrete one.
+        """
+        self.continuous_env.step()
+        self.discrete_env.step()
+
+    def reset(self) -> None:  # type: ignore
+        """
+        Resets the continuous environment, then the discrete one.
+        """
+        self.continuous_env.reset()
+        self.discrete_env.reset()
+
+    def set_actions(self, behavior_name: BehaviorName, action: HybridAction) -> None:
+        """
+        Splits a hybrid action and hands each part to its wrapped environment.
+        :param behavior_name: Behavior the action is for; forwarded to both
+        wrapped environments.
+        :param action: The HybridAction to split; its continuous array goes
+        to continuous_env and its discrete array to discrete_env.
+        """
+        # set_actions takes (behavior_name, action); the behavior name must
+        # be forwarded, otherwise the action array is passed in its place.
+        self.continuous_env.set_actions(behavior_name, action.continuous)
+        self.discrete_env.set_actions(behavior_name, action.discrete)


 class MemoryEnvironment(SimpleEnvironment):