
add static method to create continuous/discrete

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit
0e28dd8f
20 files changed, 90 insertions(+), 57 deletions(-)
  1. gym-unity/gym_unity/tests/test_gym.py (4 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (18 changes)
  3. ml-agents-envs/mlagents_envs/rpc_utils.py (2 changes)
  4. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)
  5. ml-agents-envs/mlagents_envs/tests/test_steps.py (12 changes)
  6. ml-agents/mlagents/trainers/tests/mock_brain.py (9 changes)
  7. ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)
  8. ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (2 changes)
  9. ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (4 changes)
  10. ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (5 changes)
  11. ml-agents/mlagents/trainers/tests/test_agent_processor.py (19 changes)
  12. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5 changes)
  13. ml-agents/mlagents/trainers/tests/test_trajectory.py (2 changes)
  14. ml-agents/mlagents/trainers/tests/torch/test_networks.py (9 changes)
  15. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (4 changes)
  16. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (6 changes)
  17. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (5 changes)
  18. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (7 changes)
  19. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (7 changes)
  20. ml-agents/mlagents/trainers/torch/networks.py (9 changes)
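
In short: ActionSpec call sites across envs and trainers stop passing the (continuous_size, discrete_branches) tuple positionally and use two new named constructors instead; the deleted total_size property is inlined at its two call sites (mock_brain.py and networks.py). A before/after sketch of the pattern repeated throughout the diff below:

    from mlagents_envs.base_env import ActionSpec

    # Before: positional NamedTuple construction; which argument is
    # which is easy to get wrong at a glance.
    continuous = ActionSpec(3, ())
    discrete = ActionSpec(0, (7, 3))

    # After: the factory name states the intent.
    continuous = ActionSpec.make_continuous(3)
    discrete = ActionSpec.make_discrete((7, 3))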

gym-unity/gym_unity/tests/test_gym.py (4 changes)

             vector_action_space_size = 2
         else:
             vector_action_space_size = vector_action_space_size[0]
-        action_spec = ActionSpec(vector_action_space_size, ())
+        action_spec = ActionSpec.make_continuous(vector_action_space_size)

-        action_spec = ActionSpec(0, vector_action_space_size)
+        action_spec = ActionSpec.make_discrete(vector_action_space_size)
     obs_shapes = [(vector_observation_space_size,)]
     for _ in range(number_visual_observations):
         obs_shapes += [(8, 8, 3)]

ml-agents-envs/mlagents_envs/base_env.py (18 changes)

     def size(self) -> int:
         return self.discrete_size + self.continuous_size

-    @property
-    def total_size(self) -> int:
-        return sum(self.discrete_branches) + self.continuous_size

     def create_empty(self, n_agents: int) -> np.ndarray:
         if self.is_continuous():
             return np.zeros((n_agents, self.continuous_size), dtype=np.float32)

             ]
         )
         return action

+    @staticmethod
+    def make_continuous(continuous_size: int) -> "ActionSpec":
+        """
+        Creates an ActionSpec that is homogeneously continuous.
+        """
+        return ActionSpec(continuous_size, ())
+
+    @staticmethod
+    def make_discrete(discrete_branches: Tuple[int, ...]) -> "ActionSpec":
+        """
+        Creates an ActionSpec that is homogeneously discrete.
+        """
+        return ActionSpec(0, discrete_branches)


 class BehaviorSpec(NamedTuple):
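
Taken together with size and create_empty above, the factories pin down the spec's shape semantics. A minimal sketch of the resulting behavior, assuming the NamedTuple field order shown in the constructors (continuous_size first, then discrete_branches):

    from mlagents_envs.base_env import ActionSpec

    cont = ActionSpec.make_continuous(3)     # == ActionSpec(3, ())
    disc = ActionSpec.make_discrete((7, 3))  # == ActionSpec(0, (7, 3))

    # size counts action dimensions: continuous dims plus number of branches.
    assert cont.size == 3
    assert disc.size == 2

    # create_empty allocates one row per agent, one column per dimension.
    assert cont.create_empty(5).shape == (5, 3)
    assert disc.create_empty(5).shape == (5, 2)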

ml-agents-envs/mlagents_envs/rpc_utils.py (2 changes)

         [agent_info.id for agent_info in terminal_agent_info_list], dtype=np.int32
     )
     action_mask = None
-    if behavior_spec.action_spec.is_discrete():
+    if behavior_spec.action_spec.discrete_size > 0:
         if any(
             [agent_info.action_mask is not None]
             for agent_info in decision_agent_info_list
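
The predicate swap matters once a spec can mix action types. A small illustration with a hypothetical hybrid spec, under the assumption that is_discrete() returns True only when the spec has no continuous component:

    from mlagents_envs.base_env import ActionSpec

    # Hypothetical hybrid spec: 2 continuous dims plus one 3-way discrete branch.
    hybrid = ActionSpec(2, (3,))

    # Assumption: is_discrete() is True only for purely discrete specs, so it
    # would skip action-mask handling here even though a branch needs a mask.
    assert not hybrid.is_discrete()
    assert hybrid.discrete_size > 0  # the new check still sees the branch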

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)

 def test_batched_step_result_from_proto():
     n_agents = 10
     shapes = [(3,), (4,)]
-    spec = BehaviorSpec(shapes, ActionSpec(3, ()))
+    spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
     for agent_id in range(n_agents):

 def test_action_masking_discrete():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (7, 3)))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((7, 3)))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_action_masking_discrete_1():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (10,)))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((10,)))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_action_masking_discrete_2():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (2, 2, 6)))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((2, 2, 6)))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_action_masking_continuous():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(10, ()))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(10))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_batched_step_result_from_proto_raises_on_infinite():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(3, ()))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
     ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
     with pytest.raises(RuntimeError):
         steps_from_proto(ap_list, behavior_spec)

     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(3, ()))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
     ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
     with pytest.raises(RuntimeError):
         steps_from_proto(ap_list, behavior_spec)

ml-agents-envs/mlagents_envs/tests/test_steps.py (12 changes)

 def test_empty_decision_steps():
     specs = BehaviorSpec(
-        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec(3, ())
+        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.make_continuous(3)
     )
     ds = DecisionSteps.empty(specs)
     assert len(ds.obs) == 2

 def test_empty_terminal_steps():
     specs = BehaviorSpec(
-        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec(3, ())
+        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.make_continuous(3)
     )
     ts = TerminalSteps.empty(specs)
     assert len(ts.obs) == 2

 def test_specs():
-    specs = ActionSpec(3, ())
+    specs = ActionSpec.make_continuous(3)
-    specs = ActionSpec(0, (3,))
+    specs = ActionSpec.make_discrete((3,))
     assert specs.discrete_branches == (3,)
     assert specs.size == 1
     assert specs.create_empty(5).shape == (5, 1)

 def test_action_generator():
     # Continuous
     action_len = 30
-    specs = ActionSpec(action_len, ())
+    specs = ActionSpec.make_continuous(action_len)
     zero_action = specs.create_empty(4)
     assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
     random_action = specs.create_random(4)

     # Discrete
     action_shape = (10, 20, 30)
-    specs = ActionSpec(0, action_shape)
+    specs = ActionSpec.make_discrete(action_shape)
     zero_action = specs.create_empty(4)
     assert np.array_equal(zero_action, np.zeros((4, len(action_shape)), dtype=np.int32))
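
As the assertions above encode, create_empty's dtype tracks the spec's kind. A compact restatement of what the tests check:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec

    cont = ActionSpec.make_continuous(30)
    disc = ActionSpec.make_discrete((10, 20, 30))

    # Continuous actions are float32 vectors; discrete actions are one
    # int32 index per branch, so three branches give three columns.
    assert cont.create_empty(4).dtype == np.float32
    assert cont.create_empty(4).shape == (4, 30)
    assert disc.create_empty(4).dtype == np.int32
    assert disc.create_empty(4).shape == (4, 3)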

ml-agents/mlagents/trainers/tests/mock_brain.py (9 changes)

     """
     steps_list = []
     action_size = action_spec.size
-    action_probs = np.ones(np.sum(action_spec.total_size), dtype=np.float32)
+    action_probs = np.ones(
+        int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
+        dtype=np.float32,
+    )
     for _i in range(length - 1):
         obs = []
         for _shape in observation_shapes:

     use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
 ):
     if use_discrete:
-        action_spec = ActionSpec(0, tuple(vector_action_space))
+        action_spec = ActionSpec.make_discrete(tuple(vector_action_space))

-        action_spec = ActionSpec(vector_action_space, ())
+        action_spec = ActionSpec.make_continuous(vector_action_space)
     behavior_spec = BehaviorSpec(
         [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
     )
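
This hunk also shows what replaced the deleted total_size property: the flattened count is now spelled out at the call site. A worked example with a hypothetical hybrid spec:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec

    # Hypothetical hybrid: 2 continuous dims, branches of 3 and 4 choices.
    spec = ActionSpec(2, (3, 4))

    # size counts dimensions: 2 continuous + 2 branches = 4.
    assert spec.size == 4

    # The inlined replacement for total_size counts flattened slots:
    # 2 continuous + (3 + 4) discrete choices = 9.
    assert int(np.sum(spec.discrete_branches) + spec.continuous_size) == 9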

ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)

         self.vis_obs_size = vis_obs_size
         self.vec_obs_size = vec_obs_size
         if use_discrete:
-            action_spec = ActionSpec(0, tuple(2 for _ in range(action_size)))
+            action_spec = ActionSpec.make_discrete(tuple(2 for _ in range(action_size)))

-            action_spec = ActionSpec(action_size, ())
+            action_spec = ActionSpec.make_continuous(action_size)
         self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
         self.action_size = action_size
         self.names = brain_names

ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (2 changes)

 def create_behavior_spec(num_visual, num_vector, vector_size):
     behavior_spec = BehaviorSpec(
         [(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
-        ActionSpec(0, (1,)),
+        ActionSpec.make_discrete((1,)),
     )
     return behavior_spec

ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (4 changes)

 BUFFER_INIT_SAMPLES = 64
 NUM_AGENTS = 12
-CONTINUOUS_ACTION_SPEC = ActionSpec(VECTOR_ACTION_SPACE, ())
-DISCRETE_ACTION_SPEC = ActionSpec(0, tuple(DISCRETE_ACTION_SPACE))
+CONTINUOUS_ACTION_SPEC = ActionSpec.make_continuous(VECTOR_ACTION_SPACE)
+DISCRETE_ACTION_SPEC = ActionSpec.make_discrete(tuple(DISCRETE_ACTION_SPACE))

 def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visual):

ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (5 changes)

 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents_envs.base_env import ActionSpec, DecisionSteps, BehaviorSpec
+from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
+from mlagents_envs.base_env import ActionSpec

-    dummy_actionspec = ActionSpec(1, ())
+    dummy_actionspec = ActionSpec.make_continuous(1)
     dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
     return dummy_groupspec

ml-agents/mlagents/trainers/tests/test_agent_processor.py (19 changes)

 from mlagents.trainers.behavior_id_utils import get_global_agent_id
 from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
+from mlagents_envs.base_env import ActionSpec

 def create_mock_policy():
     mock_policy = mock.Mock()

     mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
         num_agents=2,
         observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
-        action_shape=2,
+        action_spec=ActionSpec.make_continuous(2),
     )
     fake_action_info = ActionInfo(
         action=[0.1, 0.1],

     mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
         num_agents=0,
         observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
-        action_shape=2,
+        action_spec=ActionSpec.make_continuous(2),
     )
     processor.add_experiences(
         mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])

         "log_probs": [0.1],
     }
     mock_decision_step, mock_terminal_step = mb.create_mock_steps(
-        num_agents=1, observation_shapes=[(8,)], action_shape=2
+        num_agents=1,
+        observation_shapes=[(8,)],
+        action_spec=ActionSpec.make_continuous(2),

-        num_agents=1, observation_shapes=[(8,)], action_shape=2, done=True
+        num_agents=1,
+        observation_shapes=[(8,)],
+        action_spec=ActionSpec.make_continuous(2),
+        done=True,
     )
     fake_action_info = ActionInfo(
         action=[0.1],

         "log_probs": [0.1],
     }
     mock_decision_step, mock_terminal_step = mb.create_mock_steps(
-        num_agents=1, observation_shapes=[(8,)], action_shape=2
+        num_agents=1,
+        observation_shapes=[(8,)],
+        action_spec=ActionSpec.make_continuous(2),
     )
     fake_action_info = ActionInfo(
         action=[0.1],

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5 changes)

 from mlagents.trainers.tests.test_buffer import construct_fake_buffer
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.settings import TrainerSettings, FrameworkType
+from mlagents_envs.base_env import ActionSpec

         length=time_horizon,
         observation_shapes=[(1,)],
         max_step_complete=True,
-        action_spec=ActionSpec(0, (2,)),
+        action_spec=ActionSpec.make_discrete((2,)),
     )
     trajectory_queue.put(trajectory)

         length=time_horizon,
         observation_shapes=[(1,)],
         max_step_complete=True,
-        action_spec=ActionSpec(0, (2,)),
+        action_spec=ActionSpec.make_discrete((2,)),
     )
     # Check that we can turn off the trainer and that the buffer is cleared
     num_trajectories = 5

ml-agents/mlagents/trainers/tests/test_trajectory.py (2 changes)

     trajectory = make_fake_trajectory(
         length=length,
         observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
-        action_spec=ActionSpec(ACTION_SIZE, ()),
+        action_spec=ActionSpec.make_continuous(ACTION_SIZE),
     )
     agentbuffer = trajectory.to_agentbuffer()
     seen_keys = set()

ml-agents/mlagents/trainers/tests/torch/test_networks.py (9 changes)

     SeparateActorCritic,
 )
 from mlagents.trainers.settings import NetworkSettings
 from mlagents_envs.base_env import ActionSpec

 def test_networkbody_vector():

     act_size = [2]
     if use_discrete:
         masks = torch.ones((1, 1))
-        action_spec = ActionSpec(0, tuple(act_size))
+        action_spec = ActionSpec.make_discrete(tuple(act_size))

-        action_spec = ActionSpec(act_size[0], ())
+        action_spec = ActionSpec.make_continuous(act_size[0])
     actor = SimpleActor(obs_shapes, network_settings, action_spec)
     # Test get_dist
     sample_obs = torch.ones((1, obs_size))

     obs_shapes = [(obs_size,)]
     act_size = [2]
     stream_names = [f"stream_name{n}" for n in range(4)]
-    action_spec = ActionSpec(act_size[0], ())
+    action_spec = ActionSpec.make_continuous(act_size[0])
     actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
     if lstm:
         sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (4 changes)

 BUFFER_INIT_SAMPLES = 64
 NUM_AGENTS = 12
-CONTINUOUS_ACTION_SPEC = ActionSpec(VECTOR_ACTION_SPACE, ())
-DISCRETE_ACTION_SPEC = ActionSpec(0, tuple(DISCRETE_ACTION_SPACE))
+CONTINUOUS_ACTION_SPEC = ActionSpec.make_continuous(VECTOR_ACTION_SPACE)
+DISCRETE_ACTION_SPEC = ActionSpec.make_discrete(tuple(DISCRETE_ACTION_SPACE))

 def create_test_ppo_optimizer(dummy_config, use_rnn, use_discrete, use_visual):

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (6 changes)

 SEED = [42]
-ACTIONSPEC_CONTINUOUS = ActionSpec(5, ())
-ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3))
-ACTIONSPEC_DISCRETE = ActionSpec(0, (2,))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
+ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
+ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((2,))

 @pytest.mark.parametrize(

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (5 changes)

     create_agent_buffer,
 )
-ACTIONSPEC_CONTINUOUS = ActionSpec(5, ())
-ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
+ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))

 @pytest.mark.parametrize(

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (7 changes)

     DiscriminatorNetwork,
 )
 CONTINUOUS_PATH = (
     os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
     + "/test.demo"
 )
 SEED = [42]
-ACTIONSPEC_CONTINUOUS = ActionSpec(2, ())
-ACTIONSPEC_FOURDISCRETE = ActionSpec(0, (2, 3, 3, 3))
-ACTIONSPEC_DISCRETE = ActionSpec(0, (20,))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(2)
+ACTIONSPEC_FOURDISCRETE = ActionSpec.make_discrete((2, 3, 3, 3))
+ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((20,))

 @pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (7 changes)

     create_agent_buffer,
 )
-ACTIONSPEC_CONTINUOUS = ActionSpec(5, ())
-ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3))
-ACTIONSPEC_DISCRETE = ActionSpec(0, (2,))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
+ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
+ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((2,))

 @pytest.mark.parametrize(

ml-agents/mlagents/trainers/torch/networks.py (9 changes)

     ):
         super().__init__()
         self.action_spec = action_spec
         self.act_size = self.action_spec.size
-            torch.Tensor([self.action_spec.total_size]), requires_grad=False
+            torch.Tensor(
+                [
+                    self.action_spec.continuous_size
+                    + sum(self.action_spec.discrete_branches)
+                ]
+            ),
+            requires_grad=False,
         )
         self.network_body = NetworkBody(observation_shapes, network_settings)
         if network_settings.memory is not None:
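
The trailing requires_grad=False and closing parenthesis suggest the expression above is wrapped in a non-trainable torch.nn.Parameter that bakes the flattened action size into the module; the enclosing line is elided from this view. A hypothetical stand-alone sketch of that pattern:

    import torch

    continuous_size, discrete_branches = 2, (3, 4)  # hypothetical hybrid spec

    # A constant, non-trainable parameter: it travels with the module's
    # state (and any export of it) but is never updated by the optimizer.
    act_size_vector = torch.nn.Parameter(
        torch.Tensor([continuous_size + sum(discrete_branches)]),
        requires_grad=False,
    )
    assert act_size_vector.item() == 9.0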
