
add static method to create continuous/discrete

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit
0e28dd8f
20 files changed, 90 insertions(+), 57 deletions(-)
  1. gym-unity/gym_unity/tests/test_gym.py (4 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (18 changes)
  3. ml-agents-envs/mlagents_envs/rpc_utils.py (2 changes)
  4. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)
  5. ml-agents-envs/mlagents_envs/tests/test_steps.py (12 changes)
  6. ml-agents/mlagents/trainers/tests/mock_brain.py (9 changes)
  7. ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)
  8. ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (2 changes)
  9. ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (4 changes)
  10. ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (5 changes)
  11. ml-agents/mlagents/trainers/tests/test_agent_processor.py (19 changes)
  12. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5 changes)
  13. ml-agents/mlagents/trainers/tests/test_trajectory.py (2 changes)
  14. ml-agents/mlagents/trainers/tests/torch/test_networks.py (9 changes)
  15. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (4 changes)
  16. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (6 changes)
  17. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (5 changes)
  18. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (7 changes)
  19. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (7 changes)
  20. ml-agents/mlagents/trainers/torch/networks.py (9 changes)
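
In short: ActionSpec call sites across envs and trainers stop passing the (continuous_size, discrete_branches) tuple positionally and use two new named constructors instead; the deleted total_size property is inlined at its two call sites (mock_brain.py and networks.py). A before/after sketch of the pattern repeated throughout the diff below:

    from mlagents_envs.base_env import ActionSpec

    # Before: positional NamedTuple construction; which argument is
    # which is easy to get wrong at a glance.
    continuous = ActionSpec(3, ())
    discrete = ActionSpec(0, (7, 3))

    # After: the factory name states the intent.
    continuous = ActionSpec.make_continuous(3)
    discrete = ActionSpec.make_discrete((7, 3))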

gym-unity/gym_unity/tests/test_gym.py (4 changes)

             vector_action_space_size = 2
         else:
             vector_action_space_size = vector_action_space_size[0]
-        action_spec = ActionSpec(vector_action_space_size, ())
+        action_spec = ActionSpec.make_continuous(vector_action_space_size)

-        action_spec = ActionSpec(0, vector_action_space_size)
+        action_spec = ActionSpec.make_discrete(vector_action_space_size)
     obs_shapes = [(vector_observation_space_size,)]
     for _ in range(number_visual_observations):
         obs_shapes += [(8, 8, 3)]

ml-agents-envs/mlagents_envs/base_env.py (18 changes)

     def size(self) -> int:
         return self.discrete_size + self.continuous_size

-    @property
-    def total_size(self) -> int:
-        return sum(self.discrete_branches) + self.continuous_size

     def create_empty(self, n_agents: int) -> np.ndarray:
         if self.is_continuous():
             return np.zeros((n_agents, self.continuous_size), dtype=np.float32)

             ]
         )
         return action

+    @staticmethod
+    def make_continuous(continuous_size: int) -> "ActionSpec":
+        """
+        Creates an ActionSpec that is homogeneously continuous.
+        """
+        return ActionSpec(continuous_size, ())
+
+    @staticmethod
+    def make_discrete(discrete_branches: Tuple[int, ...]) -> "ActionSpec":
+        """
+        Creates an ActionSpec that is homogeneously discrete.
+        """
+        return ActionSpec(0, discrete_branches)


 class BehaviorSpec(NamedTuple):
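
Taken together with size and create_empty above, the factories pin down the spec's shape semantics. A minimal sketch of the resulting behavior, assuming the NamedTuple field order shown in the constructors (continuous_size first, then discrete_branches):

    from mlagents_envs.base_env import ActionSpec

    cont = ActionSpec.make_continuous(3)     # == ActionSpec(3, ())
    disc = ActionSpec.make_discrete((7, 3))  # == ActionSpec(0, (7, 3))

    # size counts action dimensions: continuous dims plus number of branches.
    assert cont.size == 3
    assert disc.size == 2

    # create_empty allocates one row per agent, one column per dimension.
    assert cont.create_empty(5).shape == (5, 3)
    assert disc.create_empty(5).shape == (5, 2)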

ml-agents-envs/mlagents_envs/rpc_utils.py (2 changes)

         [agent_info.id for agent_info in terminal_agent_info_list], dtype=np.int32
     )
     action_mask = None
-    if behavior_spec.action_spec.is_discrete():
+    if behavior_spec.action_spec.discrete_size > 0:
         if any(
             [agent_info.action_mask is not None]
             for agent_info in decision_agent_info_list
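
The predicate swap matters once a spec can mix action types. A small illustration with a hypothetical hybrid spec, under the assumption that is_discrete() returns True only when the spec has no continuous component:

    from mlagents_envs.base_env import ActionSpec

    # Hypothetical hybrid spec: 2 continuous dims plus one 3-way discrete branch.
    hybrid = ActionSpec(2, (3,))

    # Assumption: is_discrete() is True only for purely discrete specs, so it
    # would skip action-mask handling here even though a branch needs a mask.
    assert not hybrid.is_discrete()
    assert hybrid.discrete_size > 0  # the new check still sees the branch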

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)

 def test_batched_step_result_from_proto():
     n_agents = 10
     shapes = [(3,), (4,)]
-    spec = BehaviorSpec(shapes, ActionSpec(3, ()))
+    spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
     for agent_id in range(n_agents):

 def test_action_masking_discrete():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (7, 3)))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((7, 3)))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_action_masking_discrete_1():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (10,)))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((10,)))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_action_masking_discrete_2():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (2, 2, 6)))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((2, 2, 6)))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_action_masking_continuous():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(10, ()))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(10))
     ap_list = generate_list_agent_proto(n_agents, shapes)
     decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
     masks = decision_steps.action_mask

 def test_batched_step_result_from_proto_raises_on_infinite():
     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(3, ()))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
     ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
     with pytest.raises(RuntimeError):
         steps_from_proto(ap_list, behavior_spec)

     n_agents = 10
     shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec(3, ()))
+    behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
     ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
     with pytest.raises(RuntimeError):
         steps_from_proto(ap_list, behavior_spec)

ml-agents-envs/mlagents_envs/tests/test_steps.py (12 changes)

 def test_empty_decision_steps():
     specs = BehaviorSpec(
-        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec(3, ())
+        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.make_continuous(3)
     )
     ds = DecisionSteps.empty(specs)
     assert len(ds.obs) == 2

 def test_empty_terminal_steps():
     specs = BehaviorSpec(
-        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec(3, ())
+        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.make_continuous(3)
     )
     ts = TerminalSteps.empty(specs)
     assert len(ts.obs) == 2

 def test_specs():
-    specs = ActionSpec(3, ())
+    specs = ActionSpec.make_continuous(3)
-    specs = ActionSpec(0, (3,))
+    specs = ActionSpec.make_discrete((3,))
     assert specs.discrete_branches == (3,)
     assert specs.size == 1
     assert specs.create_empty(5).shape == (5, 1)

 def test_action_generator():
     # Continuous
     action_len = 30
-    specs = ActionSpec(action_len, ())
+    specs = ActionSpec.make_continuous(action_len)
     zero_action = specs.create_empty(4)
     assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
     random_action = specs.create_random(4)

     # Discrete
     action_shape = (10, 20, 30)
-    specs = ActionSpec(0, action_shape)
+    specs = ActionSpec.make_discrete(action_shape)
     zero_action = specs.create_empty(4)
     assert np.array_equal(zero_action, np.zeros((4, len(action_shape)), dtype=np.int32))
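
As the assertions above encode, create_empty's dtype tracks the spec's kind. A compact restatement of what the tests check:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec

    cont = ActionSpec.make_continuous(30)
    disc = ActionSpec.make_discrete((10, 20, 30))

    # Continuous actions are float32 vectors; discrete actions are one
    # int32 index per branch, so three branches give three columns.
    assert cont.create_empty(4).dtype == np.float32
    assert cont.create_empty(4).shape == (4, 30)
    assert disc.create_empty(4).dtype == np.int32
    assert disc.create_empty(4).shape == (4, 3)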

ml-agents/mlagents/trainers/tests/mock_brain.py (9 changes)

     """
     steps_list = []
     action_size = action_spec.size
-    action_probs = np.ones(np.sum(action_spec.total_size), dtype=np.float32)
+    action_probs = np.ones(
+        int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
+        dtype=np.float32,
+    )
     for _i in range(length - 1):
         obs = []
         for _shape in observation_shapes:

     use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
 ):
     if use_discrete:
-        action_spec = ActionSpec(0, tuple(vector_action_space))
+        action_spec = ActionSpec.make_discrete(tuple(vector_action_space))

-        action_spec = ActionSpec(vector_action_space, ())
+        action_spec = ActionSpec.make_continuous(vector_action_space)
     behavior_spec = BehaviorSpec(
         [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
     )
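
This hunk also shows what replaced the deleted total_size property: the flattened count is now spelled out at the call site. A worked example with a hypothetical hybrid spec:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec

    # Hypothetical hybrid: 2 continuous dims, branches of 3 and 4 choices.
    spec = ActionSpec(2, (3, 4))

    # size counts dimensions: 2 continuous + 2 branches = 4.
    assert spec.size == 4

    # The inlined replacement for total_size counts flattened slots:
    # 2 continuous + (3 + 4) discrete choices = 9.
    assert int(np.sum(spec.discrete_branches) + spec.continuous_size) == 9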

ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)

         self.vis_obs_size = vis_obs_size
         self.vec_obs_size = vec_obs_size
         if use_discrete:
-            action_spec = ActionSpec(0, tuple(2 for _ in range(action_size)))
+            action_spec = ActionSpec.make_discrete(tuple(2 for _ in range(action_size)))

-            action_spec = ActionSpec(action_size, ())
+            action_spec = ActionSpec.make_continuous(action_size)
         self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
         self.action_size = action_size
         self.names = brain_names

ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (2 changes)

 def create_behavior_spec(num_visual, num_vector, vector_size):
     behavior_spec = BehaviorSpec(
         [(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
-        ActionSpec(0, (1,)),
+        ActionSpec.make_discrete((1,)),
     )
     return behavior_spec

ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (4 changes)

 BUFFER_INIT_SAMPLES = 64
 NUM_AGENTS = 12
-CONTINUOUS_ACTION_SPEC = ActionSpec(VECTOR_ACTION_SPACE, ())
-DISCRETE_ACTION_SPEC = ActionSpec(0, tuple(DISCRETE_ACTION_SPACE))
+CONTINUOUS_ACTION_SPEC = ActionSpec.make_continuous(VECTOR_ACTION_SPACE)
+DISCRETE_ACTION_SPEC = ActionSpec.make_discrete(tuple(DISCRETE_ACTION_SPACE))

 def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visual):

ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (5 changes)

 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents_envs.base_env import ActionSpec, DecisionSteps, BehaviorSpec
+from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
+from mlagents_envs.base_env import ActionSpec

-    dummy_actionspec = ActionSpec(1, ())
+    dummy_actionspec = ActionSpec.make_continuous(1)
     dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
     return dummy_groupspec

ml-agents/mlagents/trainers/tests/test_agent_processor.py (19 changes)

 from mlagents.trainers.behavior_id_utils import get_global_agent_id
 from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
+from mlagents_envs.base_env import ActionSpec

 def create_mock_policy():
     mock_policy = mock.Mock()

     mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
         num_agents=2,
         observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
-        action_shape=2,
+        action_spec=ActionSpec.make_continuous(2),
     )
     fake_action_info = ActionInfo(
         action=[0.1, 0.1],

     mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
         num_agents=0,
         observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
-        action_shape=2,
+        action_spec=ActionSpec.make_continuous(2),
     )
     processor.add_experiences(
         mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])

         "log_probs": [0.1],
     }
     mock_decision_step, mock_terminal_step = mb.create_mock_steps(
-        num_agents=1, observation_shapes=[(8,)], action_shape=2
+        num_agents=1,
+        observation_shapes=[(8,)],
+        action_spec=ActionSpec.make_continuous(2),

-        num_agents=1, observation_shapes=[(8,)], action_shape=2, done=True
+        num_agents=1,
+        observation_shapes=[(8,)],
+        action_spec=ActionSpec.make_continuous(2),
+        done=True,
     )
     fake_action_info = ActionInfo(
         action=[0.1],

         "log_probs": [0.1],
     }
     mock_decision_step, mock_terminal_step = mb.create_mock_steps(
-        num_agents=1, observation_shapes=[(8,)], action_shape=2
+        num_agents=1,
+        observation_shapes=[(8,)],
+        action_spec=ActionSpec.make_continuous(2),
     )
     fake_action_info = ActionInfo(
         action=[0.1],

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5 changes)

 from mlagents.trainers.tests.test_buffer import construct_fake_buffer
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.settings import TrainerSettings, FrameworkType
+from mlagents_envs.base_env import ActionSpec

         length=time_horizon,
         observation_shapes=[(1,)],
         max_step_complete=True,
-        action_spec=ActionSpec(0, (2,)),
+        action_spec=ActionSpec.make_discrete((2,)),
     )
     trajectory_queue.put(trajectory)

         length=time_horizon,
         observation_shapes=[(1,)],
         max_step_complete=True,
-        action_spec=ActionSpec(0, (2,)),
+        action_spec=ActionSpec.make_discrete((2,)),
     )
     # Check that we can turn off the trainer and that the buffer is cleared
     num_trajectories = 5

ml-agents/mlagents/trainers/tests/test_trajectory.py (2 changes)

     trajectory = make_fake_trajectory(
         length=length,
         observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
-        action_spec=ActionSpec(ACTION_SIZE, ()),
+        action_spec=ActionSpec.make_continuous(ACTION_SIZE),
     )
     agentbuffer = trajectory.to_agentbuffer()
     seen_keys = set()

ml-agents/mlagents/trainers/tests/torch/test_networks.py (9 changes)

     SeparateActorCritic,
 )
 from mlagents.trainers.settings import NetworkSettings
 from mlagents_envs.base_env import ActionSpec

 def test_networkbody_vector():

     act_size = [2]
     if use_discrete:
         masks = torch.ones((1, 1))
-        action_spec = ActionSpec(0, tuple(act_size))
+        action_spec = ActionSpec.make_discrete(tuple(act_size))

-        action_spec = ActionSpec(act_size[0], ())
+        action_spec = ActionSpec.make_continuous(act_size[0])
     actor = SimpleActor(obs_shapes, network_settings, action_spec)
     # Test get_dist
     sample_obs = torch.ones((1, obs_size))

     obs_shapes = [(obs_size,)]
     act_size = [2]
     stream_names = [f"stream_name{n}" for n in range(4)]
-    action_spec = ActionSpec(act_size[0], ())
+    action_spec = ActionSpec.make_continuous(act_size[0])
     actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
     if lstm:
         sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (4 changes)

 BUFFER_INIT_SAMPLES = 64
 NUM_AGENTS = 12
-CONTINUOUS_ACTION_SPEC = ActionSpec(VECTOR_ACTION_SPACE, ())
-DISCRETE_ACTION_SPEC = ActionSpec(0, tuple(DISCRETE_ACTION_SPACE))
+CONTINUOUS_ACTION_SPEC = ActionSpec.make_continuous(VECTOR_ACTION_SPACE)
+DISCRETE_ACTION_SPEC = ActionSpec.make_discrete(tuple(DISCRETE_ACTION_SPACE))

 def create_test_ppo_optimizer(dummy_config, use_rnn, use_discrete, use_visual):

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (6 changes)

 SEED = [42]
-ACTIONSPEC_CONTINUOUS = ActionSpec(5, ())
-ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3))
-ACTIONSPEC_DISCRETE = ActionSpec(0, (2,))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
+ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
+ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((2,))

 @pytest.mark.parametrize(

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (5 changes)

     create_agent_buffer,
 )
-ACTIONSPEC_CONTINUOUS = ActionSpec(5, ())
-ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
+ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))

 @pytest.mark.parametrize(

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (7 changes)

     DiscriminatorNetwork,
 )
 CONTINUOUS_PATH = (
     os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
     + "/test.demo"
 )
 SEED = [42]
-ACTIONSPEC_CONTINUOUS = ActionSpec(2, ())
-ACTIONSPEC_FOURDISCRETE = ActionSpec(0, (2, 3, 3, 3))
-ACTIONSPEC_DISCRETE = ActionSpec(0, (20,))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(2)
+ACTIONSPEC_FOURDISCRETE = ActionSpec.make_discrete((2, 3, 3, 3))
+ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((20,))

 @pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (7 changes)

     create_agent_buffer,
 )
-ACTIONSPEC_CONTINUOUS = ActionSpec(5, ())
-ACTIONSPEC_TWODISCRETE = ActionSpec(0, (2, 3))
-ACTIONSPEC_DISCRETE = ActionSpec(0, (2,))
+ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
+ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
+ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((2,))

 @pytest.mark.parametrize(

ml-agents/mlagents/trainers/torch/networks.py (9 changes)

     ):
         super().__init__()
         self.action_spec = action_spec
         self.act_size = self.action_spec.size
-            torch.Tensor([self.action_spec.total_size]), requires_grad=False
+            torch.Tensor(
+                [
+                    self.action_spec.continuous_size
+                    + sum(self.action_spec.discrete_branches)
+                ]
+            ),
+            requires_grad=False,
         )
         self.network_body = NetworkBody(observation_shapes, network_settings)
         if network_settings.memory is not None:
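
The trailing requires_grad=False and closing parenthesis suggest the expression above is wrapped in a non-trainable torch.nn.Parameter that bakes the flattened action size into the module; the enclosing line is elided from this view. A hypothetical stand-alone sketch of that pattern:

    import torch

    continuous_size, discrete_branches = 2, (3, 4)  # hypothetical hybrid spec

    # A constant, non-trainable parameter: it travels with the module's
    # state (and any export of it) but is never updated by the optimizer.
    act_size_vector = torch.nn.Parameter(
        torch.Tensor([continuous_size + sum(discrete_branches)]),
        requires_grad=False,
    )
    assert act_size_vector.item() == 9.0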
