浏览代码

rename make_x to create_x/remove redundant properties

/develop/action-spec-gym
Andrew Cohen 4 年前
当前提交
afd16cc9
共有 19 个文件被更改,包括 55 次插入69 次删除
  1. 4
      gym-unity/gym_unity/tests/test_gym.py
  2. 26
      ml-agents-envs/mlagents_envs/base_env.py
  3. 14
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  4. 12
      ml-agents-envs/mlagents_envs/tests/test_steps.py
  5. 4
      ml-agents/mlagents/trainers/tests/mock_brain.py
  6. 6
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  7. 2
      ml-agents/mlagents/trainers/tests/tensorflow/test_models.py
  8. 4
      ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py
  9. 2
      ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py
  10. 2
      ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py
  11. 10
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  12. 4
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  13. 2
      ml-agents/mlagents/trainers/tests/test_trajectory.py
  14. 6
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  15. 4
      ml-agents/mlagents/trainers/tests/torch/test_ppo.py
  16. 6
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  17. 4
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  18. 6
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  19. 6
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py

4
gym-unity/gym_unity/tests/test_gym.py


vector_action_space_size = 2
else:
vector_action_space_size = vector_action_space_size[0]
action_spec = ActionSpec.make_continuous(vector_action_space_size)
action_spec = ActionSpec.create_continuous(vector_action_space_size)
action_spec = ActionSpec.make_discrete(vector_action_space_size)
action_spec = ActionSpec.create_discrete(vector_action_space_size)
obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]

26
ml-agents-envs/mlagents_envs/base_env.py


the number of discrete actions available to the agent on an independent action branch.
"""
num_continuous_actions: int
discrete_branch_sizes: Tuple[int, ...]
continuous_size: int
discrete_branches: Tuple[int, ...]
def __eq__(self, other):
return (

return self.discrete_size == 0 and self.continuous_size > 0
@property
def discrete_branches(self) -> Tuple[int, ...]:
"""
Returns a Tuple of int corresponding to the number of possible actions
for each branch (only for discrete actions). Will return None in
for continuous actions.
"""
return self.discrete_branch_sizes # type: ignore
@property
return len(self.discrete_branch_sizes)
@property
def continuous_size(self) -> int:
"""
Returns an int corresponding to the number of continuous actions.
"""
return self.num_continuous_actions
return len(self.discrete_branches)
def create_empty(self, n_agents: int) -> np.ndarray:
"""

return actions
@staticmethod
def make_continuous(continuous_size: int) -> "ActionSpec":
def create_continuous(continuous_size: int) -> "ActionSpec":
"""
Creates an ActionSpec that is homogenously continuous
"""

def make_discrete(discrete_branches: Tuple[int]) -> "ActionSpec":
def create_discrete(discrete_branches: Tuple[int]) -> "ActionSpec":
"""
Creates an ActionSpec that is homogenously discrete
"""

14
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((7, 3)))
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((10,)))
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.make_discrete((2, 2, 6)))
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(10))
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.make_continuous(3))
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

12
ml-agents-envs/mlagents_envs/tests/test_steps.py


def test_empty_decision_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.make_continuous(3)
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.make_continuous(3)
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

def test_specs():
specs = ActionSpec.make_continuous(3)
specs = ActionSpec.create_continuous(3)
assert specs.discrete_branches == ()
assert specs.discrete_size == 0
assert specs.continuous_size == 3

specs = ActionSpec.make_discrete((3,))
specs = ActionSpec.create_discrete((3,))
assert specs.discrete_branches == (3,)
assert specs.discrete_size == 1
assert specs.continuous_size == 0

def test_action_generator():
# Continuous
action_len = 30
specs = ActionSpec.make_continuous(action_len)
specs = ActionSpec.create_continuous(action_len)
zero_action = specs.create_empty(4)
assert np.array_equal(zero_action, np.zeros((4, action_len), dtype=np.float32))
random_action = specs.create_random(4)

# Discrete
action_shape = (10, 20, 30)
specs = ActionSpec.make_discrete(action_shape)
specs = ActionSpec.create_discrete(action_shape)
zero_action = specs.create_empty(4)
assert np.array_equal(zero_action, np.zeros((4, len(action_shape)), dtype=np.int32))

4
ml-agents/mlagents/trainers/tests/mock_brain.py


use_discrete=True, use_visual=False, vector_action_space=2, vector_obs_space=8
):
if use_discrete:
action_spec = ActionSpec.make_discrete(tuple(vector_action_space))
action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
action_spec = ActionSpec.make_continuous(vector_action_space)
action_spec = ActionSpec.create_continuous(vector_action_space)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
)

6
ml-agents/mlagents/trainers/tests/simple_test_envs.py


self.vis_obs_size = vis_obs_size
self.vec_obs_size = vec_obs_size
if use_discrete:
action_spec = ActionSpec.make_discrete(tuple(2 for _ in range(action_size)))
action_spec = ActionSpec.create_discrete(
tuple(2 for _ in range(action_size))
)
action_spec = ActionSpec.make_continuous(action_size)
action_spec = ActionSpec.create_continuous(action_size)
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.action_size = action_size
self.names = brain_names

2
ml-agents/mlagents/trainers/tests/tensorflow/test_models.py


def create_behavior_spec(num_visual, num_vector, vector_size):
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
ActionSpec.make_discrete((1,)),
ActionSpec.create_discrete((1,)),
)
return behavior_spec

4
ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py


BUFFER_INIT_SAMPLES = 64
NUM_AGENTS = 12
CONTINUOUS_ACTION_SPEC = ActionSpec.make_continuous(VECTOR_ACTION_SPACE)
DISCRETE_ACTION_SPEC = ActionSpec.make_discrete(tuple(DISCRETE_ACTION_SPACE))
CONTINUOUS_ACTION_SPEC = ActionSpec.create_continuous(VECTOR_ACTION_SPACE)
DISCRETE_ACTION_SPEC = ActionSpec.create_discrete(tuple(DISCRETE_ACTION_SPACE))
def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visual):

2
ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py


PPO_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=450,
max_steps=400,
summary_freq=100,
framework=FrameworkType.TENSORFLOW,
)

2
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


def basic_behavior_spec():
dummy_actionspec = ActionSpec.make_continuous(1)
dummy_actionspec = ActionSpec.create_continuous(1)
dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
return dummy_groupspec

10
ml-agents/mlagents/trainers/tests/test_agent_processor.py


mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
action_spec=ActionSpec.make_continuous(2),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(
action=[0.1, 0.1],

mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
action_spec=ActionSpec.make_continuous(2),
action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],
action_spec=ActionSpec.make_continuous(2),
action_spec=ActionSpec.create_continuous(2),
action_spec=ActionSpec.make_continuous(2),
action_spec=ActionSpec.create_continuous(2),
done=True,
)
fake_action_info = ActionInfo(

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],
action_spec=ActionSpec.make_continuous(2),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(
action=[0.1],

4
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


length=time_horizon,
observation_shapes=[(1,)],
max_step_complete=True,
action_spec=ActionSpec.make_discrete((2,)),
action_spec=ActionSpec.create_discrete((2,)),
)
trajectory_queue.put(trajectory)

length=time_horizon,
observation_shapes=[(1,)],
max_step_complete=True,
action_spec=ActionSpec.make_discrete((2,)),
action_spec=ActionSpec.create_discrete((2,)),
)
# Check that we can turn off the trainer and that the buffer is cleared
num_trajectories = 5

2
ml-agents/mlagents/trainers/tests/test_trajectory.py


trajectory = make_fake_trajectory(
length=length,
observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
action_spec=ActionSpec.make_continuous(ACTION_SIZE),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()
seen_keys = set()

6
ml-agents/mlagents/trainers/tests/torch/test_networks.py


act_size = [2]
if use_discrete:
masks = torch.ones((1, 1))
action_spec = ActionSpec.make_discrete(tuple(act_size))
action_spec = ActionSpec.create_discrete(tuple(act_size))
action_spec = ActionSpec.make_continuous(act_size[0])
action_spec = ActionSpec.create_continuous(act_size[0])
actor = SimpleActor(obs_shapes, network_settings, action_spec)
# Test get_dist
sample_obs = torch.ones((1, obs_size))

obs_shapes = [(obs_size,)]
act_size = [2]
stream_names = [f"stream_name{n}" for n in range(4)]
action_spec = ActionSpec.make_continuous(act_size[0])
action_spec = ActionSpec.create_continuous(act_size[0])
actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))

4
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


BUFFER_INIT_SAMPLES = 64
NUM_AGENTS = 12
CONTINUOUS_ACTION_SPEC = ActionSpec.make_continuous(VECTOR_ACTION_SPACE)
DISCRETE_ACTION_SPEC = ActionSpec.make_discrete(tuple(DISCRETE_ACTION_SPACE))
CONTINUOUS_ACTION_SPEC = ActionSpec.create_continuous(VECTOR_ACTION_SPACE)
DISCRETE_ACTION_SPEC = ActionSpec.create_discrete(tuple(DISCRETE_ACTION_SPACE))
def create_test_ppo_optimizer(dummy_config, use_rnn, use_discrete, use_visual):

6
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


SEED = [42]
ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((2,))
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))
ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((2,))
@pytest.mark.parametrize(

4
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


)
ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))
@pytest.mark.parametrize(

6
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


)
SEED = [42]
ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(2)
ACTIONSPEC_FOURDISCRETE = ActionSpec.make_discrete((2, 3, 3, 3))
ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((20,))
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(2)
ACTIONSPEC_FOURDISCRETE = ActionSpec.create_discrete((2, 3, 3, 3))
ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))
@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])

6
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


SEED = [42]
ACTIONSPEC_CONTINUOUS = ActionSpec.make_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.make_discrete((2, 3))
ACTIONSPEC_DISCRETE = ActionSpec.make_discrete((2,))
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))
ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((2,))
@pytest.mark.parametrize(

正在加载...
取消
保存