
remove *_action_* from function names

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit
9689cf2c
23 files changed, with 125 insertions and 122 deletions
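At a glance, the change drops the `action_`/`_action` infix from the ActionSpec helpers throughout the codebase. A minimal sketch of the renamed surface, using the same `ActionSpec(3, ())` constructor the updated test_steps.py exercises below (the snippet itself is illustrative and not part of the diff):

    from mlagents_envs.base_env import ActionSpec

    # A spec with 3 continuous actions and no discrete branches.
    spec = ActionSpec(3, ())

    assert spec.is_continuous()          # previously is_action_continuous()
    assert not spec.is_discrete()        # previously is_action_discrete()
    assert spec.continuous_size == 3     # previously continuous_action_size
    assert spec.discrete_branches == ()  # previously discrete_action_branches
    assert spec.size == 3                # previously action_size
    assert spec.total_size == 3          # previously total_action_size

    empty = spec.create_empty(5)         # previously create_empty_action(5); (5, 3) float32 zeros
    rand = spec.create_random(5)         # previously create_random_action(5); uniform in [-1, 1]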
  1. gym-unity/gym_unity/envs/__init__.py (10 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (59 changes)
  3. ml-agents-envs/mlagents_envs/environment.py (19 changes)
  4. ml-agents-envs/mlagents_envs/rpc_utils.py (6 changes)
  5. ml-agents-envs/mlagents_envs/tests/test_envs.py (7 changes)
  6. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)
  7. ml-agents-envs/mlagents_envs/tests/test_steps.py (24 changes)
  8. ml-agents/mlagents/trainers/demo_loader.py (16 changes)
  9. ml-agents/mlagents/trainers/policy/policy.py (15 changes)
  10. ml-agents/mlagents/trainers/policy/tf_policy.py (9 changes)
  11. ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)
  12. ml-agents/mlagents/trainers/sac/optimizer_torch.py (6 changes)
  13. ml-agents/mlagents/trainers/tests/mock_brain.py (12 changes)
  14. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (2 changes)
  15. ml-agents/mlagents/trainers/tf/components/bc/model.py (2 changes)
  16. ml-agents/mlagents/trainers/tf/components/bc/module.py (4 changes)
  17. ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py (2 changes)
  18. ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py (2 changes)
  19. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
  20. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (12 changes)
  21. ml-agents/mlagents/trainers/torch/model_serialization.py (2 changes)
  22. ml-agents/mlagents/trainers/torch/networks.py (10 changes)
  23. ml-agents/mlagents/trainers/torch/utils.py (10 changes)

gym-unity/gym_unity/envs/__init__.py (10 changes)

self._previous_decision_step = decision_steps
# Set action spaces
- if self.group_spec.action_spec.is_action_discrete():
- branches = self.group_spec.action_spec.discrete_action_branches
- if self.group_spec.action_spec.discrete_action_size == 1:
+ if self.group_spec.action_spec.is_discrete():
+ branches = self.group_spec.action_spec.discrete_branches
+ if self.group_spec.action_spec.discrete_size == 1:
self._action_space = spaces.Discrete(branches[0])
else:
if flatten_branched:

"The environment has a non-discrete action space. It will "
"not be flattened."
)
- high = np.array([1] * self.group_spec.action_spec.continuous_action_size)
+ high = np.array([1] * self.group_spec.action_spec.continuous_size)
self._action_space = spaces.Box(-high, high, dtype=np.float32)
# Set observations space

action = self._flattener.lookup_action(action)
spec = self.group_spec
- action = np.array(action).reshape((1, spec.action_spec.action_size))
+ action = np.array(action).reshape((1, spec.action_spec.size))
self._env.set_actions(self.name, action)
self._env.step()
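For context, the renamed properties above are what the gym wrapper uses to pick a gym action space. A hedged sketch of that mapping with the new names; the multi-branch (MultiDiscrete/flattener) handling is abbreviated and the helper itself is illustrative, not the wrapper's exact code:

    import numpy as np
    from gym import spaces

    def action_space_from_spec(action_spec):
        # Discrete: a single branch becomes spaces.Discrete; several branches would go
        # through the wrapper's flattener or spaces.MultiDiscrete (omitted here).
        if action_spec.is_discrete():
            branches = action_spec.discrete_branches
            if action_spec.discrete_size == 1:
                return spaces.Discrete(branches[0])
            return spaces.MultiDiscrete(branches)
        # Continuous: a [-1, 1] box with one dimension per continuous action.
        high = np.array([1] * action_spec.continuous_size)
        return spaces.Box(-high, high, dtype=np.float32)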

ml-agents-envs/mlagents_envs/base_env.py (59 changes)

class ActionType(Enum):
DISCRETE = 0
CONTINUOUS = 1
HYBRID = 2
"""
A NamedTuple containing utility functions and information about the action spaces
for a group of Agents under the same behavior.
- num_continuous_actions is an int corresponding to the number of floats which
constitute the action.
- discrete_branch_sizes is a Tuple of int where each int corresponds to
the number of discrete actions available to the agent on an independent action branch.
"""
- def is_action_discrete(self) -> bool:
+ def is_discrete(self) -> bool:
- return self.discrete_action_size > 0
+ return self.discrete_size > 0
- def is_action_continuous(self) -> bool:
+ def is_continuous(self) -> bool:
- return self.continuous_action_size > 0
+ return self.continuous_size > 0
- def discrete_action_branches(self) -> Tuple[int, ...]:
+ def discrete_branches(self) -> Tuple[int, ...]:
- def discrete_action_size(self) -> int:
+ def discrete_size(self) -> int:
- def continuous_action_size(self) -> int:
+ def continuous_size(self) -> int:
- def action_size(self) -> int:
- return self.discrete_action_size + self.continuous_action_size
+ def size(self) -> int:
+ return self.discrete_size + self.continuous_size
- def total_action_size(self) -> int:
- return sum(self.discrete_action_branches) + self.continuous_action_size
+ def total_size(self) -> int:
+ return sum(self.discrete_branches) + self.continuous_size
- def create_empty_action(self, n_agents: int) -> np.ndarray:
- if self.is_action_continuous():
- return np.zeros((n_agents, self.continuous_action_size), dtype=np.float32)
- return np.zeros((n_agents, self.discrete_action_size), dtype=np.int32)
+ def create_empty(self, n_agents: int) -> np.ndarray:
+ if self.is_continuous():
+ return np.zeros((n_agents, self.continuous_size), dtype=np.float32)
+ return np.zeros((n_agents, self.discrete_size), dtype=np.int32)
- def create_random_action(self, n_agents: int) -> np.ndarray:
- if self.is_action_continuous():
+ def create_random(self, n_agents: int) -> np.ndarray:
+ if self.is_continuous():
- low=-1.0, high=1.0, size=(n_agents, self.continuous_action_size)
+ low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
- branch_size = self.discrete_action_branches
+ branch_size = self.discrete_branches
action = np.column_stack(
[
np.random.randint(

dtype=np.int32,
)
- for i in range(self.discrete_action_size)
+ for i in range(self.discrete_size)
]
)
return action

"""
A NamedTuple containing information about the observation and action
spaces for a group of Agents under the same behavior.
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- action_spec is an ActionSpec NamedTuple
"""
observation_shapes: List[Tuple]
action_spec: ActionSpec
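The docstrings above describe ActionSpec and BehaviorSpec as NamedTuples. A hedged sketch of the renamed helpers for a purely discrete spec, mirroring the `ActionSpec(0, (10, 20, 30))` case in the updated test_steps.py; the observation shapes in the BehaviorSpec line are made up for illustration:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec, BehaviorSpec

    # Three independent discrete branches with 10, 20 and 30 choices each.
    spec = ActionSpec(0, (10, 20, 30))

    assert spec.is_discrete() and not spec.is_continuous()
    assert spec.discrete_size == 3                 # one entry per branch
    assert spec.total_size == 60                   # sum of the branch sizes
    assert spec.create_empty(4).shape == (4, 3)    # int32 zeros, one column per branch

    actions = spec.create_random(4)                # each column sampled within its branch range
    assert actions.dtype == np.int32 and actions.shape == (4, 3)

    # BehaviorSpec simply pairs observation shapes with an ActionSpec.
    behavior_spec = BehaviorSpec(observation_shapes=[(84, 84, 3), (8,)], action_spec=spec)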

ml-agents-envs/mlagents_envs/environment.py (19 changes)

n_agents = len(self._env_state[group_name][0])
self._env_actions[group_name] = self._env_specs[
group_name
- ].action_spec.create_empty_action(n_agents)
+ ].action_spec.create_empty(n_agents)
step_input = self._generate_step_input(self._env_actions)
with hierarchical_timer("communicator.exchange"):
outputs = self._communicator.exchange(step_input)

if behavior_name not in self._env_state:
return
spec = self._env_specs[behavior_name]
- expected_type = (
- np.float32 if spec.action_spec.is_action_continuous() else np.int32
- )
- expected_shape = (
- len(self._env_state[behavior_name][0]),
- spec.action_spec.action_size,
- )
+ expected_type = np.float32 if spec.action_spec.is_continuous() else np.int32
+ expected_shape = (len(self._env_state[behavior_name][0]), spec.action_spec.size)
if action.shape != expected_shape:
raise UnityActionException(
f"The behavior {behavior_name} needs an input of dimension "

if behavior_name not in self._env_state:
return
spec = self._env_specs[behavior_name]
- expected_shape = (spec.action_spec.action_size,)
+ expected_shape = (spec.action_spec.size,)
if action.shape != expected_shape:
raise UnityActionException(
f"The Agent {agent_id} with BehaviorName {behavior_name} needs "

- expected_type = (
- np.float32 if spec.action_spec.is_action_continuous() else np.int32
- )
+ expected_type = np.float32 if spec.action_spec.is_continuous() else np.int32
- self._env_actions[behavior_name] = spec.action_spec.create_empty_action(
+ self._env_actions[behavior_name] = spec.action_spec.create_empty(
len(self._env_state[behavior_name][0])
)
try:
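The checks above are what `set_actions` enforces: a `(num_agents, action_spec.size)` array whose dtype is float32 for continuous specs and int32 for discrete ones. A minimal caller sketch that satisfies them by deriving the batch from the spec; `env`, `spec`, and `behavior_name` are assumed to already exist, and for a purely continuous or purely discrete spec `create_empty` already matches the expected shape and dtype:

    decision_steps, terminal_steps = env.get_steps(behavior_name)
    n_agents = len(decision_steps)

    # Zero-filled batch with the shape and dtype the validation above expects.
    action = spec.action_spec.create_empty(n_agents)
    env.set_actions(behavior_name, action)
    env.step()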

ml-agents-envs/mlagents_envs/rpc_utils.py (6 changes)

[agent_info.id for agent_info in terminal_agent_info_list], dtype=np.int32
)
action_mask = None
- if behavior_spec.action_spec.is_action_discrete():
+ if behavior_spec.action_spec.is_discrete():
- a_size = np.sum(behavior_spec.action_spec.discrete_action_branches)
+ a_size = np.sum(behavior_spec.action_spec.discrete_branches)
mask_matrix = np.ones((n_agents, a_size), dtype=np.bool)
for agent_index, agent_info in enumerate(decision_agent_info_list):
if agent_info.action_mask is not None:

]
action_mask = (1 - mask_matrix).astype(np.bool)
indices = _generate_split_indices(
- behavior_spec.action_spec.discrete_action_branches
+ behavior_spec.action_spec.discrete_branches
)
action_mask = np.split(action_mask, indices, axis=1)
return (

ml-agents-envs/mlagents_envs/tests/test_envs.py (7 changes)

decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions(
- "RealFakeBrain",
- np.zeros((n_agents, spec.action_spec.action_size), dtype=np.float32),
+ "RealFakeBrain", np.zeros((n_agents, spec.action_spec.size), dtype=np.float32)
- np.zeros((n_agents - 1, spec.action_spec.action_size), dtype=np.float32),
+ np.zeros((n_agents - 1, spec.action_spec.size), dtype=np.float32),
- -1 * np.ones((n_agents, spec.action_spec.action_size), dtype=np.float32),
+ -1 * np.ones((n_agents, spec.action_spec.size), dtype=np.float32),
)
env.step()

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)

bp.vector_action_size.extend([5, 4])
bp.vector_action_space_type = 0
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
- assert behavior_spec.action_spec.is_action_discrete()
- assert not behavior_spec.action_spec.is_action_continuous()
+ assert behavior_spec.action_spec.is_discrete()
+ assert not behavior_spec.action_spec.is_continuous()
- assert behavior_spec.action_spec.discrete_action_branches == (5, 4)
- assert behavior_spec.action_spec.action_size == 2
+ assert behavior_spec.action_spec.discrete_branches == (5, 4)
+ assert behavior_spec.action_spec.size == 2
- assert not behavior_spec.action_spec.is_action_discrete()
- assert behavior_spec.action_spec.is_action_continuous()
- assert behavior_spec.action_spec.action_size == 6
+ assert not behavior_spec.action_spec.is_discrete()
+ assert behavior_spec.action_spec.is_continuous()
+ assert behavior_spec.action_spec.size == 6
def test_batched_step_result_from_proto_raises_on_infinite():

ml-agents-envs/mlagents_envs/tests/test_steps.py (24 changes)

def test_specs():
specs = ActionSpec(3, ())
- assert specs.discrete_action_branches == ()
- assert specs.action_size == 3
- assert specs.create_empty_action(5).shape == (5, 3)
- assert specs.create_empty_action(5).dtype == np.float32
+ assert specs.discrete_branches == ()
+ assert specs.size == 3
+ assert specs.create_empty(5).shape == (5, 3)
+ assert specs.create_empty(5).dtype == np.float32
- assert specs.discrete_action_branches == (3,)
- assert specs.action_size == 1
- assert specs.create_empty_action(5).shape == (5, 1)
- assert specs.create_empty_action(5).dtype == np.int32
+ assert specs.discrete_branches == (3,)
+ assert specs.size == 1
+ assert specs.create_empty(5).shape == (5, 1)
+ assert specs.create_empty(5).dtype == np.int32
def test_action_generator():

- zero_action = specs.create_empty_action(4)
+ zero_action = specs.create_empty(4)
- random_action = specs.create_random_action(4)
+ random_action = specs.create_random(4)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

action_shape = (10, 20, 30)
specs = ActionSpec(0, action_shape)
- zero_action = specs.create_empty_action(4)
+ zero_action = specs.create_empty(4)
- random_action = specs.create_random_action(4)
+ random_action = specs.create_random(4)
assert random_action.dtype == np.int32
assert random_action.shape == (4, len(action_shape))
assert np.min(random_action) >= 0

ml-agents/mlagents/trainers/demo_loader.py (16 changes)

if expected_behavior_spec:
# check action dimensions in demonstration match
if (
- behavior_spec.action_spec.continuous_action_size
- != expected_behavior_spec.action_spec.continuous_action_size
+ behavior_spec.action_spec.continuous_size
+ != expected_behavior_spec.action_spec.continuous_size
- behavior_spec.action_spec.continuous_action_size,
- expected_behavior_spec.action_spec.continuous_action_size,
+ behavior_spec.action_spec.continuous_size,
+ expected_behavior_spec.action_spec.continuous_size,
- behavior_spec.action_spec.discrete_action_branches
- != expected_behavior_spec.action_spec.discrete_action_branches
+ behavior_spec.action_spec.discrete_branches
+ != expected_behavior_spec.action_spec.discrete_branches
- behavior_spec.action_spec.discrete_action_branches,
- expected_behavior_spec.action_spec.discrete_action_branches,
+ behavior_spec.action_spec.discrete_branches,
+ expected_behavior_spec.action_spec.discrete_branches,
)
)
# check observations match

ml-agents/mlagents/trainers/policy/policy.py (15 changes)

self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
- if (
- self.action_spec.continuous_action_size > 0
- and self.action_spec.discrete_action_size > 0
- ):
+ if self.action_spec.continuous_size > 0 and self.action_spec.discrete_size > 0:
- list(self.action_spec.discrete_action_branches)
- if self.action_spec.is_action_discrete()
- else [self.action_spec.action_size]
+ list(self.action_spec.discrete_branches)
+ if self.action_spec.is_discrete()
+ else [self.action_spec.size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1

)
- self.use_continuous_act = self.action_spec.is_action_continuous()
- self.num_branches = self.action_spec.action_size
+ self.use_continuous_act = self.action_spec.is_continuous()
+ self.num_branches = self.action_spec.size
self.previous_action_dict: Dict[str, np.array] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize

ml-agents/mlagents/trainers/policy/tf_policy.py (9 changes)

feed_dict[self.vector_in] = vec_vis_obs.vector_observations
if not self.use_continuous_act:
mask = np.ones(
- (
- len(batched_step_result),
- sum(self.action_spec.discrete_action_branches),
- ),
+ (len(batched_step_result), sum(self.action_spec.discrete_branches)),
dtype=np.float32,
)
if batched_step_result.action_mask is not None:

self.mask = tf.cast(self.mask_input, tf.int32)
tf.Variable(
- int(self.action_spec.is_action_continuous()),
+ int(self.action_spec.is_continuous()),
name="is_continuous_control",
trainable=False,
dtype=tf.int32,

tf.Variable(
self.m_size, name="memory_size", trainable=False, dtype=tf.int32
)
- if self.action_spec.is_action_continuous():
+ if self.action_spec.is_continuous():
tf.Variable(
self.act_size[0],
name="action_output_shape",

ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)

behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=False, # Faster training for PPO
- separate_critic=behavior_spec.action_spec.is_action_continuous(),
+ separate_critic=behavior_spec.action_spec.is_continuous(),
)
return policy

ml-agents/mlagents/trainers/sac/optimizer_torch.py (6 changes)

):
super().__init__()
self.action_spec = action_spec
- if self.action_spec.is_action_continuous():
+ if self.action_spec.is_continuous():
- self.act_size = self.action_spec.continuous_action_size
+ self.act_size = self.action_spec.continuous_size
- self.act_size = self.action_spec.discrete_action_branches
+ self.act_size = self.action_spec.discrete_branches
num_value_outs = sum(self.act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(

ml-agents/mlagents/trainers/tests/mock_brain.py (12 changes)

behavior_spec: BehaviorSpec, num_agents: int = 1
) -> Tuple[DecisionSteps, TerminalSteps]:
action_spec = behavior_spec.action_spec
- is_discrete = action_spec.is_action_discrete()
+ is_discrete = action_spec.is_discrete()
- action_shape=action_spec.discrete_action_branches
+ action_shape=action_spec.discrete_branches
- else action_spec.continuous_action_size,
+ else action_spec.continuous_size,
discrete=is_discrete,
)

memory_size: int = 10,
exclude_key_list: List[str] = None,
) -> AgentBuffer:
- is_discrete = behavior_spec.action_spec.is_action_discrete()
+ is_discrete = behavior_spec.action_spec.is_discrete()
- action_space = behavior_spec.action_spec.discrete_action_branches
+ action_space = behavior_spec.action_spec.discrete_branches
- action_space = behavior_spec.action_spec.continuous_action_size
+ action_space = behavior_spec.action_spec.continuous_size
trajectory = make_fake_trajectory(
length,
behavior_spec.observation_shapes,

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (2 changes)

next_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
- action = behavior_spec.action_spec.create_random_action(1)[0, :]
+ action = behavior_spec.action_spec.create_random(1)[0, :]
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)

ml-agents/mlagents/trainers/tf/components/bc/model.py (2 changes)

self.done_expert = tf.placeholder(shape=[None, 1], dtype=tf.float32)
self.done_policy = tf.placeholder(shape=[None, 1], dtype=tf.float32)
- if self.policy.action_spec.is_action_continuous():
+ if self.policy.action_spec.is_continuous():
action_length = self.policy.act_size[0]
self.action_in_expert = tf.placeholder(
shape=[None, action_length], dtype=tf.float32

ml-agents/mlagents/trainers/tf/components/bc/module.py (4 changes)

self.policy.sequence_length_ph: self.policy.sequence_length,
}
feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"]
- if self.policy.action_spec.is_action_discrete():
+ if self.policy.action_spec.is_discrete():
- sum(self.policy.action_spec.discrete_action_branches),
+ sum(self.policy.action_spec.discrete_branches),
),
dtype=np.float32,
)

ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py (2 changes)

"""
combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
- if self.policy.action_spec.is_action_continuous():
+ if self.policy.action_spec.is_continuous():
pred_action = tf.layers.dense(
hidden, self.policy.act_size[0], activation=None
)

ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py (2 changes)

self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
self.done_policy = tf.expand_dims(self.done_policy_holder, -1)
- if self.policy.action_spec.is_action_continuous():
+ if self.policy.action_spec.is_continuous():
action_length = self.policy.act_size[0]
self.action_in_expert = tf.placeholder(
shape=[None, action_length], dtype=tf.float32

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

np.ones(
(
self.n_sequences * self.policy.sequence_length,
- sum(self.action_spec.discrete_action_branches),
+ sum(self.action_spec.discrete_branches),
),
dtype=np.float32,
)

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (12 changes)

(self.get_current_state(mini_batch), self.get_next_state(mini_batch)), dim=1
)
hidden = self.inverse_model_action_prediction(inverse_model_input)
- if self._action_spec.is_action_continuous():
+ if self._action_spec.is_continuous():
- hidden, self._action_spec.discrete_action_branches
+ hidden, self._action_spec.discrete_branches
)
branches = [torch.softmax(b, dim=1) for b in branches]
return torch.cat(branches, dim=1)

Uses the current state embedding and the action of the mini_batch to predict
the next state embedding.
"""
- if self._action_spec.is_action_continuous():
+ if self._action_spec.is_continuous():
- self._action_spec.discrete_action_branches,
+ self._action_spec.discrete_branches,
),
dim=1,
)

action prediction (given the current and next state).
"""
predicted_action = self.predict_action(mini_batch)
- if self._action_spec.is_action_continuous():
+ if self._action_spec.is_continuous():
sq_difference = (
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
- predicted_action

true_action = torch.cat(
ModelUtils.actions_to_onehot(
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
- self._action_spec.discrete_action_branches,
+ self._action_spec.discrete_branches,
),
dim=1,
)
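The discrete branch of the curiosity losses above compares a per-branch softmax prediction against the true action, one-hot encoded per branch and concatenated along `dim=1`. A minimal sketch of that encoding, assuming `ModelUtils.actions_to_onehot` yields one one-hot tensor per branch that the caller concatenates; the helper below is illustrative, not the library code:

    import torch

    def actions_to_concat_onehot(discrete_actions: torch.Tensor, branch_sizes) -> torch.Tensor:
        # discrete_actions: (batch, num_branches) integer tensor, one column per branch.
        onehots = [
            torch.nn.functional.one_hot(discrete_actions[:, i], num_classes=size).float()
            for i, size in enumerate(branch_sizes)
        ]
        return torch.cat(onehots, dim=1)  # (batch, sum(branch_sizes))

    # Two branches of sizes (3, 2): the concatenated one-hot width is 5.
    acts = torch.tensor([[2, 0], [1, 1]])
    assert actions_to_concat_onehot(acts, (3, 2)).shape == (2, 5)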

ml-agents/mlagents/trainers/torch/model_serialization.py (2 changes)

if len(shape) == 3
]
dummy_masks = torch.ones(
- batch_dim + [sum(self.policy.action_spec.discrete_action_branches)]
+ batch_dim + [sum(self.policy.action_spec.discrete_branches)]
)
dummy_memories = torch.zeros(
batch_dim + seq_len_dim + [self.policy.export_memory_size]

ml-agents/mlagents/trainers/torch/networks.py (10 changes)

):
super().__init__()
self.action_spec = action_spec
- if self.action_spec.is_action_continuous():
+ if self.action_spec.is_continuous():
- self.act_size = self.action_spec.action_size
+ self.act_size = self.action_spec.size
- torch.Tensor([self.action_spec.total_action_size]), requires_grad=False
+ torch.Tensor([self.action_spec.total_size]), requires_grad=False
)
self.network_body = NetworkBody(observation_shapes, network_settings)
if network_settings.memory is not None:

if self.act_type == ActionType.CONTINUOUS:
self.distribution = GaussianDistribution(
self.encoding_size,
- self.action_spec.continuous_action_size,
+ self.action_spec.continuous_size,
- self.encoding_size, self.action_spec.discrete_action_branches
+ self.encoding_size, self.action_spec.discrete_branches
)
@property

ml-agents/mlagents/trainers/torch/utils.py (10 changes)

@property
def flattened_size(self) -> int:
- if self._specs.is_action_continuous():
- return self._specs.action_size
+ if self._specs.is_continuous():
+ return self._specs.size
- return sum(self._specs.discrete_action_branches)
+ return sum(self._specs.discrete_branches)
- if self._specs.is_action_continuous():
+ if self._specs.is_continuous():
- self._specs.discrete_action_branches,
+ self._specs.discrete_branches,
),
dim=1,
)
