
ActionSpec and ActionBuffer (#4578)

/develop/actionmodel-csharp
GitHub, 4 years ago
Current commit: e4db5dc5
10 files changed, with 68 additions and 76 deletions
  1. ml-agents-envs/mlagents_envs/base_env.py (35 lines changed)
  2. ml-agents-envs/mlagents_envs/rpc_utils.py (19 lines changed)
  3. ml-agents/mlagents/trainers/policy/policy.py (16 lines changed)
  4. ml-agents/mlagents/trainers/tests/simple_test_envs.py (10 lines changed)
  5. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (8 lines changed)
  6. ml-agents/mlagents/trainers/torch/action_model.py (22 lines changed)
  7. ml-agents/mlagents/trainers/torch/distributions.py (2 lines changed)
  8. ml-agents/mlagents/trainers/torch/model_serialization.py (2 lines changed)
  9. ml-agents/mlagents/trainers/torch/networks.py (27 lines changed)
  10. ml-agents/mlagents/trainers/torch/utils.py (3 lines changed)

ml-agents-envs/mlagents_envs/base_env.py (35 lines changed)


 BehaviorName = str
-class HybridAction(NamedTuple):
+class ActionBuffer(NamedTuple):
+    """
+    Contains continuous and discrete actions as numpy arrays.
+    """

     CONTINUOUS = 1
     HYBRID = 2
+class ActionSpec(NamedTuple):
+    num_continuous_actions: int
+    discrete_branch_sizes: Tuple[int]
-class BehaviorSpec(NamedTuple):
-    observation_shapes: List[Tuple]
-    continuous_action_shape: int
-    discrete_action_shape: Tuple[int]
+    # For backwards compatibility
+    def is_action_discrete(self) -> bool:
+        """

+        return self.continuous_action_size > 0
     @property
+    def discrete_action_branches(self) -> Optional[Tuple[int, ...]]:
+        return self.discrete_branch_sizes  # type: ignore
     @property
-        return len(self.discrete_action_shape)
+        return len(self.discrete_branch_sizes)
-        return self.continuous_action_shape
+        return self.num_continuous_actions
     @property
-    def discrete_action_branches(self) -> Optional[Tuple[int, ...]]:
-        return self.discrete_action_shape  # type: ignore
-        return HybridAction(
+        return ActionBuffer(
             np.zeros((n_agents, self.continuous_action_size), dtype=np.float32),
             np.zeros((n_agents, self.discrete_action_size), dtype=np.int32),
         )

                 for i in range(self.discrete_action_size)
             ]
         )
-        return HybridAction(continuous_action, discrete_action)
+        return ActionBuffer(continuous_action, discrete_action)
+class BehaviorSpec(NamedTuple):
+    observation_shapes: List[Tuple]
+    action_spec: ActionSpec
 class BehaviorMapping(Mapping):
     def __init__(self, specs: Dict[BehaviorName, BehaviorSpec]):
         self._dict = specs

     @abstractmethod
     def set_actions(
-        self, behavior_name: BehaviorName, action: Union[HybridAction, np.ndarray]
+        self, behavior_name: BehaviorName, action: Union[ActionBuffer, np.ndarray]
     ) -> None:
         """
         Sets the action for all of the agents in the simulation for the next

         self,
         behavior_name: BehaviorName,
         agent_id: AgentId,
-        action: Union[HybridAction, np.ndarray],
+        action: Union[ActionBuffer, np.ndarray],
     ) -> None:
         """
         Sets the action for one of the agents in the simulation for the next

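The excerpts above only show the changed lines. For orientation, here is a minimal, self-contained sketch of how the new ActionSpec/ActionBuffer pairing fits together; it is not the real mlagents_envs.base_env code, and the empty_action/random_action helper names are assumptions inferred from the zero-filled and random-filled buffers visible in the diff.

# Minimal sketch of the ActionSpec/ActionBuffer idea from this diff.
# NOTE: this is an illustration, not the real mlagents_envs.base_env code;
# the empty_action/random_action method names are assumed from the fragments above.
from typing import NamedTuple, Tuple
import numpy as np


class ActionBuffer(NamedTuple):
    """Contains continuous and discrete actions as numpy arrays."""
    continuous: np.ndarray
    discrete: np.ndarray


class ActionSpec(NamedTuple):
    num_continuous_actions: int
    discrete_branch_sizes: Tuple[int, ...]

    @property
    def continuous_action_size(self) -> int:
        return self.num_continuous_actions

    @property
    def discrete_action_size(self) -> int:
        return len(self.discrete_branch_sizes)

    @property
    def discrete_action_branches(self) -> Tuple[int, ...]:
        return self.discrete_branch_sizes

    def empty_action(self, n_agents: int) -> ActionBuffer:
        # Zero-filled buffers, one row per agent (mirrors the np.zeros calls in the diff).
        return ActionBuffer(
            np.zeros((n_agents, self.continuous_action_size), dtype=np.float32),
            np.zeros((n_agents, self.discrete_action_size), dtype=np.int32),
        )

    def random_action(self, n_agents: int) -> ActionBuffer:
        # Uniform continuous actions plus one random choice per discrete branch.
        continuous = np.random.uniform(
            -1.0, 1.0, (n_agents, self.continuous_action_size)
        ).astype(np.float32)
        if self.discrete_action_size > 0:
            discrete = np.column_stack(
                [
                    np.random.randint(0, branch, n_agents, dtype=np.int32)
                    for branch in self.discrete_branch_sizes
                ]
            )
        else:
            discrete = np.zeros((n_agents, 0), dtype=np.int32)
        return ActionBuffer(continuous, discrete)


spec = ActionSpec(num_continuous_actions=2, discrete_branch_sizes=(3, 2))
actions = spec.empty_action(n_agents=4)
print(actions.continuous.shape, actions.discrete.shape)  # (4, 2) (4, 2)
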
ml-agents-envs/mlagents_envs/rpc_utils.py (19 lines changed)


 from mlagents_envs.base_env import (
+    ActionSpec,
-    ActionType,
     DecisionSteps,
     TerminalSteps,
 )

     :return: BehaviorSpec object.
     """
     observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
-    action_type = (
-        ActionType.DISCRETE
-        if brain_param_proto.vector_action_space_type == 0
-        else ActionType.CONTINUOUS
-    )
-    if action_type == ActionType.CONTINUOUS:
-        action_shape: Union[
-            int, Tuple[int, ...]
-        ] = brain_param_proto.vector_action_size[0]
-    else:
-        action_shape = tuple(brain_param_proto.vector_action_size)
-    return BehaviorSpec(observation_shape, action_type, action_shape)
+    action_spec = brain_param_proto.action_spec
+    action_spec = ActionSpec(action_spec.num_continuous_actions,
+        tuple(branch for branch in action_spec.discrete_branch_sizes)
+    )
+    return BehaviorSpec(observation_shape, action_spec)
 class OffsetBytesIO:

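The new conversion reads the action spec directly from the protobuf instead of branching on vector_action_space_type. A hedged sketch of the same logic, using plain dataclasses as stand-ins for the generated BrainParametersProto message (only the field names that appear in the diff are assumed):

# Sketch of the proto -> ActionSpec conversion, mirroring the new rpc_utils logic.
# ProtoActionSpecStub/BrainParamStub are stand-ins for the generated protobuf messages.
from dataclasses import dataclass, field
from typing import List, NamedTuple, Tuple


@dataclass
class ProtoActionSpecStub:
    num_continuous_actions: int = 0
    discrete_branch_sizes: List[int] = field(default_factory=list)


@dataclass
class BrainParamStub:
    action_spec: ProtoActionSpecStub = field(default_factory=ProtoActionSpecStub)


class ActionSpec(NamedTuple):
    num_continuous_actions: int
    discrete_branch_sizes: Tuple[int, ...]


def action_spec_from_proto(brain_param_proto: BrainParamStub) -> ActionSpec:
    # Copy the proto fields into the immutable Python-side ActionSpec.
    proto_spec = brain_param_proto.action_spec
    return ActionSpec(
        proto_spec.num_continuous_actions,
        tuple(branch for branch in proto_spec.discrete_branch_sizes),
    )


proto = BrainParamStub(ProtoActionSpecStub(num_continuous_actions=3, discrete_branch_sizes=[2, 2]))
print(action_spec_from_proto(proto))  # ActionSpec(num_continuous_actions=3, discrete_branch_sizes=(2, 2))
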
ml-agents/mlagents/trainers/policy/policy.py (16 lines changed)


         condition_sigma_on_obs: bool = True,
     ):
         self.behavior_spec = behavior_spec
+        self.action_spec = behavior_spec.action_spec
-        self.continuous_act_size = behavior_spec.continuous_action_size
-        self.discrete_act_size = behavior_spec.discrete_action_branches
+        self.continuous_act_size = self.action_spec.continuous_action_size
+        self.discrete_act_size = self.action_spec.discrete_action_size
+        self.discrete_act_branches = self.action_spec.discrete_action_branches
-            list(behavior_spec.discrete_action_branches)
-            if behavior_spec.is_action_discrete()
-            else [behavior_spec.action_size]
+            list(self.action_spec.discrete_action_branches)
+            if self.action_spec.is_action_discrete()
+            else [self.action_spec.action_size]
         )
         self.vec_obs_size = sum(
             shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1

         )
-        self.use_continuous_act = behavior_spec.is_action_continuous()
-        self.num_branches = self.behavior_spec.action_size
+        self.use_continuous_act = self.action_spec.is_action_continuous()
+        self.num_branches = self.action_spec.action_size
         self.previous_action_dict: Dict[str, np.array] = {}
         self.memory_dict: Dict[str, np.ndarray] = {}
         self.normalize = trainer_settings.network_settings.normalize

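With the spec stored on the policy, the derived sizes reduce to a few lookups on action_spec. A small sketch of how the act_size list might be chosen for discrete versus continuous behaviors; the ActionSpec stub below re-creates only the properties referenced in the diff, and the action_size definition is an assumption:

# Sketch: deriving the policy's act_size list from an ActionSpec-like object.
from typing import List, NamedTuple, Tuple


class ActionSpec(NamedTuple):
    num_continuous_actions: int
    discrete_branch_sizes: Tuple[int, ...]

    @property
    def continuous_action_size(self) -> int:
        return self.num_continuous_actions

    @property
    def discrete_action_size(self) -> int:
        return len(self.discrete_branch_sizes)

    @property
    def discrete_action_branches(self) -> Tuple[int, ...]:
        return self.discrete_branch_sizes

    @property
    def action_size(self) -> int:
        # Assumed: total number of action slots (continuous + discrete branches).
        return self.continuous_action_size + self.discrete_action_size

    def is_action_discrete(self) -> bool:
        return self.discrete_action_size > 0


def act_size_from_spec(action_spec: ActionSpec) -> List[int]:
    # Discrete behaviors expose one entry per branch (the branch size);
    # continuous behaviors expose a single entry with the total size.
    if action_spec.is_action_discrete():
        return list(action_spec.discrete_action_branches)
    return [action_spec.action_size]


print(act_size_from_spec(ActionSpec(0, (3, 3, 2))))  # [3, 3, 2]
print(act_size_from_spec(ActionSpec(4, ())))         # [4]
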
ml-agents/mlagents/trainers/tests/simple_test_envs.py (10 lines changed)


 import numpy as np
 from mlagents_envs.base_env import (
+    ActionSpec,
     BaseEnv,
     BehaviorSpec,
     DecisionSteps,

     BehaviorName,
-    HybridAction,
+    ActionBuffer,
 )
 from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
 from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (

         action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
         if use_discrete:
             self.behavior_spec = BehaviorSpec(
-                self._make_obs_spec(), 0, tuple(2 for _ in range(action_size))
+                self._make_obs_spec(), ActionSpec(0, tuple(2 for _ in range(action_size)))
-                self._make_obs_spec(), action_size, tuple()
+                self._make_obs_spec(), ActionSpec(action_size, tuple())
             )
         self.action_size = action_size
         self.names = brain_names

         # less than 1/step_size to force agent to use memory
         self.behavior_spec = BehaviorSpec(
             self._make_obs_spec(),
-            continuous_action_size,
-            tuple(2 for _ in range(discrete_action_size)),
+            ActionSpec(continuous_action_size, tuple(2 for _ in range(discrete_action_size))),
         )
         self.continuous_action_size = continuous_action_size
         self.discrete_action_size = discrete_action_size

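The test environments now wrap their raw sizes in an ActionSpec before building the BehaviorSpec. A short sketch with trimmed-down stand-ins for both classes:

# Sketch: how the test environments now build their BehaviorSpec.
# ActionSpec/BehaviorSpec below are trimmed-down stand-ins for the real classes.
from typing import List, NamedTuple, Tuple


class ActionSpec(NamedTuple):
    num_continuous_actions: int
    discrete_branch_sizes: Tuple[int, ...]


class BehaviorSpec(NamedTuple):
    observation_shapes: List[Tuple[int, ...]]
    action_spec: ActionSpec


obs_shapes = [(4,)]  # placeholder for self._make_obs_spec()

# Discrete-only env: no continuous actions, one 2-way branch per action slot.
discrete_spec = BehaviorSpec(obs_shapes, ActionSpec(0, tuple(2 for _ in range(3))))

# Continuous-only env: N continuous actions, no discrete branches.
continuous_spec = BehaviorSpec(obs_shapes, ActionSpec(3, tuple()))

# Hybrid env: both kinds at once, as in HybridEnvironment.
hybrid_spec = BehaviorSpec(obs_shapes, ActionSpec(2, tuple(2 for _ in range(1))))

print(hybrid_spec.action_spec)  # ActionSpec(num_continuous_actions=2, discrete_branch_sizes=(2,))
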
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (8 lines changed)


     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
 def test_3chybrid_ppo():
+    env = HybridEnvironment(
+        [BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
+    )
 def test_3cdhybrid_ppo():
-    env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8)
     new_hyperparams = attr.evolve(
         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
     )

         [BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
     )
     new_hyperparams = attr.evolve(
-        PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.05
+        PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
     )
     config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)

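The hybrid tests tweak the shared PPO config with attr.evolve rather than mutating it in place. A sketch of that pattern with invented stand-in config classes (only batch_size, buffer_size, beta, and max_steps come from the diff):

# Sketch of the attr.evolve pattern used by the hybrid PPO tests.
# HyperparamStub/ConfigStub are invented stand-ins, not the real trainer settings.
import attr


@attr.s(auto_attribs=True)
class HyperparamStub:
    batch_size: int = 64
    buffer_size: int = 10240
    beta: float = 0.005


@attr.s(auto_attribs=True)
class ConfigStub:
    hyperparameters: HyperparamStub = attr.ib(factory=HyperparamStub)
    max_steps: int = 5000


PPO_TORCH_CONFIG = ConfigStub()

# attr.evolve copies the config, overriding only the named fields.
new_hyperparams = attr.evolve(
    PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
)
config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
print(config)
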
ml-agents/mlagents/trainers/torch/action_model.py (22 lines changed)


 from mlagents.trainers.torch.distributions import DistInstance, DiscreteDistInstance, GaussianDistribution, MultiCategoricalDistribution
 from mlagents.trainers.torch.utils import ModelUtils
+from mlagents_envs.base_env import ActionSpec
 EPSILON = 1e-7  # Small value to avoid divide by zero

         hidden_size: int,
-        continuous_act_size: int,
-        discrete_act_size: List[int],
+        action_spec: ActionSpec,
-        self.continuous_act_size = continuous_act_size
-        self.discrete_act_size = discrete_act_size
+        self.continuous_act_size = action_spec.continuous_action_size
+        self.discrete_act_branches = action_spec.discrete_action_branches
+        self.discrete_act_size = action_spec.discrete_action_size
+        self.action_spec = action_spec
-        if continuous_act_size > 0:
+        if self.continuous_act_size > 0:
-                continuous_act_size,
+                self.continuous_act_size,
-            self._split_list.append(continuous_act_size)
+            self._split_list.append(self.continuous_act_size)
-        if len(discrete_act_size) > 0:
-            self._distributions.append(MultiCategoricalDistribution(self.encoding_size, discrete_act_size))
-            self._split_list += [1 for _ in range(len(discrete_act_size))]
+        if self.discrete_act_size > 0:
+            self._distributions.append(MultiCategoricalDistribution(self.encoding_size, self.discrete_act_branches))
+            self._split_list += [1 for _ in range(self.discrete_act_size)]
     def _sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
         """

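The ActionModel now sizes one continuous head and one multi-branch discrete head from the ActionSpec instead of taking separate size arguments. The rough stand-in below uses torch.distributions.Normal and Categorical in place of the trainer's GaussianDistribution and MultiCategoricalDistribution classes, whose exact interfaces are not shown here:

# Rough sketch of an action model keyed off an ActionSpec-like object.
# Illustrative only; not the real ml-agents ActionModel.
import torch
from torch import nn
from torch.distributions import Categorical, Normal


class ActionModelSketch(nn.Module):
    def __init__(self, hidden_size: int, continuous_size: int, discrete_branches: tuple):
        super().__init__()
        self.continuous_size = continuous_size
        self.discrete_branches = discrete_branches
        if continuous_size > 0:
            self.mu = nn.Linear(hidden_size, continuous_size)
            self.log_sigma = nn.Parameter(torch.zeros(1, continuous_size))
        # One categorical head per discrete branch.
        self.branch_heads = nn.ModuleList(
            [nn.Linear(hidden_size, branch) for branch in discrete_branches]
        )

    def forward(self, encoding: torch.Tensor):
        dists = []
        if self.continuous_size > 0:
            mu = self.mu(encoding)
            dists.append(Normal(mu, self.log_sigma.exp().expand_as(mu)))
        for head in self.branch_heads:
            dists.append(Categorical(logits=head(encoding)))
        return dists


model = ActionModelSketch(hidden_size=16, continuous_size=2, discrete_branches=(3, 2))
dists = model(torch.randn(5, 16))
samples = [d.sample() for d in dists]
print([s.shape for s in samples])  # [torch.Size([5, 2]), torch.Size([5]), torch.Size([5])]
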
ml-agents/mlagents/trainers/torch/distributions.py (2 lines changed)


         if self.conditional_sigma:
             log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2)
         else:
-            log_sigma = self.log_sigma
+            log_sigma = self.log_sigma.expand(inputs.shape[0], -1)
         if self.tanh_squash:
             return [TanhGaussianDistInstance(mu, torch.exp(log_sigma))]
         else:

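The two-line change above expands the learned log-sigma parameter across the batch dimension before the distribution is built. A tiny sketch of the shapes involved (sizes are assumptions):

# Sketch: expanding a (1, act_size) log-sigma parameter across the batch.
import torch

batch_size, act_size = 4, 3
inputs = torch.randn(batch_size, act_size)          # stand-in for the encoder output
log_sigma = torch.nn.Parameter(torch.zeros(1, act_size))

# Before: log_sigma kept its (1, act_size) shape.
# After: it is expanded explicitly so downstream code sees a per-batch tensor.
expanded = log_sigma.expand(inputs.shape[0], -1)
print(expanded.shape)  # torch.Size([4, 3])
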
ml-agents/mlagents/trainers/torch/model_serialization.py (2 lines changed)


             for shape in self.policy.behavior_spec.observation_shapes
             if len(shape) == 3
         ]
-        dummy_masks = torch.ones(batch_dim + [sum(self.policy.actor_critic.discrete_act_size)])
+        dummy_masks = torch.ones(batch_dim + [sum(self.policy.actor_critic.discrete_act_branches)])
         dummy_memories = torch.zeros(
             batch_dim + seq_len_dim + [self.policy.export_memory_size]
         )

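The export path builds its dummy action-mask input with one slot per possible discrete choice, which is why it sums the branch sizes (now exposed as discrete_act_branches). A short sketch with assumed numbers:

# Sketch: sizing the dummy action-mask tensor used for model export.
import torch

batch_dim = [1]
discrete_act_branches = (3, 2)  # two branches with 3 and 2 choices

# One mask slot per possible discrete choice, so sum(...) rather than len(...).
dummy_masks = torch.ones(batch_dim + [sum(discrete_act_branches)])
print(dummy_masks.shape)  # torch.Size([1, 5])
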
ml-agents/mlagents/trainers/torch/networks.py (27 lines changed)


 from mlagents.torch_utils import torch, nn
-from mlagents_envs.base_env import ActionType
+from mlagents_envs.base_env import ActionSpec
 from mlagents.trainers.torch.distributions import DistInstance
 from mlagents.trainers.torch.action_model import ActionModel
 from mlagents.trainers.settings import NetworkSettings

         self,
         observation_shapes: List[Tuple[int, ...]],
         network_settings: NetworkSettings,
-        continuous_act_size: List[int],
-        discrete_act_size: List[int],
+        action_spec: ActionSpec,
-        self.discrete_act_size = discrete_act_size
-        self.continuous_act_size = continuous_act_size
+        self.discrete_act_size = action_spec.discrete_action_size
+        self.discrete_act_branches = action_spec.discrete_action_branches
+        self.continuous_act_size = action_spec.continuous_action_size
-            torch.Tensor(continuous_act_size + len(discrete_act_size))
+            torch.Tensor(action_spec.action_size)
         )
         self.is_continuous_int = torch.nn.Parameter(
             torch.Tensor([int(self.continuous_act_size > 0)])

         self.action_model = ActionModel(
             self.encoding_size,
-            continuous_act_size,
-            discrete_act_size,
+            action_spec,
             conditional_sigma=conditional_sigma,
             tanh_squash=tanh_squash,
         )

         self,
         observation_shapes: List[Tuple[int, ...]],
         network_settings: NetworkSettings,
-        continuous_act_size: List[int],
-        discrete_act_size: List[int],
+        action_spec: ActionSpec,
         stream_names: List[str],
         conditional_sigma: bool = False,
         tanh_squash: bool = False,

             observation_shapes,
             network_settings,
-            continuous_act_size,
-            discrete_act_size,
+            action_spec,
             conditional_sigma,
             tanh_squash,
         )

         self,
         observation_shapes: List[Tuple[int, ...]],
         network_settings: NetworkSettings,
-        continuous_act_size: List[int],
-        discrete_act_size: List[int],
+        action_spec: ActionSpec,
         stream_names: List[str],
         conditional_sigma: bool = False,
         tanh_squash: bool = False,

             observation_shapes,
             network_settings,
-            continuous_act_size,
-            discrete_act_size,
+            action_spec,
             conditional_sigma,
             tanh_squash,
         )

ml-agents/mlagents/trainers/torch/utils.py (3 lines changed)


             log_prob = action_dist.log_prob(action)
             log_probs_list.append(log_prob)
             entropy = action_dist.entropy()
-            entropies_list.append(torch.mean(entropy).unsqueeze(-1).unsqueeze(-1))
-            # entropies_list.append(entropy)
+            entropies_list.append(entropy)
             if isinstance(action_dist, DiscreteDistInstance):
                 all_probs_list.append(action_dist.all_log_prob())
         log_probs = torch.cat(log_probs_list, dim=1)

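The change keeps the full per-distribution entropy tensors rather than collapsing them to a scalar mean before concatenation. A small sketch of the list-then-cat pattern with assumed shapes, using torch.distributions.Categorical as a stand-in for the trainer's distribution instances:

# Sketch: collecting per-distribution log-probs and entropies, then concatenating.
import torch
from torch.distributions import Categorical

batch = 4
dists = [Categorical(logits=torch.randn(batch, 3)), Categorical(logits=torch.randn(batch, 2))]

log_probs_list, entropies_list = [], []
for action_dist in dists:
    action = action_dist.sample()
    log_probs_list.append(action_dist.log_prob(action).unsqueeze(-1))
    # Keep the whole (batch,) entropy tensor instead of its scalar mean.
    entropies_list.append(action_dist.entropy().unsqueeze(-1))

log_probs = torch.cat(log_probs_list, dim=1)
entropies = torch.cat(entropies_list, dim=1)
print(log_probs.shape, entropies.shape)  # torch.Size([4, 2]) torch.Size([4, 2])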