浏览代码

Export separate nodes for continuous/discrete actions (#4655)

/MLA-1734-demo-provider
GitHub 4 年前
当前提交
d7c3022d
共有 4 个文件被更改,包括 68 次插入和 28 次删除
  1. 10
      ml-agents-envs/mlagents_envs/environment.py
  2. 18
      ml-agents/mlagents/trainers/torch/action_model.py
  3. 24
      ml-agents/mlagents/trainers/torch/model_serialization.py
  4. 44
      ml-agents/mlagents/trainers/torch/networks.py

10
ml-agents-envs/mlagents_envs/environment.py


if n_agents == 0:
continue
for i in range(n_agents):
# TODO: extend to AgentBuffers
# TODO add separate fields for continuous and discrete actions in AgentActionProto
_act = []
_act = vector_action[b].continuous[i]
else:
_act = vector_action[b].discrete[i]
_act.append(vector_action[b].continuous[i])
if vector_action[b].discrete is not None:
_act.append(vector_action[b].discrete[i])
_act = np.concatenate(_act, axis=0)
action = AgentActionProto(vector_actions=_act)
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP

18
ml-agents/mlagents/trainers/torch/action_model.py


def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
# Collect the exported-model outputs of the continuous and discrete action
# distributions obtained from self._get_dists(inputs, masks).
#
# NOTE(review): this span reads like a diff hunk whose removed and added lines
# were interleaved and whose indentation was stripped: it contains two `return`
# statements, and both an `out_list` accumulator and separate
# `continuous_out` / `discrete_out` values. Confirm against the real
# action_model.py before relying on any line here.
dists = self._get_dists(inputs, masks)
# First variant: every distribution's exported output is appended to one list,
# which the first `return` below joins along dim 1.
out_list: List[torch.Tensor] = []
# Second variant: continuous and discrete outputs are kept apart; each stays
# None unless it is assigned further down.
continuous_out, discrete_out = None, None
out_list.append(dists.continuous.exported_model_output())
continuous_out = dists.continuous.exported_model_output()
# The deprecated combined output starts out as the continuous output.
action_out_deprecated = continuous_out
for discrete_dist in dists.discrete:
out_list.append(discrete_dist.exported_model_output())
# NOTE(review): as written, nothing after this return would execute -- presumably
# this is the removed side of the diff; confirm.
return torch.cat(out_list, dim=1)
# One exported output per entry of dists.discrete, then concatenated along dim 1.
discrete_out = [
discrete_dist.exported_model_output()
for discrete_dist in dists.discrete
]
discrete_out = torch.cat(discrete_out, dim=1)
# The deprecated combined output is overwritten with the discrete output here.
action_out_deprecated = discrete_out
# deprecated action field does not support hybrid action
# (hybrid here means both continuous_size and discrete_size are greater than 0)
if self.action_spec.continuous_size > 0 and self.action_spec.discrete_size > 0:
action_out_deprecated = None
# NOTE(review): this returns a 3-tuple although the signature is annotated
# `-> torch.Tensor`; confirm which one is intended.
return continuous_out, discrete_out, action_out_deprecated
def forward(
self, inputs: torch.Tensor, masks: torch.Tensor

24
ml-agents/mlagents/trainers/torch/model_serialization.py


+ ["action_masks", "memories"]
)
self.output_names = [
"action",
"version_number",
"memory_size",
"is_continuous_control",
"action_output_shape",
]
self.output_names = ["version_number", "memory_size"]
if self.policy.action_spec.continuous_size > 0:
self.output_names += [
"continuous_actions",
"continuous_action_output_shape",
]
if self.policy.action_spec.discrete_size > 0:
self.output_names += ["discrete_actions", "discrete_action_output_shape"]
if (
self.policy.action_spec.continuous_size == 0
or self.policy.action_spec.discrete_size == 0
):
self.output_names += [
"action",
"is_continuous_control",
"action_output_shape",
]
self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
self.dynamic_axes.update({"action": {0: "batch"}})

44
ml-agents/mlagents/trainers/torch/networks.py


from typing import Callable, List, Dict, Tuple, Optional
from typing import Callable, List, Dict, Tuple, Optional, Union
import abc
from mlagents.torch_utils import torch, nn

vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, int, int, int, int]:
) -> Tuple[Union[int, torch.Tensor], ...]:
"""
Forward pass of the Actor for inference. This is required for export to ONNX, and
the inputs and outputs of this method should not be changed without a respective change

super().__init__()
self.action_spec = action_spec
self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
self.is_continuous_int = torch.nn.Parameter(
self.is_continuous_int_deprecated = torch.nn.Parameter(
self.act_size_vector = torch.nn.Parameter(
self.continuous_act_size_vector = torch.nn.Parameter(
torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
)
# TODO: export list of branch sizes instead of sum
self.discrete_act_size_vector = torch.nn.Parameter(
torch.Tensor([sum(self.action_spec.discrete_branches)]), requires_grad=False
)
self.act_size_vector_deprecated = torch.nn.Parameter(
torch.Tensor(
[
self.action_spec.continuous_size

vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, int, int, int, int]:
) -> Tuple[Union[int, torch.Tensor], ...]:
At this moment, torch.onnx.export() doesn't accept None as tensor to be exported,
so the size of return tuple varies with action spec.
# TODO: How this is written depends on how the inference model is structured
action_out = self.action_model.get_action_out(encoding, masks)
return (
action_out,
cont_action_out, disc_action_out, action_out_deprecated = self.action_model.get_action_out(
encoding, masks
)
export_out = [
self.is_continuous_int,
self.act_size_vector,
)
]
if self.action_spec.continuous_size > 0:
export_out += [cont_action_out, self.continuous_act_size_vector]
if self.action_spec.discrete_size > 0:
export_out += [disc_action_out, self.discrete_act_size_vector]
# Only export deprecated nodes with non-hybrid action spec
if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
export_out += [
action_out_deprecated,
self.is_continuous_int_deprecated,
self.act_size_vector_deprecated,
]
return tuple(export_out)
class SharedActorCritic(SimpleActor, ActorCritic):

正在加载...
取消
保存