|
|
|
|
|
|
def __init__(self, policy): |
|
|
|
self.policy = policy |
|
|
|
batch_dim = [1] |
|
|
|
seq_len_dim = [1] |
|
|
|
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])] |
|
|
|
dummy_vis_obs = [ |
|
|
|
torch.zeros(batch_dim + list(shape)) |
|
|
|
|
|
|
dummy_masks = torch.ones(batch_dim + [sum(self.policy.actor_critic.act_size)]) |
|
|
|
dummy_memories = torch.zeros(batch_dim + [1] + [self.policy.m_size]) |
|
|
|
dummy_memories = torch.zeros(batch_dim + seq_len_dim + [self.policy.m_size]) |
|
|
|
# Need to pass all possible inputs since currently keyword arguments is not |
|
|
|
# supported by torch.nn.export() |
|
|
|
# Input names can only contain actual input used since in torch.nn.export |
|
|
|
# it maps input_names only to input nodes that exist in the graph |
|
|
|
self.input_names = [] |
|
|
|
self.dynamic_axes = {"action": {0: "batch"}, "action_probs": {0: "batch"}} |
|
|
|
if self.policy.use_vec_obs: |
|
|
|
self.input_names.append("vector_observation") |
|
|
|
self.dynamic_axes.update({"vector_observation": {0: "batch"}}) |
|
|
|
for i in range(self.policy.vis_obs_size): |
|
|
|
self.input_names.append(f"visual_observation_{i}") |
|
|
|
self.dynamic_axes.update({f"visual_observation_{i}": {0: "batch"}}) |
|
|
|
if not self.policy.use_continuous_act: |
|
|
|
self.input_names.append("action_masks") |
|
|
|
self.dynamic_axes.update({"action_masks": {0: "batch"}}) |
|
|
|
if self.policy.use_recurrent: |
|
|
|
self.input_names.append("memories") |
|
|
|
self.dynamic_axes.update({"memories": {0: "batch"}}) |
|
|
|
self.input_names = ( |
|
|
|
["vector_observation"] |
|
|
|
+ [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)] |
|
|
|
+ ["action_masks", "memories"] |
|
|
|
) |
|
|
|
|
|
|
|
self.output_names = [ |
|
|
|
"action", |
|
|
|
|
|
|
"is_continuous_control", |
|
|
|
"action_output_shape", |
|
|
|
] |
|
|
|
|
|
|
|
self.dynamic_axes = {name: {0: "batch"} for name in self.input_names} |
|
|
|
self.dynamic_axes.update({"action": {0: "batch"}, "action_probs": {0: "batch"}}) |
|
|
|
|
|
|
|
def export_policy_model(self, output_filepath: str) -> None: |
|
|
|
""" |
|
|
|
|
|
|
self.policy.actor_critic, |
|
|
|
self.dummy_input, |
|
|
|
onnx_output_path, |
|
|
|
verbose=False, |
|
|
|
opset_version=SerializationSettings.onnx_opset, |
|
|
|
input_names=self.input_names, |
|
|
|
output_names=self.output_names, |
|
|
|