
ONNX exporting

/develop/add-fire
Arthur Juliani, 5 years ago
Current commit
46874cc7
4 changed files with 79 additions and 36 deletions
  1. ml-agents/mlagents/trainers/distributions_torch.py (50 changes)
  2. ml-agents/mlagents/trainers/models_torch.py (35 changes)
  3. ml-agents/mlagents/trainers/policy/torch_policy.py (27 changes)
  4. ml-agents/mlagents/trainers/ppo/trainer.py (3 changes)

ml-agents/mlagents/trainers/distributions_torch.py (50 changes)


import torch
from torch import nn
from torch import distributions
import math


class GaussianDistInstance(nn.Module):
    def __init__(self, mean, std):
        super(GaussianDistInstance, self).__init__()
        self.mean = mean
        self.std = std

    def sample(self):
        return self.mean + torch.randn_like(self.mean) * self.std

    def pdf(self, value):
        var = self.std ** 2
        log_scale = self.std.log()
        # Exponentiate the Gaussian log-density so pdf() returns an actual
        # density and log_prob() below stays consistent with it.
        return torch.exp(
            -((value - self.mean) ** 2) / (2 * var)
            - log_scale
            - math.log(math.sqrt(2 * math.pi))
        )

    def log_prob(self, value):
        return torch.log(self.pdf(value))

    def entropy(self):
        # Differential entropy of a Gaussian: 0.5 * log(2 * pi * e * sigma^2)
        return 0.5 * torch.log(2 * math.pi * math.e * self.std ** 2)


class CategoricalDistInstance(nn.Module):
    def __init__(self, logits):
        super(CategoricalDistInstance, self).__init__()
        self.logits = logits
        self.probs = torch.softmax(self.logits, dim=-1)

    def sample(self):
        return torch.multinomial(self.probs, 1)

    def pdf(self, value):
        return self.probs[:, value]

    def log_prob(self, value):
        return torch.log(self.pdf(value))

    def entropy(self):
        # Entropy is the negative expected log-probability.
        return -torch.sum(self.probs * torch.log(self.probs), dim=-1)
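As a quick sanity check (mine, not part of the commit), GaussianDistInstance.log_prob should agree with torch.distributions.Normal, which it is meant to stand in for:

import torch
from torch import distributions
from mlagents.trainers.distributions_torch import GaussianDistInstance

mean = torch.zeros(3)
std = 0.5 * torch.ones(3)
value = torch.tensor([0.1, -0.2, 0.3])

dist = GaussianDistInstance(mean, std)
reference = distributions.normal.Normal(mean, std)
assert torch.allclose(dist.log_prob(value), reference.log_prob(value), atol=1e-5)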
class GaussianDistribution(nn.Module):
    def __init__(self, hidden_size, num_outputs, conditional_sigma=False, **kwargs):
        super(GaussianDistribution, self).__init__(**kwargs)

        if self.conditional_sigma:
            log_sigma = self.log_sigma(inputs)
        else:
            log_sigma = self.log_sigma
-       return [distributions.normal.Normal(loc=mu, scale=torch.exp(log_sigma))]
+       return [GaussianDistInstance(mu, torch.exp(log_sigma))]


class MultiCategoricalDistribution(nn.Module):

        for idx, branch in enumerate(self.branches):
            logits = branch(inputs)
            norm_logits = self.mask_branch(logits, masks[idx])
-           distribution = distributions.categorical.Categorical(logits=norm_logits)
+           distribution = CategoricalDistInstance(norm_logits)
            branch_distributions.append(distribution)
        return branch_distributions
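A minimal usage sketch for the new distribution-instance classes (illustrative shapes, not from the commit). Because sample() and log_prob() are built from plain tensor ops (randn_like, softmax, multinomial), the sampling path stays traceable, which is presumably why they replace the torch.distributions objects in the hunks above:

import torch
from mlagents.trainers.distributions_torch import (
    GaussianDistInstance,
    CategoricalDistInstance,
)

# Continuous control: a batch of 2 actions with 3 components each.
gauss = GaussianDistInstance(mean=torch.zeros(2, 3), std=torch.ones(2, 3))
action = gauss.sample()             # shape [2, 3]
log_p = gauss.log_prob(action)      # per-component log-probabilities, shape [2, 3]

# Discrete control: one branch with 4 possible actions, batch of 1.
cat = CategoricalDistInstance(logits=torch.zeros(1, 4))
choice = cat.sample()               # shape [1, 1], index of the chosen action
entropy = cat.entropy()             # shape [1]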

ml-agents/mlagents/trainers/models_torch.py (35 changes)


import torch
from torch import nn
from mlagents.trainers.distributions_torch import (
    GaussianDistribution,
    MultiCategoricalDistribution,
)
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.distributions_torch import GaussianDistribution, CategoricalDistInstance
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
EncoderFunction = Callable[

            vis_embeds.append(hidden)

        if len(vec_embeds) > 0:
-           vec_embeds = torch.cat(vec_embeds)
+           vec_embeds = torch.stack(vec_embeds, dim=-1).sum(dim=-1)
-           vis_embeds = torch.cat(vis_embeds)
+           vis_embeds = torch.stack(vis_embeds, dim=-1).sum(dim=-1)
-           embedding = torch.cat([vec_embeds, vis_embeds])
+           embedding = torch.stack([vec_embeds, vis_embeds], dim=-1).sum(dim=-1)
        elif len(vis_embeds) > 0:
            embedding = vis_embeds
        else:
            raise Exception("No valid inputs to network.")

-       embedding = embedding.reshape([sequence_length, -1, self.h_size])
+       embedding = embedding.view([sequence_length, -1, self.h_size])
-       embedding = embedding.reshape([-1, self.m_size // 2])
+       embedding = embedding.view([-1, self.m_size // 2])
        memories = torch.cat(memories, dim=-1)
        return embedding, memories
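The torch.cat calls removed above had no dim argument, so they would concatenate embeddings along the batch dimension; the replacement stacks them on a new trailing axis and sums, keeping the feature width fixed. A small shape sketch (illustrative, not from the commit):

import torch

a = torch.ones(2, 4)        # e.g. a vector-obs embedding, shape [batch, hidden]
b = 2 * torch.ones(2, 4)    # e.g. a visual-obs embedding, same shape

catted = torch.cat([a, b])                        # default dim=0 -> shape [4, 4]
summed = torch.stack([a, b], dim=-1).sum(dim=-1)  # shape [2, 4], elementwise sum

print(catted.shape, summed.shape)  # torch.Size([4, 4]) torch.Size([2, 4])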

        return dists, value_outputs, memories

    def forward(
-       self, vec_inputs, vis_inputs, masks=None, memories=None, sequence_length=1
+       self, vec_inputs, vis_inputs=None, masks=None, memories=None, sequence_length=1
    ):
        embedding, memories = self.network_body(
            vec_inputs, vis_inputs, memories, sequence_length
        )
-       return sampled_actions, memories
+       return sampled_actions, dists[0].pdf(sampled_actions)
class Critic(nn.Module):

    def forward(self, visual_obs):
        conv_1 = torch.relu(self.conv1(visual_obs))
        conv_2 = torch.relu(self.conv2(conv_1))
-       hidden = self.dense(conv_2.reshape([-1, self.final_flat]))
+       hidden = self.dense(conv_2.view([-1, self.final_flat]))
        return hidden

        self.final_flat = conv_3_hw[0] * conv_3_hw[1] * 64
        self.conv1 = nn.Conv2d(initial_channels, 32, [8, 8], [4, 4])
-       self.conv2 = nn.Conv2d(43, 64, [4, 4], [2, 2])
+       self.conv2 = nn.Conv2d(32, 64, [4, 4], [2, 2])
        self.conv3 = nn.Conv2d(64, 64, [3, 3], [1, 1])
        self.dense = nn.Linear(self.final_flat, self.h_size)

        conv_3 = torch.relu(self.conv3(conv_2))
-       hidden = self.dense(conv_3.reshape([-1, self.final_flat]))
+       hidden = self.dense(conv_3.view([-1, self.final_flat]))
        return hidden
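Several encoder hunks swap reshape for view when flattening conv activations. A short sketch of the difference (my example, not from the commit): view never copies but requires contiguous memory, while reshape falls back to a copy when needed; for contiguous conv outputs the two produce the same result:

import torch

x = torch.randn(2, 64, 7, 7)                 # conv activations [batch, C, H, W]
flat_view = x.view([-1, 64 * 7 * 7])         # ok: x is contiguous, no copy
flat_reshape = x.reshape([-1, 64 * 7 * 7])   # same values, copies only if needed
assert torch.equal(flat_view, flat_reshape)

y = x.permute(0, 2, 3, 1)                    # non-contiguous after permute
y.reshape([-1, 64 * 7 * 7])                  # works (makes a copy)
# y.view([-1, 64 * 7 * 7])                   # would raise: view needs contiguous memory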

class ResNetVisualEncoder(nn.Module):
-   def __init__(self, initial_channels):
+   def __init__(self, height, width, initial_channels, final_hidden):
        super(ResNetVisualEncoder, self).__init__()
        n_channels = [16, 32, 32]  # channel for each stack
        n_blocks = 2  # number of residual blocks

ml-agents/mlagents/trainers/policy/torch_policy.py (27 changes)


        seed: int,
        brain: BrainParameters,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool,
        tanh_squash: bool = False,
        reparameterize: bool = False,

        self.brain = brain
        self.global_step = 0
        self.m_size = 0
        self.model_path = model_path
        self.act_size = brain.vector_action_space_size
        self.act_type = brain.vector_action_space_type

        self.actor_critic.load_state_dict(torch.load(load_path))

    def export_model(self, step=0):
-       fake_vec_obs = [torch.zeros([1] + [self.vec_obs_size])]
-       fake_vis_obs = [
-           torch.zeros(
-               [1] + [camera_res.height, camera_res.width, camera_res.num_channels]
-           )
-           for camera_res in self.brain.camera_resolutions
-       ]
-       if self.use_continuous_act:
-           fake_masks = None
-       else:
-           fake_masks = torch.ones([1] + [int(np.sum(self.act_size))])
+       fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
+       fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
+       fake_masks = torch.ones([1] + self.actor_critic.act_size)
-       export_path = self.model_path + "/model-" + str(step) + ".onnx"
-       output_names = ["action", "value_estimates", "memories"]
+       export_path = "./model-" + str(step) + ".onnx"
+       output_names = ["action", "action_probs"]
+       input_names = ["vector_observation", "action_mask"]
+       dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
-           (fake_vec_obs, fake_vis_obs, fake_masks, fake_memories, 1),
+           (fake_vec_obs, fake_vis_obs, fake_masks),
            opset_version=12,
            input_names=input_names,
            dynamic_axes=dynamic_axes,
        )

    @property
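For context, here is a standalone sketch of the export call assembled in export_model above, using a stand-in module instead of the real ActorCritic. The module, sizes, and tensors are illustrative assumptions; only input_names, output_names, dynamic_axes, and opset_version mirror the hunk:

import torch
from torch import nn


class TinyActor(nn.Module):
    """Stand-in for ActorCritic: takes tensors, returns (action, action_probs)."""

    def __init__(self, obs_size=8, act_size=2):
        super().__init__()
        self.mu = nn.Linear(obs_size, act_size)

    def forward(self, vector_observation, action_mask):
        action = torch.tanh(self.mu(vector_observation)) * action_mask
        action_probs = torch.sigmoid(action)  # placeholder probabilities
        return action, action_probs


model = TinyActor()
fake_vec_obs = torch.zeros([1, 8])
fake_masks = torch.ones([1, 2])

torch.onnx.export(
    model,
    (fake_vec_obs, fake_masks),
    "./model-0.onnx",
    opset_version=12,
    input_names=["vector_observation", "action_mask"],
    output_names=["action", "action_probs"],
    # Mark the batch dimension as dynamic so the batch size is not baked in.
    dynamic_axes={"vector_observation": [0], "action": [0], "action_probs": [0]},
)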

ml-agents/mlagents/trainers/ppo/trainer.py (3 changes)


            self.seed,
            brain_parameters,
            self.trainer_settings,
            self.is_training,
            self.artifact_path,
            self.load,
            condition_sigma_on_obs=False,  # Faster training for PPO
        )
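A possible follow-up check (not part of the commit) that the exported file loads and runs, using onnxruntime and the input/output names from the export hunk; the shapes match the stand-in module from the previous sketch:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("./model-0.onnx")
inputs = {
    "vector_observation": np.zeros((1, 8), dtype=np.float32),
    "action_mask": np.ones((1, 2), dtype=np.float32),
}
action, action_probs = session.run(["action", "action_probs"], inputs)
print(action.shape, action_probs.shape)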
