
Prepare model for onnx export

Branch: /develop/add-fire
Arthur Juliani, 4 years ago
Commit: 9835d26c
3 files changed, with 63 insertions and 33 deletions
  1. ml-agents/mlagents/trainers/models_torch.py (39 changes)
  2. ml-agents/mlagents/trainers/policy/torch_policy.py (53 changes)
  3. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)

ml-agents/mlagents/trainers/models_torch.py (39 changes)


         embedding, _ = self.network_body(vec_inputs, vis_inputs)
         return self.value_heads(embedding)

-    def forward(
+    def sample_action(self, dists):
+        actions = []
+        for action_dist in dists:
+            action = action_dist.sample()
+            actions.append(action)
+        actions = torch.stack(actions, dim=-1)
+        return actions
+
+    def get_probs_and_entropy(self, actions, dists):
+        log_probs = []
+        entropies = []
+        for idx, action_dist in enumerate(dists):
+            action = actions[:, idx]
+            log_probs.append(action_dist.log_prob(action))
+            entropies.append(action_dist.entropy())
+        log_probs = torch.stack(log_probs, dim=-1)
+        entropies = torch.stack(entropies, dim=-1)
+        if self.act_type == ActionType.CONTINUOUS:
+            log_probs = log_probs.squeeze(-1)
+            entropies = entropies.squeeze(-1)
+        return log_probs, entropies
+
+    def get_dist_and_value(
         self, vec_inputs, vis_inputs, masks=None, memories=None, sequence_length=1
     ):
         embedding, memories = self.network_body(
             ...
-            dist = self.distribution(embedding)
+            dists = self.distribution(embedding)
             ...
-            dist = self.distribution(embedding, masks=masks)
+            dists = self.distribution(embedding, masks=masks)
             ...
-        return dist, value_outputs, memories
+        return dists, value_outputs, memories
+
+    def forward(
+        self, vec_inputs, vis_inputs, masks=None, memories=None, sequence_length=1
+    ):
+        dists, value_outputs, memories = self.get_dist_and_value(
+            vec_inputs, vis_inputs, masks, memories, sequence_length
+        )
+        sampled_actions = self.sample_action(dists)
+        return sampled_actions, memories


 class Critic(nn.Module):
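
The new forward exists for tracing: torch.onnx.export records a single forward pass, and a traced graph can only carry tensors, so distribution objects stay internal while forward samples and returns plain tensors (actions and memories). A minimal sketch of that pattern, with a hypothetical two-branch discrete actor standing in for ActorCritic:

    import torch
    from torch import nn
    from torch.distributions import Categorical

    class TinyActor(nn.Module):
        # Hypothetical stand-in for ActorCritic with two discrete action branches.
        def __init__(self, obs_size=8, branch_sizes=(3, 2)):
            super().__init__()
            self.heads = nn.ModuleList([nn.Linear(obs_size, n) for n in branch_sizes])

        def get_dists(self, obs):
            # Distribution objects stay internal; they are never returned by forward.
            return [Categorical(logits=head(obs)) for head in self.heads]

        def forward(self, obs):
            # Only tensors cross the module boundary, which is what tracing needs.
            dists = self.get_dists(obs)
            return torch.stack([d.sample() for d in dists], dim=-1)

    print(TinyActor()(torch.zeros(1, 8)).shape)  # torch.Size([1, 2]): one action per branch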

ml-agents/mlagents/trainers/policy/torch_policy.py (53 changes)


 from typing import Any, Dict, List
 import numpy as np
 import torch
+import os
+from torch import onnx
 from mlagents.trainers.action_info import ActionInfo
 from mlagents.trainers.brain_conversion_utils import get_global_agent_id

     ...
     def execute_model(
         self, vec_obs, vis_obs, masks=None, actions=None, memories=None, seq_len=1
     ):
-        action_dists, (value_heads, mean_value), new_memories = self.actor_critic(
+        dists, (
+            value_heads,
+            mean_value,
+        ), new_memories = self.actor_critic.get_dist_and_value(
             ...
-            generate_actions = True
-            actions = []
-        else:
-            generate_actions = False
-        log_probs = []
-        entropies = []
-        for idx, action_dist in enumerate(action_dists):
-            if generate_actions:
-                action = action_dist.sample()
-                actions.append(action)
-            else:
-                action = actions[idx]
-            log_probs.append(action_dist.log_prob(action))
-            entropies.append(action_dist.entropy())
-        if generate_actions:
-            actions = torch.stack(actions, dim=-1)
-        log_probs = torch.stack(log_probs, dim=-1)
-        entropies = torch.stack(entropies, dim=-1)
+        actions = self.actor_critic.sample_action(dists)
+        log_probs, entropies = self.actor_critic.get_probs_and_entropy(actions, dists)
             ...
-        if generate_actions:
-            actions = actions.squeeze(-1)
-        log_probs = log_probs.squeeze(-1)
-        entropies = entropies.squeeze(-1)
+            actions.squeeze_(-1)
         return actions, log_probs, entropies, value_heads, memories

     @timed

         ...
         Saves the model
         :param step: The number of steps the model was trained for
         """
+        if not os.path.exists(self.model_path):
+            os.makedirs(self.model_path)
         save_path = self.model_path + "/model-" + str(step) + ".pt"
         torch.save(self.actor_critic.state_dict(), save_path)
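
Because save_model writes only the state_dict, restoring a policy later requires rebuilding the module first and then loading the weights into it. A sketch of that round trip under the same convention (the module and paths here are illustrative, not the ml-agents classes):

    import os
    import torch
    from torch import nn

    model = nn.Linear(4, 2)                    # stands in for actor_critic
    model_path = "./results/run0"              # illustrative directory
    os.makedirs(model_path, exist_ok=True)
    save_path = model_path + "/model-" + str(1000) + ".pt"
    torch.save(model.state_dict(), save_path)  # weights only, no architecture

    restored = nn.Linear(4, 2)                 # must rebuild the same module shape
    restored.load_state_dict(torch.load(save_path))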

     def export_model(self, step=0):
-        fake_vec_obs = [torch.zeros(self.vec_obs_size)]
-        fake_vis_obs = [torch.zeros(camera_res) for camera_res in self.vis_obs_size]
+        fake_vec_obs = [torch.zeros([1] + [self.vec_obs_size])]
+        fake_vis_obs = [
+            torch.zeros(
+                [1] + [camera_res.height, camera_res.width, camera_res.num_channels]
+            )
+            for camera_res in self.brain.camera_resolutions
+        ]
+        if self.use_continuous_act:
+            fake_masks = None
+        else:
+            fake_masks = torch.ones([1] + [int(np.sum(self.act_size))])
+        fake_memories = torch.zeros([1] + [self.m_size])
-        output_names = ["action", "memories", "value_estimates"]
+        output_names = ["action", "value_estimates", "memories"]
         ...
-            (fake_vec_obs, fake_vis_obs),
+            (fake_vec_obs, fake_vis_obs, fake_masks, fake_memories, 1),
             export_path,
             verbose=True,
             output_names=output_names,
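
The dummy tensors above all carry a leading batch dimension of 1 because torch.onnx.export traces exactly one forward pass with the example inputs it is given, and output_names labels the traced graph's outputs in order. A self-contained sketch of that call (the model and names are illustrative, not the ml-agents network):

    import torch
    from torch import nn, onnx

    # Illustrative module standing in for the actor-critic being exported.
    model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))

    # Dummy input with a leading batch dimension of 1, mirroring the
    # [1] + [...] shapes built in export_model above.
    fake_obs = torch.zeros([1, 8])

    onnx.export(
        model,
        (fake_obs,),              # traced once with these example inputs
        "model.onnx",             # destination file
        verbose=True,             # print the exported graph
        output_names=["action"],  # label graph outputs in order
    )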

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)


         vec_obs = [torch.Tensor(vec_obs)]
         act_masks = torch.Tensor(np.array(batch["action_mask"]))
         if self.policy.use_continuous_act:
-            actions = [torch.Tensor(np.array(batch["actions"]))]
+            actions = torch.Tensor(np.array(batch["actions"])).unsqueeze(-1)
         else:
-            actions = list(torch.Tensor(np.array(batch["actions"])).permute(1, 0))
+            actions = torch.Tensor(np.array(batch["actions"]))
         memories = [
             torch.Tensor(np.array(batch["memory"][i]))
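
Both branches now agree on one layout: the last dimension of actions indexes the action branch, which is what get_probs_and_entropy assumes when it slices actions[:, idx]. A quick shape check of that convention with made-up buffer contents:

    import numpy as np
    import torch

    # Continuous: hypothetical buffer column of one action per step;
    # unsqueeze(-1) appends the branch dimension expected by actions[:, idx].
    continuous = torch.Tensor(np.random.rand(5)).unsqueeze(-1)
    print(continuous.shape)  # torch.Size([5, 1])

    # Discrete: hypothetical buffer with two action branches per step;
    # the stored layout already has the branch as the last dimension.
    discrete = torch.Tensor(np.random.randint(0, 3, size=(5, 2)))
    print(discrete.shape)    # torch.Size([5, 2])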
