
Upgraded to Barracuda 0.2.1, fixes issues with discrete action models

/develop-generalizationTraining-TrainerController
Mantas Puida, 6 years ago
Commit 8a5d17da
7 files changed, with 656 additions and 603 deletions
  1. UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Barracuda.dll (825 lines changed)
  2. UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/BarracudaReferenceImpl.compute (18 lines changed)
  3. UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Conv.compute (142 lines changed)
  4. UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/ReleaseNotes.md (25 lines changed)
  5. UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/package.json (2 lines changed)
  6. ml-agents/mlagents/trainers/barracuda.py (13 lines changed)
  7. ml-agents/mlagents/trainers/tensorflow_to_barracuda.py (234 lines changed)

UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Barracuda.dll (825 lines changed)
File diff is too large to display.

UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/BarracudaReferenceImpl.compute (18 lines changed)


uint4 _Pool;
uint4 _Stride;
float _Alpha;
float _Beta;
float _Seed;
[numthreads(8,8,1)]

if (X.channels - c == 1)
{
// broadcast to all channels
// before the change: raw channel value written out
v = X.Get(b, y, x, c);
// after the change: scale (_Alpha) and bias (_Beta) applied on the way out
v = _Alpha * X.Get(b, y, x, c) + _Beta;
// 3-remaining-channels branch (condition elided by the diff), before:
v.r = X.Get(b, y, x, c+0);
v.g = X.Get(b, y, x, c+1);
v.b = X.Get(b, y, x, c+2);
// 3-remaining-channels branch, after:
v.r = _Alpha * X.Get(b, y, x, c+0) + _Beta;
v.g = _Alpha * X.Get(b, y, x, c+1) + _Beta;
v.b = _Alpha * X.Get(b, y, x, c+2) + _Beta;
// 4-remaining-channels branch, before:
v.r = X.Get(b, y, x, c+0);
v.g = X.Get(b, y, x, c+1);
v.b = X.Get(b, y, x, c+2);
v.a = X.Get(b, y, x, c+3);
// 4-remaining-channels branch, after:
v.r = _Alpha * X.Get(b, y, x, c+0) + _Beta;
v.g = _Alpha * X.Get(b, y, x, c+1) + _Beta;
v.b = _Alpha * X.Get(b, y, x, c+2) + _Beta;
v.a = _Alpha * X.Get(b, y, x, c+3) + _Beta;
Otex2D[dispatchThreadID.xy] = v;
}
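The added `_Alpha`/`_Beta` uniforms correspond to the new scale + bias option when copying tensor values into a render texture (see the 0.2.1 release notes below). A minimal NumPy sketch of the same affine mapping; the names `tensor_to_pixels`, `scale` and `bias` are illustrative only, not the real API:

```python
import numpy as np

def tensor_to_pixels(x, c, scale=1.0, bias=0.0):
    """Copy up to 4 channels starting at channel c into an RGBA image,
    applying pixel = scale * value + bias, mirroring _Alpha/_Beta above."""
    b, h, w, channels = x.shape
    remaining = channels - c
    rgba = np.zeros((h, w, 4), dtype=np.float32)
    if remaining == 1:
        # broadcast the single channel to R, G and B
        rgba[..., :3] = scale * x[0, :, :, c:c+1] + bias
    else:
        n = min(remaining, 4)
        rgba[..., :n] = scale * x[0, :, :, c:c+n] + bias
    return rgba

# e.g. map activations in [-1, 1] to displayable [0, 1] pixels:
# pixels = tensor_to_pixels(activations, c=0, scale=0.5, bias=0.5)
```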

UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Conv.compute (142 lines changed)


#pragma kernel Conv2D
#pragma kernel Conv2D_RegisterBlock4x2
#pragma kernel Conv2D_L1Cached64_RegisterBlock4x4
#pragma kernel Conv2D_L1Cached32_RegisterBlock4x4
#pragma kernel DepthwiseConv2D

#undef SIZE_W
#undef SIZE_H
#undef L1CACHESIZE
#define L1CACHESIZE 64
#undef SIZE
#define SIZE 4
groupshared float Conv2D_L1Cached64_Reg_Loop_safe_X[SIZE*SIZE][L1CACHESIZE];
[numthreads(L1CACHESIZE, 1, 1)]
void Conv2D_L1Cached64_RegisterBlock4x4(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
{
DISPATCH_ARGS(K.kernelCount, O.width, O.height);
TENSOR_SHARED2_ARGS4(X, K, B, WBK, O);
#define CONV2D_L1CACHED(L1CACHESIZE, SIZE, FMA) \
groupshared float Conv2D_L1Cached##L1CACHESIZE##_Reg_Loop_safe_X[SIZE*SIZE][L1CACHESIZE];\
[numthreads(L1CACHESIZE, 1, 1)]\
void Conv2D_L1Cached##L1CACHESIZE##_RegisterBlock##SIZE##x##SIZE(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)\
{\
DISPATCH_ARGS(K.kernelCount, O.width, O.height);\
TENSOR_SHARED2_ARGS4(X, K, B, WBK, O);\
\
uint k = L1CACHESIZE * groupID.x + groupThreadID.x;\
uint x = groupID.y;\
uint y = groupID.z;\
\
if (x*SIZE >= O.width) return;\
if (y*SIZE >= O.height) return;\
\
for (uint n = 0; n < O.batch; ++n)\
{\
float acc[SIZE*SIZE];\
[unroll]\
for (uint q = 0; q < SIZE*SIZE; ++q)\
acc[q] = B.SafeGet(k);\
\
for (uint dy = 0; dy < K.GetKernelHeight(); ++dy)\
{\
for (uint dx = 0; dx < K.GetKernelWidth(); ++dx)\
{\
uint2 pos[SIZE*SIZE];\
[unroll]\
for (uint q = 0; q < SIZE*SIZE; ++q)\
pos[q] = uint2(x*SIZE+(q%SIZE), y*SIZE+(q/SIZE)) * _Stride.xy + uint2(dx, dy);\
\
for (uint c = 0; c < X.channels; c += L1CACHESIZE)\
{\
uint dc = groupThreadID.x;\
[unroll]\
for (q = 0; q < SIZE*SIZE; ++q)\
Conv2D_L1Cached##L1CACHESIZE##_Reg_Loop_safe_X[q][dc] = X.SafeGet(n, pos[q], c + dc, _Pad.xy);\
GroupMemoryBarrierWithGroupSync();\
\
if (k < K.channels)\
{\
uint kIndex = K.Index(dy, dx, c, k);\
for (dc = 0; dc < L1CACHESIZE; ++dc)\
{\
[unroll]\
for (q = 0; q < SIZE*SIZE; ++q)\
acc[q] = FMA(Conv2D_L1Cached##L1CACHESIZE##_Reg_Loop_safe_X[q][dc], K.data[kIndex], acc[q]);\
kIndex += K.channels;\
}\
}\
GroupMemoryBarrierWithGroupSync();\
}\
}\
}\
\
uint remainderW = (O.width - x*SIZE);\
uint remainderH = (O.height - y*SIZE);\
\
if (k < K.channels)\
[unroll]\
for (q = 0; q < SIZE*SIZE; ++q)\
if (q/SIZE < remainderH && q%SIZE < remainderW)\
O.Set(n, y*SIZE+(q/SIZE), x*SIZE+(q%SIZE), k, acc[q]);\
}\
\
}
#define X_ Conv2D_L1Cached64_Reg_Loop_safe_X
uint k = L1CACHESIZE * groupID.x + groupThreadID.x;
uint x = groupID.y;
uint y = groupID.z;
// need all threads to load channels, thus will do late check against kernel count
if (x*SIZE >= O.width) return;
if (y*SIZE >= O.height) return;
for (uint n = 0; n < O.batch; ++n)
{
float acc[SIZE*SIZE];
[unroll]
for (uint q = 0; q < SIZE*SIZE; ++q)
acc[q] = B.SafeGet(k);
for (uint dy = 0; dy < K.GetKernelHeight(); ++dy)
{
for (uint dx = 0; dx < K.GetKernelWidth(); ++dx)
{
uint2 pos[SIZE*SIZE];
[unroll]
for (uint q = 0; q < SIZE*SIZE; ++q)
pos[q] = uint2(x*SIZE+(q%SIZE), y*SIZE+(q/SIZE)) * _Stride.xy + uint2(dx, dy);
for (uint c = 0; c < X.channels; c += L1CACHESIZE)
{
// Cache X
uint dc = groupThreadID.x;
[unroll]
for (q = 0; q < SIZE*SIZE; ++q)
X_[q][dc] = X.SafeGet(n, pos[q], c + dc, _Pad.xy);
GroupMemoryBarrierWithGroupSync();
CONV2D_L1CACHED(64,4, fastfma)
CONV2D_L1CACHED(32,4, fastfma)
// X * K
if (k < K.channels) // need all threads to load channels, thus late check against kernel count
{
uint kIndex = K.Index(dy, dx, c, k);
for (dc = 0; dc < L1CACHESIZE && (c + dc) < K.GetKernelDepth(); ++dc)
{
[unroll]
for (q = 0; q < SIZE*SIZE; ++q)
acc[q] = fastfma(X_[q][dc], K.data[kIndex], acc[q]);
kIndex += K.channels;
}
}
GroupMemoryBarrierWithGroupSync();
}
}
}
uint remainderW = (O.width - x*SIZE);
uint remainderH = (O.height - y*SIZE);
if (k < K.channels) // need all threads to load channels, thus late check against kernel count
[unroll]
for (q = 0; q < SIZE*SIZE; ++q)
if (q/SIZE < remainderH && q%SIZE < remainderW)
O.Set(n, y*SIZE+(q/SIZE), x*SIZE+(q%SIZE), k, acc[q]);
}
#undef X_
}
NUMTHREADS((16,4,4), (8,4,4), (4,4,4))
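The refactor above turns the hand-written `Conv2D_L1Cached64_RegisterBlock4x4` kernel into a `CONV2D_L1CACHED(L1CACHESIZE, SIZE, FMA)` macro so the 64- and 32-wide variants share one body: each thread accumulates a SIZE×SIZE register block of output pixels while the whole thread group stages `L1CACHESIZE` input channels in groupshared memory. A small Python sketch of the index math only (toy values, not the shader itself):

```python
# Toy illustration of the 4x4 register-block addressing used by the kernel above.
SIZE = 4                  # register block is SIZE x SIZE output pixels per thread
stride = (1, 1)           # convolution stride (_Stride.xy)
group = (2, 3)            # groupID.y, groupID.z -> which output tile this group covers
dx, dy = 1, 0             # current kernel tap

for q in range(SIZE * SIZE):
    out_x = group[0] * SIZE + q % SIZE       # output pixel column for accumulator acc[q]
    out_y = group[1] * SIZE + q // SIZE      # output pixel row for accumulator acc[q]
    in_x = out_x * stride[0] + dx            # matching input sample (before padding offset)
    in_y = out_y * stride[1] + dy
    print(f"acc[{q:2}] -> output ({out_x},{out_y}) reads input ({in_x},{in_y})")
```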

UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/ReleaseNotes.md (25 lines changed)


# Release notes
## 0.2.1
- TF importer: fixed ResizeNearestNeighbor aka Upsample2D scaling factor detection.
- TF importer: optimized node sorting. Should be faster than 0.2.0.
- TF importer: made detection of the actual output node from the LSTM/GRU pattern more robust by skipping Const nodes.
- TF importer: improved InstanceNormalization handling.
- TF importer: fixed SquareDifference pattern.
- TF importer: fixed Conv2DBackpropInput (transpose convolution) import.
- Fixed Conv2D performance regression on some GPUs.
- Fixed TextureAsTensorData.Download() to work properly with InterpretDepthAs.Channels.
- Fixed a bug where identity/nop layers would reuse their input as an output, later causing premature release of that tensor during intermediate data cleanup.
- Added scale + bias to the TensorToRenderTexture interface, useful for adjusting network output scale + bias on the fly.
- Fixed double Dispose issue when worker gets garbage collected.
## 0.2.0
- Version bumped to 0.2.0 as it brings breaking API changes; for details, see below.
- Significantly reduced temporary memory allocations by introducing internal allocator support. Now memory is re-used between layer execution as much as possible.

- Compatibility with ML Agents models: 3DBall, PushBlock, GridWorld, Soccer.
## 0.1.0
- First internal build. Due to some bugs encountered, it wasn't published.
# Contributors
- Renaldas (ReJ) Zioma
- Mantas Puida
- Vladimir Oster
- Martin Sternevald
- Valdemar Bučilko
- Kuba Cupisz
- Povilas Kanapickas
- Paulius Puodžiūnas

UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/package.json (2 lines changed)


{
"name": "com.unity.barracuda",
"displayName": "Barracuda",
"version": "0.2.0-preview",
"version": "0.2.1-preview",
"unity": "2017.4",
"description": "Barracuda is lightweight and cross-platform Neural Net inference library. Barracuda supports inference both on GPU and CPU.",
"dependencies": {}

ml-agents/mlagents/trainers/barracuda.py (13 lines changed)


assert(len(find_missing_inputs(new_model, inputs_and_memories)) == 0)
return new_model
# Trim
def trim(model, criteria_regexp_string, verbose):
if hasattr(model, 'layers'):

model = trim_model(model, preserve_outputs)
else:
print("WARNING: Trim couldn't find any layers to match:", criteria_regexp_string)
return model
# Fuse
def fuse(model, verbose):
i = 0
while i < len(model) - 1:
if model[i].type == model[i+1].type and model[i].type == 255: # Load
model[i].tensors += model[i+1].tensors
del model[i+1]
else:
i += 1
return model
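The new `fuse` pass walks the layer list and merges runs of adjacent constant-`Load` layers (type 255) into one, shrinking the serialized model. A self-contained toy sketch of the same pass; the `Layer` class here is a stand-in for illustration, not the converter's real layer struct:

```python
class Layer:                      # hypothetical stand-in for the converter's layer struct
    def __init__(self, type, tensors):
        self.type, self.tensors = type, tensors

def fuse_loads(model):
    """Merge consecutive Load (type 255) layers, as fuse() above does."""
    i = 0
    while i < len(model) - 1:
        if model[i].type == model[i + 1].type == 255:
            model[i].tensors += model[i + 1].tensors
            del model[i + 1]
        else:
            i += 1
    return model

layers = [Layer(255, ['w0']), Layer(255, ['w1']), Layer(1, []), Layer(255, ['w2'])]
fused = fuse_loads(layers)
print([(l.type, l.tensors) for l in fused])   # [(255, ['w0', 'w1']), (1, []), (255, ['w2'])]
```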
def compress(model):

ml-agents/mlagents/trainers/tensorflow_to_barracuda.py (234 lines changed)


rank = 4,
out_shapes = lambda s: [
[s[0][0], s[0][1], s[0][3], s[0][2]], # K TF:[H, W, in_channels, channel_multiplier] => [H, W, 1, in_channels]
[1, 1, 1, s[-1][-1]] if len(s) > 1 else
[1, 1, 1, s[0][2]] # B
[1, 1, 1, s[-1][-1]] if len(s) > 1
else [1, 1, 1, s[0][2]] # B
],
patch_data = lambda data: [
np.transpose(data[0], (0,1,3,2)),

id = 22,
rank = 4,
out_shapes = lambda shapes: [
# NOTE: skip the 0th tensor
# in Conv2DBackpropInput 0th tensor is 'input_sizes' - which differs from other Conv layers
shapes[1], # K
[1, 1, 1, shapes[-1][-1]], # B
out_shapes = lambda s: [
[s[0][0], s[0][1], s[0][3], s[0][2]], # K TF:[H, W, in_channels, out_channels] => [H, W, out_channels, in_channels]
[1, 1, 1, s[-1][-1]] if len(s) > 1
else [1, 1, 1, s[0][2]] # B
data[1],
data[2] if len(data) > 2
else np.zeros([1,1,1,np.shape(data[1])[-1]]) # NOTE: since 0th tensor is skipped in out_shapes, bias tensor when missing is not automatically initialized with zeros
np.transpose(data[0], (0,1,3,2)),
data[1]
]),
'Pad': 29,

[data[0], data[1]] if len(data) == 4 else
[np.ones(np.shape(data[0])), data[0]]
),
'InstanceNormalization': Struct(
'InstanceNormalization': Struct( # TODO: epsilon
id = 52,
out_shapes = lambda shapes: [
[1, 1, 1, shapes[0][0]], # G

repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization',
repr(['Mean', 'StopGradient', 'SquaredDifference', 'Mean',
'Sub', 'Add', 'Pow', 'RealDiv', 'Mul', 'Add']) : 'InstanceNormalization_ByTensorOrder',
repr(['Mean', 'StopGradient', 'SquaredDifference', 'Mean',
'Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'InstanceNormalization',
'Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'InstanceNormalization_ByTensorName',
repr(['Conv2DBackpropInput']) : 'Conv2DBackpropInput',
repr(['Shape', 'StridedSlice', 'StridedSlice', 'StridedSlice', 'Mul',
'Mul', 'Pack', 'Conv2DBackpropInput', 'BiasAdd']) : 'Conv2DBackpropInput',
repr(['Shape', 'StridedSlice', 'StridedSlice', 'StridedSlice', 'Mul',
'Mul', 'Pack', 'Conv2DBackpropInput']) : 'Conv2DBackpropInput',
repr(['Shape', 'StridedSlice', 'Mul', 'ResizeNearestNeighbor'])
: 'ResizeNearestNeighbor',
repr(['Pack', 'Reshape']) : 'Flatten$', # for now we assume that this combination is trivial Flatten
# for example it is used in ML-agents LSTM nets with sequence_length==1

op = 'BatchNormalization',
input = [i for i in inputs] +
order_by([t.name for t in tensors], ['gamma', 'beta', 'mean', 'variance']),
),
'InstanceNormalization' : lambda nodes, inputs, tensors, _:
),
'InstanceNormalization_ByTensorName' : lambda nodes, inputs, tensors, _:
),
),
'InstanceNormalization_ByTensorOrder' : lambda nodes, inputs, tensors, _:
Struct(
op = 'InstanceNormalization',
input = [i for i in inputs] + [t.name for t in tensors][-2:],
),
'Dense' : lambda nodes, inputs, tensors, _:
Struct(
op = 'Dense',

'Conv2DBackpropInput' : lambda nodes, inputs, tensors, _:
Struct(
op = 'Conv2DBackpropInput',
input = [i for i in inputs] + [t.name for t in tensors],
input = [i for i in inputs] + [t.name for t in tensors][1:][-2:], # [1:] - skips the 0th tensor, since Conv2DBackpropInput 0th tensor is 'input_sizes' (which differs from other Conv layers)
# [-2:] - take only the last 2 tensors, which allows processing large patterns with the same code
'ResizeNearestNeighbor' : lambda nodes, inputs, tensors, _:
Struct(
op = 'ResizeNearestNeighbor',
input = [i for i in inputs],
ksize = [int(tensors[0].data[0]), int(tensors[0].data[1])] if len(tensors) == 1 and len(tensors[0].data) == 2
else [int(tensors[-1].data[0]), int(tensors[-1].data[1])] if len(tensors) >= 4 and len(tensors[-1].data) == 2
else [1,1]
),
'Mean' : lambda nodes, inputs, tensors, _:
# take only the last input
barracuda.mean(nodes[-1].name, inputs[-1], axis=tensors[0].data),

'BasicLSTM' : lambda nodes, inputs, tensors, context:
basic_lstm(nodes, inputs, tensors, context),
basic_lstm(nodes, inputs, tensors, context,
index_of_actual_output_node=-3, assert_output_node_op_type='Reshape'),
'Swish' : lambda nodes, inputs, tensors, _:
Struct(

def sqr_diff(name, a, b):
nn = barracuda.Build(name)
d = nn.sub(a, b)
nn.mul(d, d)
nn.mul(d, d, out=name)
return nn.layers
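`sqr_diff` is the replacement the importer emits for TensorFlow's `SquaredDifference`: it builds `sub` followed by `mul`, and the fix routes the final `mul` output to the pattern's own name (`out=name`) so downstream layers that referenced the original node still resolve. A toy sketch of why that matters, using a hypothetical mini-builder rather than the real `barracuda.Build`:

```python
# Hypothetical mini-builder illustrating the out=name fix in sqr_diff above.
def sqr_diff_layers(name, a, b):
    d = name + '/sub'                        # intermediate result of (a - b)
    layers = [
        {'op': 'Sub', 'inputs': [a, b], 'output': d},
        # Without out=name the final Mul would get an auto-generated output id,
        # leaving any layer that consumed the original SquaredDifference node
        # by its name pointing at nothing.
        {'op': 'Mul', 'inputs': [d, d], 'output': name},
    ]
    return layers

for l in sqr_diff_layers('sqr_diff_0', 'x', 'y'):
    print(l)
```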
def strided_slice(name, input, input_rank, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask):

nn.strided_slice(input, begin, end, strides, output_rank, out=name)
return nn.layers
def gru(nodes, inputs, tensors, context):
# search backwards starting from index_of_actual_output_node for non-const node
def locate_actual_output_node(nodes, index_of_actual_output_node=-1, assert_output_node_op_type=None):
while (-index_of_actual_output_node-1) < len(nodes) and nodes[index_of_actual_output_node].op == 'Const':
index_of_actual_output_node -= 1
actual_output_node = nodes[index_of_actual_output_node]
assert(actual_output_node.op == assert_output_node_op_type or not assert_output_node_op_type)
return actual_output_node
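`locate_actual_output_node` walks backwards from the end of the matched node pattern and skips trailing `Const` nodes, which is the 0.2.1 change that makes LSTM/GRU output detection more robust. A quick self-contained illustration with a toy `Node` class (not the real protobuf graph node):

```python
from collections import namedtuple

Node = namedtuple('Node', 'name op')        # toy stand-in for a TF graph node

def locate_actual_output_node(nodes, index=-1, expected_op=None):
    # search backwards, skipping Const nodes, exactly like the helper above
    while (-index - 1) < len(nodes) and nodes[index].op == 'Const':
        index -= 1
    node = nodes[index]
    assert node.op == expected_op or not expected_op
    return node

pattern = [Node('lstm/concat', 'ConcatV2'),
           Node('lstm/out', 'Reshape'),
           Node('lstm/axis', 'Const'),       # trailing Const nodes that a fixed
           Node('lstm/shape', 'Const')]      # negative index could not account for
print(locate_actual_output_node(pattern, -1, 'Reshape').name)   # lstm/out
```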
def gru(nodes, inputs, tensors, context, index_of_actual_output_node, assert_output_node_op_type=None):
assert(len(inputs) == 2)
def find_tensor_by_name(name, default=None):

context.model_memories += [state_shape, state, new_state]
# map expected output of the replaced pattern to output from our GRU cell
actual_output_node = nodes[-4]
assert(actual_output_node.op == 'Reshape')
actual_output_node = locate_actual_output_node(nodes, index_of_actual_output_node, assert_output_node_op_type)
def basic_lstm(nodes, inputs, tensors, context):
def basic_lstm(nodes, inputs, tensors, context, index_of_actual_output_node, assert_output_node_op_type=None):
assert(len(inputs) == 2)
def find_tensor_by_name(name, default=None):

context.model_memories += [state_shape, state_h, new_state_h]
# map expected output of the replaced pattern to output from our LSTM cell
actual_output_node = nodes[-4]
assert(actual_output_node.op == 'Reshape')
actual_output_node = locate_actual_output_node(nodes, index_of_actual_output_node, assert_output_node_op_type)
context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state_h
return new_layers

# Find node patterns
nodes_as_array = [node for node in model.node]
nodes_as_array = slow_but_stable_topological_sort(nodes_as_array)
nodes_as_array = slow_but_stable_topological_sort(nodes_as_array, verbose=True)
node_index = 0
while node_index < len(nodes_as_array):

process_layer(node, o_context, args)
node_index += 1
return o_context.layers, o_context.input_shapes, o_context.model_tensors, o_context.model_memories
def find_unconnected_const_nodes(nodes):
nodes_with_consts = {node.name: node for node in nodes if node.op == 'Const'}
for node in nodes:
for i in node.input:
nodes_with_consts.pop(i, None)
return list(nodes_with_consts.keys())
return o_context.layers, o_context.input_shapes, o_context.model_tensors, o_context.model_memories, \
find_unconnected_const_nodes(nodes_as_array)
def slow_but_stable_topological_sort(nodes):
def slow_but_stable_topological_sort(nodes, verbose):
nodes_with_consts = [node for node in nodes if node.op == 'Const']
nodes_for_sorting = [node for node in nodes if node.op != 'Const']
# TODO: optimize for performance
# based on http://blog.gapotchenko.com/stable-topological-sort
def assign_ids(nodes):
ids = []
id_by_name = {}
id = 0
for node in nodes:
id_by_name[node.name] = id;
ids.append(id)
id += 1
inputs_by_id = [None] * len(nodes)
for node in nodes:
id = id_by_name[node.name]
inputs_by_id[id] = {id_by_name.get(i, -1) for i in node.input}
return ids, inputs_by_id
def sort(ids, inputs_by_id, verbose_lambda):
sorted = False
n = len(ids)
while not sorted:
sorted = True
for i in range(n):
for j in range (i):
if ids[i] in inputs_by_id[ids[j]]:
tmp = ids.pop(i)
ids.insert(j, tmp)
sorted = False
verbose_lambda(sorted)
return ids
prefix_printed = False
def print_status(sorted):
nonlocal prefix_printed
if not sorted:
if not prefix_printed:
print('Sorting model, may take a while...', end="", flush=True)
prefix_printed = True
else:
print('.', end="", flush=True)
else:
if prefix_printed:
print(' Done!')
ids, inputs_by_id = assign_ids(nodes_for_sorting)
ids = sort(ids, inputs_by_id, lambda sorted: print_status(sorted) if verbose else None)
assert(len(ids) == len(nodes_for_sorting))
assert(len(ids) + len(nodes_with_consts) == len(nodes))
return nodes_with_consts + [nodes_for_sorting[id] for id in ids]
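`slow_but_stable_topological_sort` keeps Const nodes in front, then bubble-sorts the remaining nodes into dependency order while preserving the original relative order where possible (the "stable" part), printing progress dots when `verbose` is set. A compact sketch of the same stable-sort idea on a toy graph (hypothetical node names):

```python
def stable_topo_sort(names, inputs):
    """Stable topological sort: hoist a node above a consumer only when a
    dependency requires it, otherwise keep the original order."""
    order = list(names)
    done = False
    while not done:
        done = True
        for i in range(len(order)):
            for j in range(i):
                if order[i] in inputs[order[j]]:    # order[j] consumes order[i]
                    order.insert(j, order.pop(i))   # hoist the producer above it
                    done = False
    return order

# 'dense' was emitted before its weight loader; the sort fixes that while
# leaving already-correct pairs ('conv' before 'relu') in their original order.
names = ['conv', 'relu', 'dense', 'dense_w']
inputs = {'conv': set(), 'relu': {'conv'}, 'dense': {'relu', 'dense_w'}, 'dense_w': set()}
print(stable_topo_sort(names, inputs))    # ['conv', 'relu', 'dense_w', 'dense']
```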
def very_slow_but_stable_topological_sort(nodes, verbose):
# TODO: optimize for performance
# based on http://blog.gapotchenko.com/stable-topological-sort
n = len(nodes)

assert(len(nodes) == n)
return nodes
def sort_nodes_by_dependencies(nodes):
from collections import defaultdict
# Class to represent a graph
# Taken from: https://www.geeksforgeeks.org/python-program-for-topological-sorting/
class Graph:
def __init__(self,vertices):
self.graph = defaultdict(list) #dictionary containing adjacency List
self.V = vertices #No. of vertices
# function to add an edge to graph
def addEdge(self,u,v):
self.graph[u].append(v)
# A recursive function used by topologicalSort
def topologicalSortUtil(self,v,visited,stack):
# Mark the current node as visited.
visited[v] = True
# Recur for all the vertices adjacent to this vertex
for i in self.graph[v]:
if visited[i] == False:
self.topologicalSortUtil(i,visited,stack)
# Push current vertex to stack which stores result
stack.insert(0,v)
# The function to do Topological Sort. It uses recursive
# topologicalSortUtil()
def topologicalSort(self):
# Mark all the vertices as not visited
visited = [False]*self.V
stack =[]
# Call the recursive helper function to store Topological
# Sort starting from all vertices one by one
for i in range(self.V):
if visited[i] == False:
self.topologicalSortUtil(i,visited,stack)
return stack
g = Graph(len(nodes))
id_by_name = {}
id = 0
for node in nodes:
id_by_name[node.name] = id;
id += 1
for node in nodes:
for i in node.input:
#if i not in inputs_and_memories:
g.addEdge(id_by_name.get(i, -1), id_by_name[node.name])
sorted_layer_indices = g.topologicalSort()
return [nodes[idx] for idx in sorted_layer_indices]
#########################################################
def convert(source_file, target_file, trim_unused_by_output="", verbose=False, compress_f16=False):

# Convert
o_model = barracuda.Model()
o_model.layers, o_input_shapes, o_model.tensors, o_model.memories = \
o_model.layers, o_input_shapes, o_model.tensors, o_model.memories, o_model.globals = \
process_model(i_model, args)
# Cleanup unconnected Identities (they might linger after processing complex node patterns like LSTM)

all_inputs = {i for l in o_model.layers for i in l.inputs}
embedded_tensors = {t.name for l in o_model.layers for t in l.tensors}
# Find global tensors
# Trim
if trim_unused_by_output:
o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output, args.verbose)
# Create load layer for constants
o_model.globals = [t for t in o_model.tensors if t not in all_inputs and t not in embedded_tensors]
#for x in global_tensors:
# shape = dims_to_barracuda_shape(get_tensor_dims(o_model.tensors[x]))
# o_globals += [Struct(
# name = x,
# shape = shape,
# data = np.reshape(get_tensor_data(o_model.tensors[x]), shape).astype(np.float32))]
# Trim
if trim_unused_by_output:
o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output, args.verbose)
# Create load layers for constants
o_l = Struct(
type = 255, # Load
class_name = "Const",

# Sort model so that layer inputs are always ready upfront
o_model.layers = barracuda.sort(o_model.layers, o_model.inputs, o_model.memories, args.verbose)
o_model.layers = barracuda.fuse(o_model.layers, args.verbose)
# Summary
barracuda.summary(o_model,
