您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
1662 行
55 KiB
1662 行
55 KiB
from __future__ import print_function
|
|
import numpy as np
|
|
import struct # convert from Python values and C structs
|
|
import tensorflow as tf
|
|
import re
|
|
|
|
# import barracuda
|
|
# from barracuda import Struct
|
|
from mlagents.trainers import barracuda
|
|
from mlagents.trainers.barracuda import Struct
|
|
from google.protobuf import descriptor
|
|
from google.protobuf.json_format import MessageToJson
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the conversion.
    # Handle command line arguments
    args = barracuda.parse_args(
        description="Convert Tensorflow model to Barracuda binary",
        source_extension=".pb",
        help="input Tensorflow serialized .pb file",
    )
    # The following code can be used as an example of the API used from another module
    # convert() is the main entry point for the converter
    # NOTE: the module imports itself under an alias here, so the call below goes
    # through the same public name (`convert`) that an external caller would use.
    import tensorflow_to_barracuda as tf2bc

    tf2bc.convert(args.source_file, args.target_file, args.trim_unused_by_output, args)
|
|
|
|
|
|
# TODO: support more than 1 LSTM layer per model - prepend scope to names and inputs
|
|
# TODO: support different activation functions in LSTM
|
|
# TODO: strip output Identity node, instead patch upstream layer names
|
|
# TODO: use ScaleBias and Pow with alpha when input is constant Tensor
|
|
# TODO: support all data format types (currently only NHWC)
|
|
# TODO: support all data types (currently only FLOAT, INT32, BOOL)
|
|
# TODO: implement FusedResizeAndPadConv2D
|
|
|
|
# Important ProtoBuf definitions:
|
|
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto
|
|
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto
|
|
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto
|
|
#
|
|
# Node descriptions:
|
|
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc
|
|
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/math_ops.cc
|
|
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/random_ops.cc
|
|
#
|
|
# Class doc:
|
|
# https://www.tensorflow.org/api_docs/cc/
|
|
#
|
|
# Table of layer classes known to the converter, keyed by class/op name.
# A value is either a bare integer id or a Struct carrying:
#   id         - integer id (presumably the Barracuda layer type id - TODO confirm)
#   rank       - optional; either a fixed integer or a callable taking `inputs`
#                (presumably the ranks of the layer inputs - TODO confirm)
#   out_shapes - optional callable mapping a list of shapes to a list of
#                4-element shapes
#   patch_data - optional callable mapping the list of tensor data to the list
#                of data that is kept
# How these fields are consumed is not visible in this part of the file.
known_classes = {
    "Dense": Struct(
        id=1,
        rank=2,
        out_shapes=lambda shapes: [
            [shapes[0][0], 1, 1, shapes[0][1]]
            if len(shapes[0]) > 1
            else [1, 1, 1, 1],  # W
            [1, 1, 1, shapes[-1][-1]],  # B
        ],
        patch_data=lambda data: [data[0], data[1]],
    ),
    "MatMul": Struct(
        id=1,
        rank=2,
        out_shapes=lambda shapes: [
            [shapes[0][0], 1, 1, shapes[0][1]],  # W
            [1, 1, 1, shapes[0][1]],  # B
        ],
        # the second entry is replaced by zeros of the same shape as data[1]
        patch_data=lambda data: [data[0], np.zeros(np.shape(data[1]))],
    ),
    "BiasAdd": Struct(
        id=51,  # implemented as ScaleBias
        out_shapes=lambda shapes: [
            [1, 1, 1, shapes[0][0]],  # ONE
            [1, 1, 1, shapes[0][0]],  # B
        ],
        # scale is all ones, so only the bias (data[0]) has an effect
        patch_data=lambda data: [np.ones(np.shape(data[0])), data[0]],
    ),
    # TODO: NCHW
    "Conv2D": Struct(
        id=20,
        rank=4,
        out_shapes=lambda shapes: [shapes[0], [1, 1, 1, shapes[-1][-1]]],  # K # B
        patch_data=lambda data: [data[0], data[1]],
    ),
    "DepthwiseConv2dNative": Struct(  # DepthwiseConv2D
        id=21,
        rank=4,
        out_shapes=lambda s: [
            [
                s[0][0],
                s[0][1],
                s[0][3],
                s[0][2],
            ],  # K TF:[H, W, in_channels, channel_multiplier] => [H, W, 1, in_channels]
            [1, 1, 1, s[-1][-1]] if len(s) > 1 else [1, 1, 1, s[0][2]],  # B
        ],
        # swap the last two kernel axes to match the shape produced by out_shapes
        patch_data=lambda data: [np.transpose(data[0], (0, 1, 3, 2)), data[1]],
    ),
    "Conv2DBackpropInput": Struct(  # Conv2DTranspose
        id=22,
        rank=4,
        out_shapes=lambda s: [
            [
                s[0][0],
                s[0][1],
                s[0][3],
                s[0][2],
            ],  # K TF:[H, W, in_channels, out_channels] => [H, W, out_channels, in_channels]
            [1, 1, 1, s[-1][-1]] if len(s) > 1 else [1, 1, 1, s[0][2]],  # B
        ],
        # swap the last two kernel axes to match the shape produced by out_shapes
        patch_data=lambda data: [np.transpose(data[0], (0, 1, 3, 2)), data[1]],
    ),
    "Pad": 29,
    # TODO: 3D
    "ResizeNearestNeighbor": 23,  # implemented as Upsample2D
    "ResizeBilinear": 23,  # implemented as Upsample2D
    "ResizeBicubic": 23,  # implemented as Upsample2D
    "MaxPool": 25,
    "AvgPool": 26,
    "GlobalAveragePool": 28,
    "GlobalAvgPool": 28,
    "Activation": 50,
    "BatchNormalization": Struct(
        id=51,  # after fusion implemented as ScaleBias
        out_shapes=lambda shapes: [
            [1, 1, 1, shapes[0][0]],  # S
            [1, 1, 1, shapes[0][0]],  # B
        ],
        patch_data=lambda data:
        # fuse [gamma, beta, mean, var, epsilon] => [scale, bias]
        # TODO: double-check if epsilon is the last data argument and not the 1st?
        barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], data[4])
        if len(data) == 5
        else
        # fuse [ONE, beta, mean, var, epsilon] => [scale, bias]
        # TODO: double-check if epsilon is the last data argument and not the 1st?
        barracuda.fuse_batchnorm_weights(
            np.ones(np.shape(data[0])), data[0], data[1], data[2], data[3]
        ),
    ),
    "FusedBatchNorm": Struct(
        id=51,  # after fusion implemented as ScaleBias
        out_shapes=lambda shapes: [
            [1, 1, 1, shapes[0][0]],  # S
            [1, 1, 1, shapes[0][0]],  # B
        ],
        # NOTE: unlike the other entries, this patch_data takes two arguments
        # (data, layer); epsilon is read from the layer via get_epsilon().
        patch_data=lambda data, layer:
        # fuse [gamma, beta, mean, var, epsilon] => [scale, bias]
        barracuda.fuse_batchnorm_weights(
            data[0], data[1], data[2], data[3], get_epsilon(layer)
        ),
    ),
    "BatchNormalizationRuntime": Struct(
        id=52,
        out_shapes=lambda shapes: [
            [1, 1, 1, shapes[0][0]],  # G
            [1, 1, 1, shapes[0][0]],  # B
        ],
        # with 4 data entries the first two are kept as-is; otherwise the first
        # kept entry is all ones and data[0] becomes the second
        patch_data=lambda data: [data[0], data[1]]
        if len(data) == 4
        else [np.ones(np.shape(data[0])), data[0]],
    ),
    "InstanceNormalization": Struct(  # TODO: epsilon
        id=52,
        out_shapes=lambda shapes: [
            [1, 1, 1, shapes[0][0]],  # G
            [1, 1, 1, shapes[0][0]],  # B
        ],
        # with 2 data entries both are kept as-is; otherwise the first kept
        # entry is all ones and data[0] becomes the second
        patch_data=lambda data: [data[0], data[1]]
        if len(data) == 2
        else [np.ones(np.shape(data[0])), data[0]],
    ),
    "LRN": 53,
    "RandomStandardNormal": 64,
    "RandomUniform": 65,
    "Multinomial": Struct(id=66, rank=2),
    "OneHot": Struct(id=67, rank=lambda inputs: inputs[0] + 1),
    # Broadcast ops
    "Add": Struct(id=100, rank=lambda inputs: np.max(inputs)),
    "Sub": Struct(id=101, rank=lambda inputs: np.max(inputs)),
    "Mul": Struct(id=102, rank=lambda inputs: np.max(inputs)),
    "RealDiv": Struct(id=103, rank=lambda inputs: np.max(inputs)),
    "Pow": Struct(id=104, rank=lambda inputs: np.max(inputs)),
    "Minimum": Struct(id=110, rank=lambda inputs: np.max(inputs)),
    "Maximum": Struct(id=111, rank=lambda inputs: np.max(inputs)),
    # Reduce ops
    "Max": Struct(id=124, rank=lambda inputs: inputs[0] - 1),
    "Mean": Struct(id=125, rank=lambda inputs: inputs[0] - 1),
    "Min": Struct(id=126, rank=lambda inputs: inputs[0] - 1),
    "Prod": Struct(id=127, rank=lambda inputs: inputs[0] - 1),
    "Sum": Struct(id=128, rank=lambda inputs: inputs[0] - 1),
    "Flatten": Struct(id=200, rank=2),
    "Reshape": 201,
    "Concat": 210,
    "StridedSlice": 211,
    "Nop": 0,
}
|
|
|
|
# Layer classes that have a "...Runtime" counterpart, keyed by class name.
# process_layer() consults this table for classes missing from known_classes
# and reports them as layers that are used only for training.
requires_runtime_flag = {
    "Dropout": "DropoutRuntime",
    "BatchNormalization": "BatchNormalizationRuntime",
}
|
|
|
|
# Activation / element-wise function names mapped to integer ids
# (presumably the Barracuda activation enum values - TODO confirm).
# process_layer() rewrites any class name found here to the generic
# "Activation" class and keeps the original name as the activation.
known_activations = {
    "Linear": 0,
    "Relu": 1,
    "Softmax": 2,
    "Tanh": 3,
    "Sigmoid": 4,
    "Elu": 5,
    "Relu6": 6,
    "LeakyRelu": 7,
    "Selu": 8,
    "Swish": 9,
    "LogSoftmax": 10,
    "Softplus": 11,
    "Softsign": 12,
    "Abs": 100,
    "Neg": 101,
    "Ceil": 102,
    "Floor": 104,
    "Sqrt": 111,
    "Exp": 113,
    "Log": 114,
    "Acos": 200,
    "Acosh": 201,
    "Asin": 202,
    "Asinh": 203,
    "Atan": 204,
    "Atanh": 205,
    "Cos": 206,
    "Cosh": 207,
    "Sin": 208,
    "Sinh": 209,
    "Tan": 210,
}
|
|
|
|
# Padding mode name -> list of pad values. "SAME" is encoded as the single
# sentinel value [-1] rather than four explicit pads.
known_paddings = {"VALID": [0, 0, 0, 0], "SAME": [-1]}  # SameUpper
|
|
|
|
# Set of data format names the converter accepts (only "NHWC" at the moment).
supported_data_formats = {"NHWC"}
|
|
|
|
# Sequences of graph ops that are collapsed into a single named pattern.
# Each key is the repr() of a list whose items are op names or compiled regular
# expressions; each value is a pattern name. The pattern names correspond to
# keys of transform_patterns.
# NOTE(review): the code that matches node sequences against these keys is not
# visible here - presumably the regex items are matched against node names
# rather than op types; confirm against the matcher.
known_patterns = {
    # TODO: Flatten pattern using namespace regexp
    repr(["Shape", "StridedSlice", "Pack", "Reshape"]): "Flatten",
    repr(["Shape", "StridedSlice", "Prod", "Pack", "Reshape"]): "Flatten",
    repr(
        ["Shape", "Slice", "Slice", "Prod", "ExpandDims", "ConcatV2", "Reshape"]
    ): "Flatten",
    repr(["Add", "Rsqrt", "Mul", "Mul", "Sub", "Add"]): "BatchNormalization",
    repr(["Add", "Rsqrt", "Mul", "Mul", "Mul", "Sub", "Add"]): "BatchNormalization",
    repr(
        [
            "Mean",
            "StopGradient",
            "SquaredDifference",
            "Mean",
            "Sub",
            "Add",
            "Pow",
            "RealDiv",
            "Mul",
            "Add",
        ]
    ): "InstanceNormalization_ByTensorOrder",
    repr(
        [
            "Mean",
            "StopGradient",
            "SquaredDifference",
            "Mean",
            "Squeeze",
            "Squeeze",
            "Add",
            "Rsqrt",
            "Mul",
            "Mul",
            "Mul",
            "Sub",
            "Add",
        ]
    ): "InstanceNormalization_ByTensorName",
    repr(["MatMul", "BiasAdd"]): "Dense",
    repr(["Conv2D", "BiasAdd"]): "Conv2D",
    repr(["DepthwiseConv2dNative", "BiasAdd"]): "DepthwiseConv2dNative",
    repr(["Conv2DBackpropInput", "BiasAdd"]): "Conv2DBackpropInput",
    repr(["Conv2DBackpropInput"]): "Conv2DBackpropInput",
    repr(
        [
            "Shape",
            "StridedSlice",
            "StridedSlice",
            "StridedSlice",
            "Mul",
            "Mul",
            "Pack",
            "Conv2DBackpropInput",
            "BiasAdd",
        ]
    ): "Conv2DBackpropInput",
    repr(
        [
            "Shape",
            "StridedSlice",
            "StridedSlice",
            "StridedSlice",
            "Mul",
            "Mul",
            "Pack",
            "Conv2DBackpropInput",
        ]
    ): "Conv2DBackpropInput",
    repr(
        ["Shape", "StridedSlice", "Mul", "ResizeNearestNeighbor"]
    ): "ResizeNearestNeighbor",
    repr(
        ["Pack", "Reshape"]
    ): "Flatten$",  # for now we assume that this combination is trivial Flatten
    # for example it is used in ML-agents LSTM nets with sequence_length==1
    repr(
        [
            "StridedSlice",
            "Reshape",
            re.compile("^lstm/"),
            "Reshape",
            "ConcatV2",
            "Identity",
        ]
    ): "BasicLSTMReshapeOut",
    repr(
        [re.compile("^lstm/"), "Reshape", "ConcatV2", "Identity"]
    ): "BasicLSTMReshapeOut",
    repr(
        ["Reshape", re.compile("^lstm_[a-z]*/"), "Reshape", "ConcatV2"]
    ): "BasicLSTMReshapeOut",
    repr(["Reshape", re.compile("^lstm_[a-z]*/"), "ConcatV2"]): "BasicLSTMConcatOut",
    repr(["Sigmoid", "Mul"]): "Swish",
    repr(["Mul", "Abs", "Mul", "Add"]): "LeakyRelu",
    repr(
        ["Shape", "Reshape"]
    ): "ReshapeLikeInput0",  # shape comes from the 1st node as input[0]
    # Single-op patterns: these ops get a dedicated transform of the same name
    repr(["Reshape"]): "Reshape",
    repr(["ConcatV2"]): "ConcatV2",
    repr(["Mean"]): "Mean",
    repr(["Pad"]): "Pad",
    repr(["Multinomial"]): "Multinomial",
    repr(["OneHot"]): "OneHot",
    repr(["Square"]): "Square",
    repr(["SquaredDifference"]): "SquaredDifference",
    repr(["StridedSlice"]): "StridedSlice",
    repr(["Squeeze"]): "Squeeze",
    repr(["ExpandDims"]): "ExpandDims",
    # TODO: FusedResizeAndPadConv2D
}
|
|
|
|
|
|
def by_name(args, name):
    """Return the first item of `args` whose `.name` ends with `name`.

    Returns None when no item matches.
    """
    matches = (candidate for candidate in args if candidate.name.endswith(name))
    return next(matches, None)
|
|
|
|
|
|
def by_op(args, op):
    """Return the first item of `args` whose `.op` equals `op`.

    Returns None when no item matches.
    """
    matches = (candidate for candidate in args if candidate.op == op)
    return next(matches, None)
|
|
|
|
|
|
def order_by(args, names):
    """Reorder the strings in `args` according to the suffixes in `names`.

    For every suffix in `names`, in order, all remaining strings ending with
    that suffix are moved to the result (keeping their relative order).
    Strings that match no suffix are appended at the end.
    """
    remaining = args
    result = []
    for suffix in names:
        picked = []
        skipped = []
        for item in remaining:
            bucket = picked if item.endswith(suffix) else skipped
            bucket.append(item)
        result.extend(picked)
        remaining = skipped
    result.extend(remaining)  # append what is left
    assert len(result) == len(args)
    return result
|
|
|
|
|
|
# Pattern name -> factory that builds the replacement layer(s) for a matched
# pattern. Every factory is called as f(nodes, inputs, tensors, context) and
# returns either a Struct describing one layer or whatever the delegated helper
# (strided_slice, sqr_diff, basic_lstm, barracuda.mean) returns.
#   nodes   - the matched nodes; nodes[-1] is used as the pattern's final node
#   inputs  - names of the inputs to the pattern
#   tensors - constant tensors of the pattern, read via `.name` / `.data`
#   context - conversion state (layer_ranks is read here)
# The helpers referenced by the lambdas are defined further down in the file;
# that is fine because they are only looked up when a lambda is called.
transform_patterns = {
    "Flatten": lambda nodes, inputs, tensors, _: Struct(op="Flatten", input=inputs),
    "Flatten$": lambda nodes, inputs, tensors, _: Struct(
        op="Flatten",
        input=[
            inputs[-1]
        ],  # take only the last input, assume all other arguments are trivial (like sequence_length==1
        # always in ML-agents LSTM nets)
    ),
    "Reshape": lambda nodes, inputs, tensors, context: Struct(
        op="Reshape",
        rank=len(tensors[0].data)
        if len(tensors)
        > 0  # tensor data is treated as reshape coefficient, if not empty
        else context.layer_ranks[inputs[1]]
        if len(inputs) == 2  # otherwise shape of the 2nd input tensor is used
        else -1,
        input=inputs,
        # expand the target shape to 4 elements according to how many values
        # the shape tensor carries; empty when there is no usable shape tensor
        shape=[
            tensors[0].data[0],
            tensors[0].data[1],
            tensors[0].data[2],
            tensors[0].data[3],
        ]
        if len(tensors) > 0 and len(tensors[0].data) == 4
        else [tensors[0].data[0], 1, tensors[0].data[1], tensors[0].data[2]]
        if len(tensors) > 0 and len(tensors[0].data) == 3
        else [tensors[0].data[0], 1, 1, tensors[0].data[1]]
        if len(tensors) > 0 and len(tensors[0].data) == 2
        else [1, 1, 1, tensors[0].data[0]]
        if len(tensors) > 0 and len(tensors[0].data) == 1
        else [],
    ),
    "ReshapeLikeInput0": lambda nodes, inputs, tensors, context: Struct(
        op="Reshape",
        rank=context.layer_ranks[inputs[0]]
        if len(inputs)
        == 2  # unlike standard 'Reshape' input[0] is used as shape & input[1] as data
        else -1,
        input=[inputs[1], inputs[0]]
        if len(inputs)
        == 2  # unlike standard 'Reshape' input[0] is used as shape & input[1] as data
        else inputs,
    ),
    # NOTE(review): np.shape() returns a tuple, and a tuple never compares equal
    # to the list [4, 2], so both `np.shape(tensors[0]) == [4, 2]` checks below
    # look like they can never be true - which would make this entry always emit
    # "BarracudaUnsupportedPad" with pads [0, 0, 0, 0]. It is also unclear whether
    # np.shape should be applied to tensors[0] or tensors[0].data. Confirm intent
    # before changing.
    "Pad": lambda nodes, inputs, tensors, _: Struct(
        op="Pad"
        if (
            len(tensors) > 0
            and np.shape(tensors[0]) == [4, 2]
            and get_attr(nodes[-1], "mode", default="constant").lower() == "constant"
        )
        else "BarracudaUnsupportedPad",
        input=inputs,
        pads=[
            tensors[0].data[1, 0],
            tensors[0].data[1, 1],
            tensors[0].data[2, 0],
            tensors[0].data[2, 1],
        ]
        if len(tensors) > 0 and np.shape(tensors[0]) == [4, 2]
        else [0, 0, 0, 0],
        beta=get_attr(nodes[-1], "constant_values") or 0,
    ),
    "Squeeze": lambda nodes, inputs, tensors, context: Struct(
        op="Nop",  # Squeeze is no-operation in Barracuda
        input=inputs,
        rank=context.layer_ranks[inputs[0]] - len(get_attr(nodes[-1], "squeeze_dims"))
        if len(get_attr(nodes[-1], "squeeze_dims")) > 0
        else -1,  # if list of squeeze axis is not specified, it is unknown what would be the rank of result
    ),
    "ExpandDims": lambda nodes, inputs, tensors, context: Struct(
        op="Nop",  # ExpandDims is no-operation in Barracuda
        input=[inputs[0]],
        rank=context.layer_ranks[inputs[0]] + 1,
    ),
    "Multinomial": lambda nodes, inputs, tensors, _: Struct(
        op="Multinomial",
        input=inputs,
        shape=[int(by_name(tensors, "/num_samples").data[0])],
        # seed = get_attr(nodes[0], 'seed'),
    ),
    "OneHot": lambda nodes, inputs, tensors, _: Struct(
        op="OneHot",
        input=inputs,
        shape=[int(by_name(tensors, "/depth").data[0])],
        alpha=by_name(tensors, "/on_value").data[0],
        beta=by_name(tensors, "/off_value").data[0],
    ),
    "Square": lambda nodes, inputs, tensors, _: Struct(
        op="Mul", input=[inputs[0], inputs[0]]  # input * input
    ),
    "ConcatV2": lambda nodes, inputs, tensors, context: Struct(
        op="Concat",
        input=inputs,
        # the TF axis is remapped using the rank of the first input
        axis=axis_to_barracuda(
            int(by_name(tensors, "/axis").data[0]), context.layer_ranks[inputs[0]]
        ),
    ),
    "StridedSlice": lambda nodes, inputs, tensors, context: strided_slice(
        nodes[-1].name,
        inputs[0],
        context.layer_ranks[inputs[0]],
        # the three constant tensors are taken positionally as begin/end/strides
        begin=tensors[0].data,
        end=tensors[1].data,
        strides=tensors[2].data,
        begin_mask=get_attr(nodes[-1], "begin_mask"),
        end_mask=get_attr(nodes[-1], "end_mask"),
        ellipsis_mask=get_attr(nodes[-1], "ellipsis_mask"),
        new_axis_mask=get_attr(nodes[-1], "new_axis_mask"),
        shrink_axis_mask=get_attr(nodes[-1], "shrink_axis_mask"),
    ),
    "BatchNormalization": lambda nodes, inputs, tensors, _: Struct(
        op="BatchNormalization",
        # constant tensor names are ordered by suffix so they line up with
        # gamma, beta, mean, variance; unmatched names follow at the end
        input=[i for i in inputs]
        + order_by([t.name for t in tensors], ["gamma", "beta", "mean", "variance"]),
    ),
    "InstanceNormalization_ByTensorName": lambda nodes, inputs, tensors, _: Struct(
        op="InstanceNormalization",
        input=[i for i in inputs]
        + order_by([t.name for t in tensors], ["scale", "offset"]),
    ),
    "InstanceNormalization_ByTensorOrder": lambda nodes, inputs, tensors, _: Struct(
        op="InstanceNormalization",
        # only the last two constant tensors are used, in the order they appear
        input=[i for i in inputs] + [t.name for t in tensors][-2:],
    ),
    "Dense": lambda nodes, inputs, tensors, _: Struct(
        op="Dense",
        input=[i for i in inputs] + [t.name for t in tensors],
        data_frmt=get_attr(
            by_op(nodes, "Dense") or by_op(nodes, "MatMul"), "data_format"
        ),
    ),
    "Conv2D": lambda nodes, inputs, tensors, _: Struct(
        op="Conv2D",
        input=[i for i in inputs] + [t.name for t in tensors],
        padding=get_attr(by_op(nodes, "Conv2D"), "padding"),
        strides=get_attr(by_op(nodes, "Conv2D"), "strides"),
        dilations=get_attr(by_op(nodes, "Conv2D"), "dilations"),
        data_frmt=get_attr(by_op(nodes, "Conv2D"), "data_format"),
    ),
    "DepthwiseConv2dNative": lambda nodes, inputs, tensors, _: Struct(
        op="DepthwiseConv2dNative",
        input=[i for i in inputs] + [t.name for t in tensors],
        padding=get_attr(by_op(nodes, "DepthwiseConv2dNative"), "padding"),
        strides=get_attr(by_op(nodes, "DepthwiseConv2dNative"), "strides"),
        dilations=get_attr(by_op(nodes, "DepthwiseConv2dNative"), "dilations"),
        data_frmt=get_attr(by_op(nodes, "DepthwiseConv2dNative"), "data_format"),
    ),
    "Conv2DBackpropInput": lambda nodes, inputs, tensors, _: Struct(
        op="Conv2DBackpropInput",
        input=[i for i in inputs]
        + [t.name for t in tensors][1:][
            -2:
        ],  # [1:] - skips the 0th tensor, since Conv2DBackpropInput 0th tensor is 'input_sizes'
        # (which differs from other Conv layers)
        # [-2:] - take only last 2 tensors, this allows to process large patterns with the same code
        padding=get_attr(by_op(nodes, "Conv2DBackpropInput"), "padding"),
        strides=get_attr(by_op(nodes, "Conv2DBackpropInput"), "strides"),
        dilations=get_attr(by_op(nodes, "Conv2DBackpropInput"), "dilations"),
        data_frmt=get_attr(by_op(nodes, "Conv2DBackpropInput"), "data_format"),
    ),
    "ResizeNearestNeighbor": lambda nodes, inputs, tensors, _: Struct(
        op="ResizeNearestNeighbor",
        input=[i for i in inputs],
        # ksize comes from the only tensor, or from the last tensor when the
        # pattern carries at least 4 tensors; otherwise it falls back to [1, 1]
        ksize=[int(tensors[0].data[0]), int(tensors[0].data[1])]
        if len(tensors) == 1 and len(tensors[0].data) == 2
        else [int(tensors[-1].data[0]), int(tensors[-1].data[1])]
        if len(tensors) >= 4 and len(tensors[-1].data) == 2
        else [1, 1],
    ),
    "Mean": lambda nodes, inputs, tensors, _:
    # take only the last input
    barracuda.mean(nodes[-1].name, inputs[-1], axis=tensors[0].data),
    "SquaredDifference": lambda nodes, inputs, tensors, _: sqr_diff(
        nodes[-1].name, inputs[0], inputs[1]
    ),
    "BasicLSTMReshapeOut": lambda nodes, inputs, tensors, context: basic_lstm(
        nodes, inputs, tensors, context, find_type="Reshape"
    ),
    "BasicLSTMConcatOut": lambda nodes, inputs, tensors, context: basic_lstm(
        nodes, inputs, tensors, context, find_type="ConcatV2"
    ),
    "Swish": lambda nodes, inputs, tensors, _: Struct(op="Swish", input=inputs),
    "LeakyRelu": lambda nodes, inputs, tensors, _: Struct(op="LeakyRelu", input=inputs),
    # TODO:'Round'
    # TODO:'Rsqrt'
}
|
|
|
|
|
|
# Debug
|
|
def debug(s):
    """Print `s` and hand it back unchanged.

    Returning the argument lets the call be wrapped around any expression
    without altering the surrounding code.
    """
    print(s)
    return s
|
|
|
|
|
|
# Helper
|
|
def embody(v, default=0):
    """Return `v`, or `default` when `v` is None.

    Only None triggers the fallback; other falsy values (0, "", []) are kept.
    """
    if v is None:
        return default
    return v
|
|
|
|
|
|
# Parse
|
|
def get_attr(node, attr_name, default=None):
    """Read attribute `attr_name` from a Struct or from a graph node.

    For a Struct the plain Python attribute is returned. For any other node
    the value is taken from `node.attr[attr_name]`, and the first populated
    field in the order list, b, i, f, s, shape, tensor is returned (strings
    are decoded as UTF-8). `default` is returned when nothing is set.
    """
    if type(node) == Struct:
        return getattr(node, attr_name, default)

    # See: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto
    value = node.attr[attr_name]

    if value.HasField("list"):
        # NOTE: can't find way to identify type of list BUT it is almost always list(int)
        # except list(float) in FractionalAvg/MaxPool
        return value.list.i

    for scalar_field in ("b", "i", "f"):
        if value.HasField(scalar_field):
            return getattr(value, scalar_field)

    if value.HasField("s"):
        return value.s.decode("utf-8")

    for message_field in ("shape", "tensor"):
        if value.HasField(message_field):
            return getattr(value, message_field)

    return default
|
|
|
|
|
|
def get_epsilon(layer):
    """Return the layer's "epsilon" attribute, falling back to 0.001."""
    # default epsilon taken from tf.layers.batch_normalization
    fallback_epsilon = 0.001
    return get_attr(layer, "epsilon", default=fallback_epsilon)
|
|
|
|
|
|
def get_layer_rank(layer):
    """Return the number of dimensions in the layer's "shape" attribute.

    Returns None when the attribute is missing or falsy, and 1 when it is a
    plain Python list.
    """
    shape_attr = get_attr(layer, "shape")
    if not shape_attr:
        return None
    if isinstance(shape_attr, list):
        return 1
    sizes = [dim.size for dim in shape_attr.dim]
    return len(sizes)
|
|
|
|
|
|
def get_layer_shape(layer):
    """Return the layer's "shape" attribute expanded to four elements.

    A missing/falsy shape yields [-1, -1, -1, -1]. Shapes with 1, 2 or 3
    dimensions are padded with ones as [1,1,1,c], [n,1,1,c] and [n,1,w,c];
    any other shape is returned as read.
    """
    shape_attr = get_attr(layer, "shape")
    if not shape_attr:
        return [-1, -1, -1, -1]

    sizes = [dim.size for dim in shape_attr.dim]
    rank = len(sizes)
    if rank == 1:
        return [1, 1, 1, sizes[0]]
    if rank == 2:
        return [sizes[0], 1, 1, sizes[1]]
    if rank == 3:
        return [sizes[0], 1, sizes[1], sizes[2]]
    return sizes
|
|
|
|
|
|
def get_tensor_dims(tensor):
    """Return the dimensions of a tensor.

    For a NumPy array this is its shape. Otherwise the sizes listed in
    `tensor.tensor_shape.dim` are used, unless one of `float_val`, `int_val`
    or `bool_val` is non-empty, in which case the shape of that value list
    takes over (the last non-empty one wins).
    """
    if isinstance(tensor, np.ndarray):
        return np.shape(tensor)

    dims = [v.size for v in tensor.tensor_shape.dim] if tensor.tensor_shape else []
    for field_name in ("float_val", "int_val", "bool_val"):
        values = getattr(tensor, field_name)
        if values:
            dims = np.shape(values)
    return dims
|
|
|
|
|
|
def get_tensor_dtype(tensor):
    """Return the data type of a tensor.

    For a NumPy array this is its `dtype`. Otherwise the populated fields of
    the message are scanned and the enum value name of the "dtype" field is
    returned (e.g. "DT_FLOAT"); an empty string is returned when no such
    field is populated.
    """
    if isinstance(tensor, np.ndarray):
        return tensor.dtype

    dtype_name = ""
    for field, value in tensor.ListFields():
        is_dtype_enum = (
            field.name == "dtype"
            and field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM
        )
        if not is_dtype_enum:
            continue
        dtype_name = field.enum_type.values_by_number.get(value, None).name
    return dtype_name
|
|
|
|
|
|
def get_tensor_data(tensor):
    """Return the contents of a tensor as a NumPy array shaped by its dims.

    A NumPy array is returned as a float copy. Otherwise the data is decoded
    from the packed little-endian `tensor_content` bytes (DT_FLOAT, DT_INT32
    and DT_BOOL are supported) or taken from the `float_val` / `int_val` /
    `bool_val` lists, the last non-empty one winning.

    Raises:
        ValueError: when no data could be extracted (unsupported data type or
            a tensor without any values).
    """
    if isinstance(tensor, np.ndarray):
        return tensor.astype(float)

    dims = get_tensor_dims(tensor)
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling.
    # int() matters for scalars: np.prod([]) is the float 1.0, which would turn
    # the struct format below into the invalid "<1.0f".
    elems = int(np.prod(dims))

    data = None
    if tensor.tensor_content:
        # TODO: support other types
        dataType = get_tensor_dtype(tensor)
        if dataType == "DT_FLOAT":
            data = struct.unpack("<" + str(elems) + "f", tensor.tensor_content)
        elif dataType == "DT_INT32":
            data = struct.unpack("<" + str(elems) + "i", tensor.tensor_content)
        elif dataType == "DT_BOOL":
            data = struct.unpack("<" + str(elems) + "?", tensor.tensor_content)
        else:
            print("UNSUPPORTED: data type", dataType)
    if tensor.float_val:
        data = tensor.float_val
    if tensor.int_val:
        data = np.array(tensor.int_val, dtype=float)
    if tensor.bool_val:
        data = np.array(tensor.bool_val, dtype=float)

    if data is None:
        # Previously this fell through to an UnboundLocalError on `data`.
        raise ValueError("tensor contains no data that can be decoded")
    return np.array(data).reshape(dims)
|
|
|
|
|
|
def flatten(items, enter=lambda x: isinstance(x, list)):
    """Yield the leaf items of an arbitrarily nested iterable, depth first.

    Args:
        items: iterable that may contain nested containers.
        enter: predicate deciding whether an element is descended into
            (default: only `list` instances are).

    Yields:
        Every element for which `enter` is false, in original order.
    """
    # http://stackoverflow.com/a/40857703
    # https://github.com/ctmakro/canton/blob/master/canton/misc.py
    for x in items:
        if enter(x):
            # Pass the predicate down: without it nested levels silently fell
            # back to the default "lists only" rule.
            yield from flatten(x, enter)
        else:
            yield x
|
|
|
|
|
|
def replace_strings_in_list(array_of_strigs, replace_with_strings):
    """Substitute strings using a lookup table and return one flat list.

    A value in `replace_with_strings` can be either a single string or a list
    of strings. Strings without a (truthy) replacement are kept as they are.
    """
    substituted = []
    for original in array_of_strigs:
        replacement = replace_with_strings.get(original)
        substituted.append(replacement if replacement else original)
    return list(flatten(substituted))
|
|
|
|
|
|
def remove_duplicates_from_list(array):
    """Return the items of `array` with later duplicates dropped.

    Preserves the order of elements in the list (first occurrence wins).
    """
    # dict keys are unique and keep insertion order
    return list(dict.fromkeys(array))
|
|
|
|
|
|
#########################################################
|
|
|
|
|
|
def pool_to_HW(shape, data_frmt):
    """Reduce a 4-element NHWC|NCHW list to its [H, W] part.

    Anything that does not have exactly four elements is returned untouched.
    Every format other than "NCHW" is read as NHWC.
    """
    if len(shape) != 4:
        return shape  # Not NHWC|NCHW, return as is
    first = 2 if data_frmt == "NCHW" else 1
    return [shape[first], shape[first + 1]]
|
|
|
|
|
|
def strides_to_HW(shape, format):
    """Reduce NHWC|NCHW strides to [H, W]; same rules as pool_to_HW."""
    return pool_to_HW(shape, data_frmt=format)
|
|
|
|
|
|
def axis_to_barracuda(axis, input_rank):
    """Map an axis of a rank-`input_rank` tensor onto the 4D NHWC layout.

    Args:
        axis: axis index; negative values count from the last axis.
        input_rank: rank of the tensor the axis refers to.

    Returns:
        The index (0=N, 1=H, 2=W, 3=C) that the axis occupies in the 4D
        layout, or -1 when `input_rank` is not 1, 2, 3 or 4.

    Raises:
        AssertionError: when the axis is out of range for `input_rank`.
    """
    N = 0
    H = 1
    W = 2
    C = 3
    if axis < 0:
        # Negative axes wrap around: -1 is the last axis. The previous
        # `input_rank - axis` pushed every negative axis past the valid range.
        axis = input_rank + axis
    assert axis >= 0
    assert axis < input_rank

    layouts = {
        4: [N, H, W, C],  # [NHWC]
        3: [N, W, C],  # [N_WC]
        2: [N, C],  # [N__C]
        1: [C],  # [___C]
    }
    layout = layouts.get(input_rank)
    if layout is None:
        return -1
    return layout[axis]
|
|
|
|
|
|
#########################################################
|
|
|
|
|
|
def sqr_diff(name, a, b):
    """Build the layers computing (a - b) * (a - b), with the result named `name`.

    Returns the layers accumulated by the builder.
    """
    builder = barracuda.Build(name)
    difference = builder.sub(a, b)
    builder.mul(difference, difference, out=name)
    return builder.layers
|
|
|
|
|
|
def strided_slice(
    name,
    input,
    input_rank,
    begin,
    end,
    strides,
    begin_mask,
    end_mask,
    ellipsis_mask,
    new_axis_mask,
    shrink_axis_mask,
):
    """Build the layer(s) for a StridedSlice op.

    Args:
        name: name of the layer builder and of the output.
        input: the input passed on to the builder.
        input_rank: rank of the input; must not be -1.
        begin, end, strides: per-axis slice descriptors; they are converted
            with `.astype(np.int32).tolist()`, so array-like objects with
            `astype` are expected, and all three must have the same length.
        begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask:
            integer bit masks, bit i referring to descriptor i.

    Returns:
        The layers accumulated by the builder.
    """
    assert input_rank != -1
    begin = begin.astype(np.int32).tolist()
    end = end.astype(np.int32).tolist()
    strides = strides.astype(np.int32).tolist()

    # StridedSlice range and mask descriptions:
    # https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/strided-slice
    # TODO: I don't think ellipsis and newaxis would work together well with current implementation

    assert len(begin) == len(end)
    assert len(begin) == len(strides)

    # prepare begin, end, stride arrays
    output_rank = input_rank
    # Shift the mask down to zero; insert_pos ends up as the bit length of
    # ellipsis_mask (0 when no ellipsis bit is set).
    insert_pos = 0
    while ellipsis_mask:
        ellipsis_mask >>= 1
        insert_pos += 1

    # NOTE: begin=0, end=0, stride=1 <= full range from existing axis
    # begin=0, end=0, stride=0 <= new axis OR shrink axis to single 1st element
    # begin=N, end=N, stride=0 <= shrink axis to single Nth element
    # Pad the descriptors up to input_rank with "full range" entries: at the
    # ellipsis position when there is one, otherwise at the end.
    while len(begin) < input_rank:
        if insert_pos:
            begin.insert(insert_pos, 0)
            end.insert(insert_pos, 0)
            strides.insert(insert_pos, 1)
        else:
            begin.append(0)
            end.append(0)
            strides.append(1)
    assert len(begin) <= input_rank

    # NOTE(review): this value is never read - descriptor_count is reassigned
    # from len(begin) before its first use below.
    descriptor_count = input_rank
    for i in range(len(begin)):
        if begin_mask & (1 << i):
            begin[i] = 0
        if end_mask & (1 << i):
            end[i] = 0
        if new_axis_mask & (1 << i):
            begin[i] = end[i] = strides[i] = 0
            output_rank += 1
        if shrink_axis_mask & (1 << i):
            end[i] = begin[i]
            strides[i] = 0
            output_rank -= 1

    # convert to Barracuda layout
    # Fewer than 4 descriptors are spread over 4 slots; the filler slots use
    # begin=0, end=0, stride=1 (the "full range" encoding described above).
    descriptor_count = len(begin)
    assert descriptor_count <= 4
    if descriptor_count == 3:
        begin = [begin[0], 0, begin[1], begin[2]]
        end = [end[0], 0, end[1], end[2]]
        strides = [strides[0], 1, strides[1], strides[2]]
    elif descriptor_count == 2:
        begin = [begin[0], 0, 0, begin[1]]
        end = [end[0], 0, 0, end[1]]
        strides = [strides[0], 1, 1, strides[1]]
    elif descriptor_count == 1:
        begin = [0, 0, 0, begin[0]]
        end = [0, 0, 0, end[0]]
        strides = [1, 1, 1, strides[0]]

    nn = barracuda.Build(name)
    nn.strided_slice(input, begin, end, strides, output_rank, out=name)
    return nn.layers
|
|
|
|
|
|
# search backwards starting from index_of_actual_output_node for non-const node
|
|
def locate_actual_output_node(
    nodes, index_of_actual_output_node=-1, find_type="Reshape"
):
    """Walk backwards through `nodes` to the nearest node of op `find_type`.

    The search starts at `index_of_actual_output_node` (a negative index,
    -1 being the last node) and moves towards the front of the list.

    NOTE(review): when no node matches, the index runs one past the front and
    the lookup raises IndexError before the assertion is reached; a match on
    the very first node trips the assertion. Behaviour is kept as is - confirm
    whether either case is intended.
    """
    node_count = len(nodes)
    position = index_of_actual_output_node
    while True:
        if -position - 1 >= node_count:
            break  # ran past the front of the list
        if nodes[position].op == find_type:
            break  # found the requested op type
        position -= 1
    found_node = nodes[position]
    assert -position < node_count
    return found_node
|
|
|
|
|
|
def gru(
    nodes,
    inputs,
    tensors,
    context,
    index_of_actual_output_node,
    assert_output_node_op_type=None,
):
    """Replace a matched GRU pattern with Barracuda GRU layers.

    Splits the gates kernel/bias into two gates, registers the resulting
    tensors in `context.model_tensors`, records the recurrent state in
    `context.model_memories` and redirects the pattern's output node to the
    new state.

    Args:
        nodes: matched nodes; the last node's name is used to name the new state.
        inputs: exactly two input names; inputs[0] is used as the state and
            inputs[-1] as the data input.
        tensors: constant tensors of the pattern, looked up by name suffix.
        context: conversion state that is mutated in place.
        index_of_actual_output_node: index at which the backwards search for
            the output node starts.
        assert_output_node_op_type: op type the output node search looks for.

    Returns:
        The layers created by `barracuda.gru`.

    NOTE(review): `assert_output_node_op_type` is forwarded as `find_type` to
    locate_actual_output_node(); with the default None no node op can match,
    so the search appears to always fail - callers presumably must pass an
    explicit op type. Confirm.
    """
    assert len(inputs) == 2

    def find_tensor_by_name(name, default=None):
        # data of the first tensor whose name ends with `name`, else `default`
        nonlocal tensors
        candidates = [t for t in tensors if t.name.endswith(name)]
        return candidates[0].data if candidates else default

    input = inputs[-1]
    state = inputs[0]
    gates_kernel = find_tensor_by_name("/gates/kernel")
    # a missing bias falls back to zeros sized by the kernel's last dimension
    # (the fallback is built eagerly, so the kernel must have been found)
    gates_bias = find_tensor_by_name(
        "/gates/bias", default=np.zeros(np.shape(gates_kernel)[-1])
    )
    candidate_kernel = find_tensor_by_name("/candidate/kernel")
    candidate_bias = find_tensor_by_name(
        "/candidate/bias", default=np.zeros(np.shape(candidate_kernel)[-1])
    )
    new_state = nodes[-1].name + "_h"

    assert np.shape(gates_kernel)[-1] == np.shape(gates_bias)[-1]
    assert np.shape(candidate_kernel)[-1] == np.shape(candidate_bias)[-1]

    num_gates = 2
    seq_length = 1
    hidden_size = np.shape(gates_kernel)[-1] // num_gates

    # split the fused gates kernel/bias along the last axis into one part per gate
    gate_kernels = np.split(gates_kernel, num_gates, axis=-1)
    gate_biases = np.split(gates_bias, num_gates, axis=-1)

    # NOTE(review): the tensor names below are fixed strings, so a second GRU
    # in the same model would overwrite these entries - confirm whether more
    # than one GRU per model needs to be supported.
    context.model_tensors["kernel_r"] = gate_kernels[0]
    context.model_tensors["kernel_u"] = gate_kernels[1]
    context.model_tensors["kernel_c"] = candidate_kernel
    context.model_tensors["bias_r"] = gate_biases[0]
    context.model_tensors["bias_u"] = gate_biases[1]
    context.model_tensors["bias_c"] = candidate_bias

    context.layer_ranks[state] = 2

    new_layers = barracuda.gru(
        "gru",
        input,
        state,
        "kernel_r",
        "kernel_u",
        "kernel_c",
        "bias_r",
        "bias_u",
        "bias_c",
        new_state,
    )

    # each memory is recorded as three consecutive items: shape, input name, output name
    state_shape = [1, 1, seq_length, hidden_size]
    context.model_memories += [state_shape, state, new_state]

    # map expected output of the replaced pattern to output from our GRU cell
    actual_output_node = locate_actual_output_node(
        nodes, index_of_actual_output_node, assert_output_node_op_type
    )
    context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state

    return new_layers
|
|
|
|
|
|
def basic_lstm(nodes, inputs, tensors, context, find_type="Reshape"):
    """Replace a matched basic LSTM pattern with Barracuda LSTM layers.

    Splits the fused kernel/bias into four gates (registered as i, j, f, o),
    adds the forget bias to the f-gate bias, registers the tensors in
    `context.model_tensors`, records both recurrent states in
    `context.model_memories` and redirects the pattern's output nodes to the
    new states.

    Args:
        nodes: matched nodes; the last node's name is used to name the new states.
        inputs: exactly two input names; inputs[0] is the prefix for the
            "_c"/"_h" state names and inputs[-1] is the data input.
        tensors: constant tensors of the pattern.
        context: conversion state that is mutated in place.
        find_type: op type of the node that is mapped to the new h state.

    Returns:
        The layers created by `barracuda.lstm`.
    """
    assert len(inputs) == 2

    def find_tensor_by_name(name, default=None):
        # data of the first tensor whose name ends with `name`, else `default`
        nonlocal tensors
        candidates = [t for t in tensors if t.name.endswith(name)]
        return candidates[0].data if candidates else default

    def find_forget_bias():
        nonlocal nodes
        nonlocal tensors
        # TODO: make it more fault-tolerant
        # search for scalar float constant that is input to Add node
        # and hope it is not a constant for some complex activation function
        for t in tensors:
            if np.prod(t.shape) == 1 and get_tensor_dtype(t.obj) == "DT_FLOAT":
                for n in nodes:
                    if n.op == "Add" and t.name in n.input:
                        return t.data
        # no candidate found: use a zero forget bias
        return np.zeros(1)

    input = inputs[-1]
    state_c = inputs[0] + "_c"
    state_h = inputs[0] + "_h"
    kernel = find_tensor_by_name("/kernel")
    # a missing bias falls back to zeros sized by the kernel's last dimension
    # (the fallback is built eagerly, so the kernel must have been found)
    bias = find_tensor_by_name("/bias", default=np.zeros(np.shape(kernel)[-1]))
    forget_bias = find_forget_bias()
    new_state_c = nodes[-1].name + "_c"
    new_state_h = nodes[-1].name + "_h"

    assert np.shape(kernel)[-1] == np.shape(bias)[-1]

    num_gates = 4
    seq_length = 1
    hidden_size = np.shape(kernel)[-1] // num_gates

    # split the fused kernel/bias along the last axis into one part per gate
    kernels = np.split(kernel, num_gates, axis=-1)
    biases = np.split(bias, num_gates, axis=-1)

    # NOTE: the tensor names are fixed strings, so only one LSTM per model is
    # supported (see the TODO about multiple LSTM layers at the top of the file).
    context.model_tensors["kernel_i"] = kernels[0]
    context.model_tensors["kernel_j"] = kernels[1]
    context.model_tensors["kernel_f"] = kernels[2]
    context.model_tensors["kernel_o"] = kernels[3]
    context.model_tensors["bias_i"] = biases[0]
    context.model_tensors["bias_j"] = biases[1]
    context.model_tensors["bias_f"] = biases[2] + forget_bias
    context.model_tensors["bias_o"] = biases[3]

    context.layer_ranks[state_c] = 2
    context.layer_ranks[state_h] = 2

    # lstm_value/strided_slice/stack => lstm_value
    # (raises StopIteration when no node name starts with "lstm")
    lstm_name = next(i.name for i in nodes if i.name.startswith("lstm")).split("/")[0]

    new_layers = barracuda.lstm(
        lstm_name,
        input,
        state_c,
        state_h,
        "kernel_i",
        "kernel_j",
        "kernel_f",
        "kernel_o",
        "bias_i",
        "bias_j",
        "bias_f",
        "bias_o",
        new_state_c,
        new_state_h,
    )

    # each memory is recorded as three consecutive items: shape, input name, output name
    state_shape = [1, 1, seq_length, hidden_size]
    context.model_memories += [state_shape, state_c, new_state_c]
    context.model_memories += [state_shape, state_h, new_state_h]

    # map expected output of the replaced pattern to output from our LSTM cell
    actual_output_node = locate_actual_output_node(nodes, -1, find_type)
    concat_out_node = locate_actual_output_node(nodes, -1, "ConcatV2")
    # NOTE(review): with find_type="ConcatV2" both lookups return the same node,
    # so the second assignment overwrites the first and that node ends up mapped
    # to new_state_c only - confirm this is intended.
    context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state_h
    context.map_ignored_layer_to_its_input[concat_out_node.name] = new_state_c

    return new_layers
|
|
|
|
|
|
#########################################################
|
|
|
|
|
|
def process_layer(layer, context, args):
    """Convert one graph node into a Barracuda layer and record it in ``context``.

    Nodes that carry no computation are not emitted as layers; they only
    update the bookkeeping tables of ``context``:

    * ``Nop``: remembered in ``map_ignored_layer_to_its_input``.
    * ``Const``: tensor stored in ``model_tensors``, rank in ``layer_ranks``.
    * ``Placeholder``: shape stored in ``input_shapes``, rank in ``layer_ranks``.
    * ``Identity`` fed by a constant: remembered as an ignored layer. Any other
      ``Identity`` is converted as a ``Linear`` activation.
    * ops missing from ``known_classes``: reported on stdout and remembered
      as ignored layers.

    Every other node becomes a ``Struct`` appended to ``context.layers``.

    :param layer: node exposing ``name``, ``op`` and ``input`` (plus ``attr``
        for ``Const`` nodes); callers pass both graph nodes and the pseudo
        nodes produced by pattern transforms.
    :param context: ``ModelBuilderContext`` that accumulates the results.
    :param args: options object; ``print_layers`` and ``verbose`` are read.
    :return: None
    """
    model_tensors = context.model_tensors
    input_shapes = context.input_shapes
    layer_ranks = context.layer_ranks
    map_ignored_layer_to_its_input = context.map_ignored_layer_to_its_input

    name = layer.name
    class_name = layer.op
    # Tensorflow inputs are always explicit, but in case of Keras we had
    # 'inputs = layer.input or [prev_layer_name]'
    inputs = layer.input
    inputs = replace_strings_in_list(inputs, map_ignored_layer_to_its_input)

    if class_name == "Nop":
        assert len(inputs) <= 1
        map_ignored_layer_to_its_input[name] = inputs
        return

    if class_name == "Const":
        model_tensors[name] = layer.attr["value"].tensor
        # we treat constants without shape as rank=1 (scalar converted to tensor)
        layer_ranks[name] = get_layer_rank(layer) or 1
        return

    if class_name == "Placeholder":
        assert inputs == []
        map_ignored_layer_to_its_input[name] = inputs
        input_shapes[name] = get_layer_shape(layer)
        layer_ranks[name] = get_layer_rank(layer)
        return

    if class_name == "Identity":
        connected_to_const = len(inputs) == 1 and inputs[0] in model_tensors
        if connected_to_const:
            map_ignored_layer_to_its_input[name] = inputs
            return
        else:
            # treat Identity layer that are connected to processing nodes
            # as output from the network
            class_name = "Linear"

    if args.print_layers or args.verbose:
        var_tensors = [i for i in inputs if i not in model_tensors]
        const_tensors = [i for i in inputs if i in model_tensors]
        print(
            "'%s' %s Vars:%s Const:%s" % (name, class_name, var_tensors, const_tensors)
        )

    # Activation ops are all emitted as a generic "Activation" layer whose
    # concrete function is carried in the `activation` field.
    if class_name in known_activations:
        activation = class_name
        class_name = "Activation"
    else:
        activation = "Linear"

    if class_name not in known_classes:
        if class_name in requires_runtime_flag:
            print("SKIP:", class_name, "layer is used only for training")
        else:
            print("IGNORED:", class_name, "unknown layer")
        map_ignored_layer_to_its_input[name] = inputs
        return

    klass = known_classes[class_name]
    # Table entries are either a bare integer layer id or a descriptor object
    # with an `id` attribute; normalize to the latter.
    if isinstance(klass, int):
        klass = Struct(id=klass)

    o_l = Struct()
    o_l.type = klass.id
    o_l.class_name = class_name
    o_l.name = name

    auto_pad = get_attr(layer, "padding")  # layer.attr['padding'].s.decode("utf-8")
    pads = get_attr(layer, "pads")
    strides = get_attr(layer, "strides")  # layer.attr['strides'].list.i
    pool_size = get_attr(layer, "ksize")  # layer.attr['ksize'].list.i
    shape = get_attr(layer, "shape")
    starts = get_attr(layer, "starts")
    ends = get_attr(layer, "ends")
    slice_strides = get_attr(layer, "slice_strides")
    rank = get_attr(layer, "rank") or get_layer_rank(layer)
    data_frmt = get_attr(
        layer, "data_format"
    )  # layer.attr['data_format'].s.decode("utf-8")
    axis = get_attr(layer, "axis")
    alpha = get_attr(layer, "alpha", default=1)
    beta = get_attr(layer, "beta")

    if activation and activation not in known_activations:
        print("IGNORED: unknown activation", activation)
    if auto_pad and auto_pad not in known_paddings:
        print("IGNORED: unknown padding", auto_pad)
    if data_frmt and data_frmt not in supported_data_formats:
        print("UNSUPPORTED: data format", data_frmt)

    o_l.activation = known_activations.get(activation) or 0
    o_l.pads = (
        known_paddings.get(auto_pad) if auto_pad else pads or starts or [0, 0, 0, 0]
    )
    o_l.strides = strides_to_HW(strides, data_frmt) if strides else slice_strides or []
    o_l.pool_size = (
        pool_to_HW(pool_size, data_frmt) if pool_size else ends or shape or []
    )
    o_l.axis = embody(axis, default=-1)
    o_l.alpha = embody(alpha, default=1)
    o_l.beta = beta or 0
    # default initialization, actual value will be set later on in this function
    o_l.rank = -1

    tensor_names = [i for i in inputs if i in model_tensors]
    o_l.tensors = [
        Struct(
            name=x,
            shape=get_tensor_dims(model_tensors[x]),
            data=get_tensor_data(model_tensors[x]),
        )
        for x in tensor_names
    ]

    # Patch shapes & data
    layer_has_model_tensors = len(o_l.tensors) > 0
    if hasattr(klass, "out_shapes") and layer_has_model_tensors:
        shapes = klass.out_shapes([x.shape for x in o_l.tensors])

        # if we have more shapes than actual tensors,
        # then create & fill missing tensors with zeros
        in_tensor_num = len(o_l.tensors)
        for index, new_shape in enumerate(shapes):
            if index >= in_tensor_num:
                new_tensor = Struct(
                    name=("%s/patch:%i") % (name, index - in_tensor_num),
                    shape=new_shape,
                    data=np.zeros(new_shape),
                )
                o_l.tensors.append(new_tensor)
        assert len(shapes) <= len(o_l.tensors)

        if hasattr(klass, "patch_data"):
            data = [x.data for x in o_l.tensors]

            # patch_data may be declared as f(data) or f(data, layer);
            # pass the node only when the callable accepts a second argument.
            patch_data_fn = klass.patch_data
            patch_data_expected_arg_count = patch_data_fn.__code__.co_argcount
            patch_data_args = (
                (data, layer) if patch_data_expected_arg_count > 1 else (data,)
            )
            tensor_data = patch_data_fn(*patch_data_args)
            # resize tensor array to match patched data - patching might
            # reduce number of tensors
            o_l.tensors = o_l.tensors[: len(tensor_data)]
            for x, data in zip(o_l.tensors, tensor_data):
                x.data = data

        # after this point we should have equal amount of shapes and tensors
        assert len(o_l.tensors) == len(shapes)

        for x, shape in zip(o_l.tensors, shapes):
            assert x.data.size == np.prod(shape)
            x.shape = shape

        o_l.inputs = [i for i in inputs if i not in model_tensors]

    else:
        # no 'out_shapes' lambda was specified (or no constant inputs),
        # op does not require tensor args
        o_l.tensors = []
        o_l.inputs = inputs

    # Force all tensors to float32
    for x in o_l.tensors:
        x.data = x.data.astype(np.float32)

    # Unknown input ranks are represented by -1.
    input_ranks = [layer_ranks.get(i, -1) for i in o_l.inputs]
    for i in o_l.inputs:
        if i not in layer_ranks and "lstm" not in i:
            print("WARNING: rank unknown for tensor", i, "while processing node", name)
    if hasattr(klass, "rank"):
        rank = klass.rank
        if callable(rank):
            # for rank() lambda all input ranks have to be known (not -1)
            assert -1 not in input_ranks
            rank = rank(input_ranks)
    if rank is None:
        # Fall back to the rank shared by all inputs.
        assert len(input_ranks) > 0
        assert input_ranks.count(input_ranks[0]) == len(input_ranks)
        rank = input_ranks[0]
    layer_ranks[name] = rank
    o_l.rank = rank

    # Layer is ready
    context.layers.append(o_l)
|
|
|
|
|
|
class ModelBuilderContext:
    """Mutable bookkeeping shared by the conversion steps of a single model."""

    def __init__(self):
        # Converted layers, appended in processing order.
        self.layers = list()
        # Shape recorded for each Placeholder node, keyed by node name.
        self.input_shapes = dict()
        # Constant tensors, keyed by the name they are referenced by.
        self.model_tensors = dict()
        # Flat list of recurrent-state entries appended by pattern transforms.
        self.model_memories = list()
        # Rank recorded for each processed node, keyed by node name.
        self.layer_ranks = dict()
        # For every skipped node: the inputs that stand in for it.
        self.map_ignored_layer_to_its_input = dict()
|
|
|
|
|
|
def process_model(model, args):
    """Convert every node of a TensorFlow graph into Barracuda layers.

    Nodes are first ordered with ``slow_but_stable_topological_sort``. The
    node list is then scanned from the front: at each position every entry of
    ``known_patterns`` is tried, and the first pattern that matches is
    replaced by the layers returned from the corresponding
    ``transform_patterns`` callable. Nodes not covered by any pattern are
    converted one by one with ``process_layer``.

    :param model: graph object exposing a ``node`` sequence; each node has
        ``name``, ``op`` and ``input``.
    :param args: options object; ``print_patterns`` and ``verbose`` are read
        here and the object is forwarded to ``process_layer``.
    :return: tuple ``(layers, input_shapes, model_tensors, model_memories,
        unconnected_const_names)``.
    """
    o_context = ModelBuilderContext()

    # Find node patterns
    nodes_as_array = list(model.node)
    nodes_as_array = slow_but_stable_topological_sort(nodes_as_array, verbose=True)

    def get_tensors(pattern_nodes):
        """Split what a matched pattern consumes into outside inputs and constants.

        :return: ``(inputs_from_outside_pattern, tensors)`` - names of inputs
            produced outside the pattern (order preserved, duplicates
            removed) and ``Struct`` records of the constants it consumes.
        """
        map_ignored_layer_to_its_input = o_context.map_ignored_layer_to_its_input
        model_tensors = o_context.model_tensors

        # tensors <= all Const nodes within this pattern
        const_nodes = [n for n in pattern_nodes if n.op == "Const"]

        # TODO: unify / reuse code from process_layer
        identity_nodes = [n for n in pattern_nodes if n.op == "Identity"]
        for i in identity_nodes:
            inputs = replace_strings_in_list(i.input, map_ignored_layer_to_its_input)
            map_ignored_layer_to_its_input[i.name] = inputs

        # gather inputs from Op nodes (not Const, not Identity)
        op_nodes = [
            n for n in pattern_nodes if n.op != "Const" and n.op != "Identity"
        ]
        inputs_to_op_nodes = list(
            flatten([list(flatten(n.input)) for n in op_nodes])
        )
        inputs_to_op_nodes = replace_strings_in_list(
            inputs_to_op_nodes, map_ignored_layer_to_its_input
        )
        # drop the ":<suffix>" part of input names
        inputs_to_op_nodes = [i.split(":")[0] for i in inputs_to_op_nodes]

        const_nodes_by_name = {n.name: n for n in const_nodes}
        tensors = []
        for i in inputs_to_op_nodes:
            if i in model_tensors:
                src = model_tensors[i]
            elif i in const_nodes_by_name:
                src = const_nodes_by_name[i].attr["value"].tensor
            else:
                continue
            tensors.append(
                Struct(
                    name=i,
                    obj=src,
                    shape=get_tensor_dims(src),
                    data=get_tensor_data(src),
                )
            )
        tensor_names = [t.name for t in tensors]

        # filter only inputs that are coming from nodes that are outside this pattern
        # preserve the order
        names_in_pattern = [n.name for n in pattern_nodes] + tensor_names
        inputs_from_outside_pattern = remove_duplicates_from_list(
            [i for i in inputs_to_op_nodes if i not in names_in_pattern]
        )

        return inputs_from_outside_pattern, tensors

    node_index = 0
    while node_index < len(nodes_as_array):
        node = nodes_as_array[node_index]
        match = False
        for pattern_repr, pattern_name in known_patterns.items():
            # NOTE(review): pattern keys are evaluated as Python source; this
            # presumes `known_patterns` is defined by this module only and
            # never built from external input - confirm.
            pattern = eval(pattern_repr)
            if node_index + len(pattern) > len(nodes_as_array):
                continue  # pattern too long, skip

            # Patterns that start with Const/Identity must match node by node;
            # for all others, interleaved Const/Identity nodes are skipped.
            require_exact_match = pattern[0] == "Const" or pattern[0] == "Identity"
            pattern_end = node_index

            for p in pattern:
                if not require_exact_match:
                    while (
                        pattern_end < len(nodes_as_array)
                        and nodes_as_array[pattern_end].op != p
                        and (
                            nodes_as_array[pattern_end].op == "Const"
                            or nodes_as_array[pattern_end].op == "Identity"
                        )
                    ):
                        pattern_end += 1
                if pattern_end >= len(nodes_as_array):
                    # NOTE(review): `match` keeps the value of the previous
                    # pattern element here, so running out of nodes after a
                    # partial match is reported as a match. Left unchanged -
                    # verify whether existing models rely on it.
                    break

                match = False
                if hasattr(p, "match"):  # regexp
                    while pattern_end < len(nodes_as_array) and p.match(
                        nodes_as_array[pattern_end].name
                    ):
                        match = True
                        pattern_end += 1
                else:  # exact string
                    match = nodes_as_array[pattern_end].op == p
                    pattern_end += 1

                if not match:
                    break

            if match:
                nodes = nodes_as_array[node_index:pattern_end]
                name = nodes[-1].name
                var_tensors, const_tensors = get_tensors(nodes)
                if args.print_patterns or args.verbose:
                    print(
                        "PATTERN:",
                        name,
                        "~~",
                        pattern_name,
                        "<-",
                        var_tensors,
                        "+",
                        [t.name for t in const_tensors],
                    )
                    print(" ", pattern)
                # Const/Identity nodes inside the pattern still have to be
                # registered in the context tables.
                for n in nodes:
                    if n.op == "Const" or n.op == "Identity":
                        process_layer(n, o_context, args)

                new_layers = transform_patterns[pattern_name](
                    nodes, var_tensors, const_tensors, o_context
                )
                if not isinstance(new_layers, list):
                    # A single replacement layer takes the name of the last
                    # node of the pattern unless the transform already named
                    # it. (The attribute looked up must be the literal
                    # "name", not the value of `name`.)
                    if not hasattr(new_layers, "name"):
                        new_layers.name = name
                    new_layers = [new_layers]

                for l in new_layers:
                    # TODO: prefix new layer names with scope, patch inputs
                    #        l.name = name + '/' + l.name
                    process_layer(l, o_context, args)

                node_index = pattern_end
                break  # pattern found & processed

        if not match:
            # TODO: gather tensors in the same way as patterns do
            process_layer(node, o_context, args)
            node_index += 1

    def find_unconnected_const_nodes(nodes):
        """Return names of Const nodes that no node lists among its inputs."""
        nodes_with_consts = {node.name: node for node in nodes if node.op == "Const"}
        for node in nodes:
            for i in node.input:
                nodes_with_consts.pop(i, None)
        return list(nodes_with_consts.keys())

    return (
        o_context.layers,
        o_context.input_shapes,
        o_context.model_tensors,
        o_context.model_memories,
        find_unconnected_const_nodes(nodes_as_array),
    )
|
|
|
|
|
|
# Sort nodes so that all input dependencies are satisfied beforehand
|
|
# while preserving original order of the nodes in the model whenever possible.
|
|
# NOTE: preservation of original order is important for pattern matching
|
|
def slow_but_stable_topological_sort(nodes, verbose):
    """Order nodes so that every node comes after the nodes it takes as input.

    All ``Const`` nodes are moved to the front, keeping their relative order.
    The remaining nodes are reordered only as far as needed: a node is moved
    in front of the earliest node that consumes it, so the original order is
    otherwise preserved.

    The loop repeats until a full pass makes no move, so it does not
    terminate for a graph in which two nodes consume each other.

    :param nodes: sequence of nodes exposing ``name``, ``op`` and ``input``.
    :param verbose: if true, progress is printed while additional passes are
        needed.
    :return: new list containing the same node objects in sorted order.
    """
    const_nodes = [node for node in nodes if node.op == "Const"]
    sortable_nodes = [node for node in nodes if node.op != "Const"]

    # TODO: optimize for performance
    # based on http://blog.gapotchenko.com/stable-topological-sort

    # Work on integer indices instead of names to keep the inner loop cheap.
    index_by_name = {
        node.name: index for index, node in enumerate(sortable_nodes)
    }
    # Inputs that are not sortable nodes (e.g. constants) map to -1.
    inputs_by_index = [None] * len(sortable_nodes)
    for node in sortable_nodes:
        inputs_by_index[index_by_name[node.name]] = {
            index_by_name.get(i, -1) for i in node.input
        }

    prefix_printed = False

    def print_status(is_sorted):
        """Print a progress line: prefix on first unsorted pass, dots after."""
        nonlocal prefix_printed
        if not is_sorted:
            if not prefix_printed:
                print("Sorting model, may take a while...", end="", flush=True)
                prefix_printed = True
            else:
                print(".", end="", flush=True)
        else:
            if prefix_printed:
                print(" Done!")

    order = list(range(len(sortable_nodes)))
    count = len(order)
    is_sorted = False
    while not is_sorted:
        is_sorted = True
        for i in range(count):
            for j in range(i):
                # the node at position i feeds the earlier node at position j:
                # move it right in front of its consumer
                if order[i] in inputs_by_index[order[j]]:
                    order.insert(j, order.pop(i))
                    is_sorted = False
        if verbose:
            print_status(is_sorted)

    assert len(order) == len(sortable_nodes)
    assert len(order) + len(const_nodes) == len(nodes)
    return const_nodes + [sortable_nodes[index] for index in order]
|
|
|
|
|
|
def very_slow_but_stable_topological_sort(nodes, verbose):
    """Reorder ``nodes`` in place so that producers precede their consumers.

    Whenever a node is listed in the ``input`` of a node positioned before
    it, it is moved directly in front of that node; passes are repeated until
    one completes without a move.

    :param nodes: list of nodes exposing ``name`` and ``input``; modified in
        place.
    :param verbose: not used by this function.
    :return: the same list object that was passed in.
    """
    # TODO: optimize for performance
    # based on http://blog.gapotchenko.com/stable-topological-sort
    count = len(nodes)

    while True:
        moved = False
        for i in range(count):
            for j in range(i):
                if nodes[i].name in nodes[j].input:
                    nodes.insert(j, nodes.pop(i))
                    moved = True
        if not moved:
            break

    assert len(nodes) == count
    return nodes
|
|
|
|
|
|
#########################################################
|
|
|
|
|
|
def convert(
    source_file,
    target_file,
    trim_unused_by_output="",
    verbose=False,
    compress_f16=False,
):
    """
    Converts a TensorFlow model into a Barracuda model.
    :param source_file: The TensorFlow Model
    :param target_file: The name of the file the converted model will be saved to
    :param trim_unused_by_output: The regexp to match output nodes to remain in the model.
        All other unconnected nodes will be removed.
    :param verbose: If True, will display debug messages. An options object
        carrying ``verbose`` and the ``print_*`` flags may be passed instead
        of a bool; it is then used as-is.
    :param compress_f16: If true, the float values will be converted to f16
    :return:
    """
    if isinstance(verbose, bool):
        # A plain flag switches every diagnostic output on or off together.
        args = Struct()
        args.verbose = verbose
        args.print_layers = verbose
        args.print_source_json = verbose
        args.print_barracuda_json = verbose
        args.print_layer_links = verbose
        args.print_patterns = verbose
        args.print_tensors = verbose
        args.print_supported_ops = verbose
    else:
        args = verbose

    if args.print_supported_ops:
        barracuda.print_known_operations(known_classes, known_activations)

    # Load Tensorflow model
    print("Converting %s to %s" % (source_file, target_file))
    i_model = tf.GraphDef()
    # the context manager closes the file handle once the graph is parsed
    with open(source_file, "rb") as f:
        i_model.ParseFromString(f.read())

    if args.verbose:
        print("OP_TYPES:", {layer.op for layer in i_model.node})

    if args.print_source_json or args.verbose:
        for layer in i_model.node:
            if not layer.op == "Const":
                print("MODEL:", MessageToJson(layer) + ",")

    # Convert
    o_model = barracuda.Model()
    (
        o_model.layers,
        o_input_shapes,
        o_model.tensors,
        o_model.memories,
        o_model.globals,
    ) = process_model(i_model, args)

    # Cleanup unconnected Identities (they might linger after processing complex node patterns like LSTM)
    def cleanup_layers(layers):
        all_layers = {l.name for l in layers}
        all_inputs = {i for l in layers for i in l.inputs}

        def is_unconnected_identity(layer):
            if layer.class_name == "Activation" and layer.activation == 0:  # Identity
                assert len(layer.inputs) == 1
                if layer.inputs[0] not in all_layers and layer.name not in all_inputs:
                    return True
            return False

        return [l for l in layers if not is_unconnected_identity(l)]

    o_model.layers = cleanup_layers(o_model.layers)

    # Collected before trimming; used below both to pick the constants that
    # need a Load layer and to detect output layers.
    all_inputs = {i for l in o_model.layers for i in l.inputs}

    # Trim
    if trim_unused_by_output:
        o_model.layers = barracuda.trim(
            o_model.layers, trim_unused_by_output, args.verbose
        )

    # Create load layer for constants
    def dims_to_barracuda_shape(dims):
        # left-pad with 1s until the shape has at least 4 dimensions
        shape = list(dims)
        while len(shape) < 4:
            shape = [1] + shape
        return shape

    const_tensors = [i for i in all_inputs if i in o_model.tensors]
    const_tensors += o_model.globals
    for x in const_tensors:
        shape = dims_to_barracuda_shape(get_tensor_dims(o_model.tensors[x]))
        o_l = Struct(
            type=255,  # Load
            class_name="Const",
            name=x,
            pads=[0, 0, 0, 0],
            strides=[],
            pool_size=[],
            axis=-1,
            alpha=1,
            beta=0,
            activation=0,
            inputs=[],
            tensors=[
                Struct(
                    name=x,
                    shape=shape,
                    data=np.reshape(get_tensor_data(o_model.tensors[x]), shape).astype(
                        np.float32
                    ),
                )
            ],
        )
        o_model.layers.insert(0, o_l)

    # Find model inputs & outputs
    all_layers = {l.name for l in o_model.layers}
    # global inputs => are inputs that are NOT connected to any layer in the network
    # global outputs => are outputs that are NOT feeding any layer in the network OR are coming from Identity layers
    o_model.inputs = {
        i: o_input_shapes[i]
        for l in o_model.layers
        for i in l.inputs
        if i not in all_layers and i not in o_model.memories
    }

    def is_output_layer(layer):
        # Constants never count as global output even when unconnected
        if layer.class_name == "Const":
            return False
        # this layer is not inputing to any other layer
        if layer.name not in all_inputs:
            return True
        # Identity marks global output
        if layer.class_name == "Activation" and layer.activation == 0:
            return True
        return False

    o_model.outputs = [l.name for l in o_model.layers if is_output_layer(l)]

    # Compress
    if compress_f16:
        o_model = barracuda.compress(o_model)

    # Sort model so that layer inputs are always ready upfront
    o_model.layers = barracuda.sort(
        o_model.layers, o_model.inputs, o_model.memories, args.verbose
    )
    o_model.layers = barracuda.fuse(o_model.layers, args.verbose)

    # Summary
    barracuda.summary(
        o_model,
        print_layer_links=args.print_layer_links or args.verbose,
        print_barracuda_json=args.print_barracuda_json or args.verbose,
        print_tensors=args.print_tensors or args.verbose,
    )

    # Write to file
    barracuda.write(o_model, target_file)
    print("DONE: wrote", target_file, "file.")
|