ml-agents/ml-agents/mlagents/trainers/barracuda.py


								from __future__ import print_function

								from collections import defaultdict

								import numpy as np

								import json

								import struct  # convert from Python values and C structs

								import re

								import argparse

								import os.path


								# Version number of the serialized format; write() emits it as the leading
								# 64-bit integer of the output file.
								BARRACUDA_VERSION = 16


								# Definition of Barracuda model

								class Model:
								    """Container for the pieces that make up a Barracuda model.

								    Every field starts out empty; the code that builds the model fills
								    them in.
								    """

								    def __init__(self):
								        # layers:   layer objects, in the order they are serialized
								        # tensors:  dictionary, empty on construction
								        # inputs:   dictionary; write() iterates it as name -> shape
								        self.layers, self.tensors, self.inputs = [], {}, {}
								        # outputs:  list, written by write() as an array of strings
								        # globals:  list, reported by summary() when non-empty
								        # memories: flat list read in triples (shape, input name, output name)
								        self.outputs, self.globals, self.memories = [], [], []


								class Struct:
								    """Plain attribute bag: every keyword argument becomes an instance field."""

								    def __init__(self, **entries):
								        # Store the keywords directly in the instance namespace.
								        vars(self).update(entries)


								# Parse command line arguments

								def parse_args(description, source_extension, help):
								    """Parse the converter command line and normalize the file paths.

								    Args:
								        description: Text shown at the top of the ``--help`` output.
								        source_extension: Suffix appended to ``source_file`` when the path
								            does not exist exactly as given.
								        help: Help text for the ``source_file`` positional argument.

								    Returns:
								        The ``argparse.Namespace``. ``source_file`` points at an existing
								        file, ``target_file`` is expanded to ``<dir>/<source stem>.bc``
								        when a directory was given, and ``compress_f16`` is always
								        ``False``.

								    Raises:
								        SystemExit: With status -1 when the source file cannot be found
								            (with or without ``source_extension``); argparse also exits on
								            malformed arguments.
								    """
								    parser = argparse.ArgumentParser(description=description)
								    parser.add_argument("source_file", help=help)
								    parser.add_argument("target_file", help="output Barracuda binary file")
								    parser.add_argument("-trim", "--trim-unused-by-output")
								    parser.add_argument("--print-layers", action="store_true")
								    parser.add_argument("--print-source-json", action="store_true")
								    parser.add_argument("-json", "--print-barracuda-json", action="store_true")
								    parser.add_argument("--print-layer-links", action="store_true")
								    parser.add_argument("--print-patterns", action="store_true")
								    parser.add_argument("--print-tensors", action="store_true")
								    parser.add_argument("--print-supported-ops", action="store_true")
								    parser.add_argument("--verbose", action="store_true")
								    args = parser.parse_args()

								    # TEMP: disabled, until properly implemented
								    # parser.add_argument('-f16', '--compress-f16', action='store_true')
								    args.compress_f16 = False

								    output_extension = ".f16.bc" if args.compress_f16 else ".bc"

								    # The source may be given without its extension; retry with it appended.
								    if not os.path.exists(args.source_file):
								        args.source_file = args.source_file + source_extension

								    if not os.path.exists(args.source_file):
								        print("File", args.source_file, "does not exist.")
								        # The bare `exit` helper is injected by the `site` module for
								        # interactive sessions and may be absent; raise SystemExit directly.
								        raise SystemExit(-1)

								    # A directory target means "same base name as the source, new extension".
								    if os.path.isdir(args.target_file):
								        stem = os.path.splitext(os.path.basename(args.source_file))[0]
								        args.target_file = os.path.join(args.target_file, stem + output_extension)

								    if args.verbose:
								        print(args)

								    return args


								# Fuse training time BatchNorm tensors into Scale & Bias

								def fuse_batchnorm_weights(gamma, beta, mean, var, epsilon):
								    """Fold batch-normalization statistics into one scale and one bias.

								    The returned pair satisfies
								    ``scale * x + bias == gamma * (x - mean) / sqrt(var + epsilon) + beta``.
								    Same scheme as
								    https://github.com/Tencent/ncnn/blob/master/src/layer/batchnorm.cpp

								    Returns:
								        A two-element list ``[scale, bias]``.
								    """
								    # Standard deviation with the numerical-stability term included.
								    std = np.sqrt(var + epsilon)
								    scale = gamma / std
								    bias = beta - gamma * mean / std
								    return [scale, bias]


								# Re-sort layers so that every layer appears after the layers that produce its inputs

								def sort(model, inputs, memories, verbose):
								    """Return the layers ordered so that each one follows the layers it reads.

								    Args:
								        model: A list of layers, or an object exposing that list as
								            ``.layers``. Each layer needs ``name`` and ``inputs``.
								        inputs: Iterable of model input names; these are always available.
								        memories: Flat list read in triples; the middle item of each triple
								            (``memories[1::3]``) is also treated as an available name.
								        verbose: Not used by this function; kept for interface
								            compatibility.

								    Returns:
								        The very same list when it is already ordered, otherwise a new list
								        holding the same layers in dependency order.

								    Raises:
								        KeyError: If a layer reads a name that is neither available nor
								            the name of a layer.
								        AssertionError: If the reordered list still has unsatisfied inputs.
								    """
								    if hasattr(model, "layers"):
								        model = model.layers
								    inputs_and_memories = set(list(inputs) + list(memories[1::3]))

								    def find_missing_inputs(layers, available):
								        """Collect inputs that are read before anything provides them."""
								        missing = set()
								        ready = set(available)
								        for layer in layers:
								            missing.update(i for i in layer.inputs if i not in ready)
								            ready.add(layer.name)
								        return missing

								    def topological_order(vertex_count, successors):
								        """Depth-first topological sort (reverse post-order).

								        Uses an explicit stack instead of recursion so that very deep
								        dependency chains cannot exhaust the interpreter recursion limit.
								        Roots and edges are visited in the same order as the recursive
								        formulation, so the resulting order is the same.
								        """
								        visited = [False] * vertex_count
								        finished = []
								        for root in range(vertex_count):
								            if visited[root]:
								                continue
								            visited[root] = True
								            pending = [(root, iter(successors.get(root, ())))]
								            while pending:
								                vertex, remaining = pending[-1]
								                for nxt in remaining:
								                    if not visited[nxt]:
								                        visited[nxt] = True
								                        pending.append((nxt, iter(successors.get(nxt, ()))))
								                        break
								                else:
								                    # Every successor is done: the vertex itself is finished.
								                    pending.pop()
								                    finished.append(vertex)
								        finished.reverse()
								        return finished

								    # Nothing to do when the current order already satisfies every input.
								    if not find_missing_inputs(model, inputs_and_memories):
								        return model

								    index_of = {layer.name: position for position, layer in enumerate(model)}

								    # Edge producer -> consumer for every input that comes from another layer.
								    successors = defaultdict(list)
								    for layer in model:
								        for name in layer.inputs:
								            if name not in inputs_and_memories:
								                successors[index_of[name]].append(index_of[layer.name])

								    sorted_layer_indices = topological_order(len(model), successors)
								    print("SORTED:", sorted_layer_indices)
								    new_model = [model[idx] for idx in sorted_layer_indices]

								    assert len(find_missing_inputs(new_model, inputs_and_memories)) == 0
								    return new_model


								# Trim

								def trim(model, criteria_regexp_string, verbose):
								    """Keep only the layers that feed layers whose name matches a regexp.

								    Args:
								        model: A list of layers, or an object exposing that list as
								            ``.layers``. Each layer needs ``name`` and ``inputs``.
								        criteria_regexp_string: Pattern applied with ``re.match`` to every
								            layer name; matching layers are the outputs to preserve.
								        verbose: When true, print each set of names reached while walking
								            back from the preserved outputs.

								    Returns:
								        A new list with the preserved layers and everything they depend
								        on, in the original order. When no layer name matches, a warning is
								        printed and the layer list is returned unchanged.
								    """
								    if hasattr(model, "layers"):
								        model = model.layers

								    def iter_leaves(nested):
								        """Yield the non-list items of an arbitrarily nested list, in order."""
								        for element in nested:
								            if isinstance(element, list):
								                yield from iter_leaves(element)
								            else:
								                yield element

								    def keep_reachable(all_layers, frontier):
								        """Walk input links back from ``frontier`` and keep what is reached."""
								        by_name = {layer.name: layer for layer in all_layers}
								        reachable = set(frontier)
								        while frontier:
								            # Step one level towards the model inputs.
								            frontier = set(
								                iter_leaves(
								                    [by_name[name].inputs for name in frontier if name in by_name]
								                )
								            )
								            if verbose and frontier:
								                print(frontier)
								            reachable.update(frontier)

								        dropped = [layer.name for layer in all_layers if layer.name not in reachable]
								        # str(list)[1:-1] renders the list without its brackets.
								        print("TRIMMED:", str(dropped)[1:-1])

								        return [layer for layer in all_layers if layer.name in reachable]

								    layer_names = {layer.name for layer in model}
								    criteria = re.compile(criteria_regexp_string)
								    preserve_outputs = [name for name in layer_names if criteria.match(name)]
								    if not preserve_outputs:
								        print(
								            "WARNING: Trim couldn't find any layers to match:", criteria_regexp_string
								        )
								        return model

								    print("Trimming model given outputs to preserve:", preserve_outputs)
								    return keep_reachable(model, preserve_outputs)


								# Fuse

								def fuse(model, verbose):
								    """Merge runs of adjacent Load layers (type 255) into their first layer.

								    The list is modified in place: the tensors of each following Load layer
								    are added to the preceding one and the follower is removed.

								    Args:
								        model: Mutable list of layers exposing ``type`` and ``tensors``.
								        verbose: Not used by this function.

								    Returns:
								        The same list object that was passed in.
								    """
								    load_type = 255  # Load
								    idx = 0
								    while idx + 1 < len(model):
								        current, following = model[idx], model[idx + 1]
								        if current.type == following.type and current.type == load_type:
								            current.tensors += following.tensors
								            del model[idx + 1]
								            # Stay on the same index: the new neighbour may be a Load too.
								            continue
								        idx += 1
								    return model


								def compress(model):
								    """Convert the tensor data of every ``Dense`` layer to float16, in place.

								    Args:
								        model: Object whose ``layers`` expose ``class_name``, ``name`` and
								            ``tensors``; each tensor exposes ``data``.

								    Returns:
								        The same ``model`` object.
								    """
								    compress_classes = {"Dense"}
								    eligible = (
								        layer for layer in model.layers if layer.class_name in compress_classes
								    )
								    for layer in eligible:
								        print(
								            "Compressing %s layer '%s' weights to float16"
								            % (layer.class_name, layer.name)
								        )
								        for tensor in layer.tensors:
								            tensor.data = np.float16(tensor.data)
								    return model


								# Verbose

								def to_json(model):
								    """Render ``model.layers`` as a JSON-like string for display.

								    Objects the ``json`` module cannot encode are handled as follows:
								    NumPy arrays become an empty string, objects with a non-empty
								    ``__dict__`` are encoded as that dictionary, anything else as
								    ``str(obj)``. The text is then reflowed with line breaks and double
								    quotes are swapped for single quotes, so the result is meant for
								    reading rather than for parsing.
								    """

								    def encode_fallback(o):
								        if isinstance(o, np.ndarray):  # skip binary data packed inside ndarray
								            return ""
								        fields = getattr(o, "__dict__", None)
								        if fields:
								            return o.__dict__
								        return str(o)

								    text = json.dumps(model.layers, default=encode_fallback, separators=(", ", ":"))

								    # custom formatting, applied in this exact order
								    rewrites = (
								        ("]}, {", "]},\n{"),
								        (":[{", ":[\n\t{"),
								        ("}, {", "},\n\t{"),
								        ('"', "'"),
								    )
								    for old, new in rewrites:
								        text = text.replace(old, new)
								    return text


								def summary(model, print_layer_links, print_barracuda_json, print_tensors):
								    """Print an overview of ``model`` to standard output.

								    Always printed: the globals (when non-empty), which model inputs each
								    layer reads, the memory input/output pairs and the outputs.

								    Args:
								        model: Object exposing ``layers``, ``globals``, ``inputs``,
								            ``memories`` and ``outputs``.
								        print_layer_links: Also print ``name <= inputs`` for every layer.
								        print_barracuda_json: Also print the ``to_json`` rendering.
								        print_tensors: Also print name, shape, dtype and data of every
								            tensor of every layer.
								    """

								    def unbracketed(value):
								        # str() of a list/dict minus its first and last character.
								        return str(value)[1:-1]

								    if print_layer_links:
								        for layer in model.layers:
								            print(layer.name, " <= ", layer.inputs)

								    if print_barracuda_json:
								        print(to_json(model))

								    if model.globals:
								        if isinstance(model.globals, dict):
								            # NOTE(review): this iterates the dict's keys and reads
								            # .name/.shape from them, and it rebinds model.globals --
								            # confirm both are intended.
								            model.globals = {x.name: x.shape for x in model.globals}
								        print("GLOBALS:", unbracketed(model.globals))

								    for layer in model.layers:
								        # Inputs of this layer that are inputs of the whole model.
								        used = [name for name in layer.inputs if name in model.inputs]
								        if isinstance(model.inputs, dict):
								            used = {name: model.inputs[name] for name in used}
								        if used:
								            print("IN: %s => '%s'" % (unbracketed(used), layer.name))

								    memory_inputs = model.memories[1::3]
								    memory_outputs = model.memories[2::3]
								    for mem_in, mem_out in zip(memory_inputs, memory_outputs):
								        print("MEM: '%s' => '%s'" % (mem_in, mem_out))

								    print("OUT:", unbracketed(model.outputs))

								    if not print_tensors:
								        return
								    for layer in model.layers:
								        for tensor in layer.tensors:
								            print(tensor.name, tensor.shape, tensor.data.dtype, tensor.data)


								class Build:
								    """Accumulates layer descriptions (``Struct`` objects) under a scope.

								    Every builder method appends one layer to ``self.layers`` and returns
								    the layer's name so calls can be nested. A layer created without an
								    explicit ``out`` name gets a generated one: the op name, made unique
								    with a numeric suffix and prefixed with ``scope/`` when a scope is set.

								    The pseudo-attribute ``_`` evaluates to the name of the most recently
								    added layer, or to the scope while no layer has been added yet.
								    """

								    def __init__(self, scope=""):
								        self.scope = scope
								        self.layers = []
								        # Generated (unscoped) names already handed out by this builder.
								        self.names_taken = set()

								    def __getattr__(self, attr):
								        # Only invoked for attributes that normal lookup does not find.
								        if attr == "_":
								            return self.layers[-1].name if self.layers else self.scope
								        raise AttributeError(attr)

								    def _patch_last_layer_name_and_return(self):
								        """Give the last layer a generated name if it has none; return its name."""
								        if self.layers[-1].name:
								            return self.layers[-1].name

								        # generate unique name based on op and increasing id
								        name = self.layers[-1].op

								        i = 1
								        while name in self.names_taken:
								            name = self.layers[-1].op + "_" + str(i)
								            i += 1
								        self.names_taken.add(name)

								        self.layers[-1].name = self.scope + ("/" if self.scope else "") + name
								        return self.layers[-1].name

								    def _add_layer(self, **fields):
								        """Append a ``Struct`` built from ``fields`` and return its final name."""
								        self.layers.append(Struct(**fields))
								        return self._patch_last_layer_name_and_return()

								    def concat(self, a, b, axis=-1, out=""):
								        """Add a ``Concat`` layer joining ``a`` and ``b`` along ``axis``."""
								        return self._add_layer(name=out, op="Concat", axis=axis, input=[a, b])

								    def mad(self, x, kernel, bias, out=""):
								        """Add a ``Dense`` layer with inputs ``x``, ``kernel`` and ``bias``."""
								        return self._add_layer(name=out, op="Dense", input=[x, kernel, bias])

								    def mul(self, a, b, out=""):
								        """Add a ``Mul`` layer with inputs ``a`` and ``b``."""
								        return self._add_layer(name=out, op="Mul", input=[a, b])

								    def add(self, a, b, out=""):
								        """Add an ``Add`` layer with inputs ``a`` and ``b``."""
								        return self._add_layer(name=out, op="Add", input=[a, b])

								    def sub(self, a, b, out=""):
								        """Add a ``Sub`` layer with inputs ``a`` and ``b``."""
								        return self._add_layer(name=out, op="Sub", input=[a, b])

								    def sigmoid(self, x, out=""):
								        """Add a ``Sigmoid`` layer with input ``x``."""
								        return self._add_layer(name=out, op="Sigmoid", input=[x])

								    def tanh(self, x, out=""):
								        """Add a ``Tanh`` layer with input ``x``."""
								        return self._add_layer(name=out, op="Tanh", input=[x])

								    def reduce(self, op, x, axis=-1, out=""):
								        """Add a ``Reduce<op>`` layer (e.g. ``ReduceMean``) over ``axis``."""
								        return self._add_layer(name=out, op="Reduce" + op, axis=axis, input=[x])

								    def pool(self, op, x, out=""):
								        """Add an ``<op>Pool`` layer (e.g. ``GlobalAvgPool``) with input ``x``."""
								        return self._add_layer(name=out, op=op + "Pool", input=[x])

								    def strided_slice(self, x, begin, end, strides, rank, out=""):
								        """Add a ``StridedSlice`` layer; bounds are stored as starts/ends/slice_strides."""
								        return self._add_layer(
								            name=out,
								            op="StridedSlice",
								            rank=rank,
								            starts=begin,
								            ends=end,
								            slice_strides=strides,
								            input=[x],
								        )


								def mean(name, input, axis=-1):
								    """ Build a mean operation out of several simpler ops.

								    Which layers are produced depends on ``axis``:
								    ``[1, 2]`` gives a global average pool, ``[1, 2, 3]`` a global average
								    pool followed by a mean reduction, and ``3``/``-1`` (bare or wrapped in
								    a list) a mean reduction only. Any other ``axis`` produces no layer.

								    Returns:
								        The list of layers that were created; the last one is named ``name``.
								    """
								    nn = Build(name)

								    def axis_is(candidate):
								        return np.array_equal(axis, candidate)

								    if axis_is([1, 2]):
								        nn.pool("GlobalAvg", input, out=name)
								    elif axis_is([1, 2, 3]):
								        pooled = nn.pool("GlobalAvg", input)  # over height & width
								        nn.reduce("Mean", pooled, out=name)  # over channels
								    elif any(axis_is(candidate) for candidate in ([3], [-1], 3, -1)):
								        nn.reduce("Mean", input, out=name)
								    return nn.layers


								def rnn(name, input, state, kernel, bias, new_state, number_of_gates=2):
								    """ Build a basic recurrent cell: tanh(dense(concat(input, state))).

								    - Ht = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)

								    ``number_of_gates`` is accepted but not used. The final layer is named
								    ``new_state``.

								    Returns:
								        The list of layers that were created.
								    """
								    nn = Build(name)
								    joined = nn.concat(input, state)
								    pre_activation = nn.mad(kernel=kernel, bias=bias, x=joined)
								    nn.tanh(pre_activation, out=new_state)
								    return nn.layers


								def gru(
								    name,
								    input,
								    state,
								    kernel_r,
								    kernel_u,
								    kernel_c,
								    bias_r,
								    bias_u,
								    bias_c,
								    new_state,
								    number_of_gates=2,
								):
								    """ Build a GRU cell out of simpler ops.

								        - zt = f(Xt*Wz + Ht_1*Rz        + Wbz + Rbz)
								        - rt = f(Xt*Wr + Ht_1*Rr        + Wbr + Rbr)
								        - ht = g(Xt*Wh + (rt . Ht_1)*Rh + Rbh + Wbh)
								        - Ht = (1-zt).ht + zt.Ht_1

								    ``number_of_gates`` is accepted but not used. The final layer is named
								    ``new_state``.

								    Returns:
								        The list of layers that were created.
								    """
								    nn = Build(name)
								    inputs = nn.concat(input, state)

								    u = nn.sigmoid(nn.mad(inputs, kernel_u, bias_u))
								    r = nn.sigmoid(nn.mad(inputs, kernel_r, bias_r))
								    r_state = nn.mul(r, state)

								    c = nn.tanh(nn.mad(kernel=kernel_c, bias=bias_c, x=nn.concat(input, r_state)))

								    # new_h = u' * state + (1 - u') * c'
								    #       = u' * state + c' - u' * c'

								    # u' * state + c'
								    # Keep the name returned by add() rather than reading it back through
								    # the builder's `_` shortcut, so this does not depend on that lookup.
								    state_plus_c = nn.add(nn.mul(u, state), c)
								    # - u' * c'
								    nn.sub(state_plus_c, nn.mul(u, c), out=new_state)

								    return nn.layers


								def lstm(
								    name,
								    input,
								    state_c,
								    state_h,
								    kernel_i,
								    kernel_j,
								    kernel_f,
								    kernel_o,
								    bias_i,
								    bias_j,
								    bias_f,
								    bias_o,
								    new_state_c,
								    new_state_h,
								):
								    """ Build an LSTM cell (variant without peephole connections).

								    Full LSTM, for reference:
								    - it = f(Xt*Wi + Ht_1*Ri + Pi . Ct_1 + Wbi + Rbi)
								    - ft = f(Xt*Wf + Ht_1*Rf + Pf . Ct_1 + Wbf + Rbf)
								    - ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
								    - Ct =  ft . Ct_1  + it . ct
								    - ot = f(Xt*Wo + Ht_1*Ro + Po . Ct + Wbo + Rbo)
								    - Ht =  ot . h(Ct)

								    No peephole (what the layers below compute):
								    - it = f(Xt*Wi + Ht_1*Ri + Wbi + Rbi)
								    - ft = f(Xt*Wf + Ht_1*Rf + Wbf + Rbf)
								    - ct = g(Xt*Wc + Ht_1*Rc + Wbc + Rbc)
								    - Ct =   ft . Ct_  + it . ct
								    - ot = f(Xt*Wo + Ht_1*Ro + Wbo + Rbo)
								    - Ht =   ot . h(Ct)

								    The layers producing the new cell and hidden state are named
								    ``new_state_c`` and ``new_state_h``.

								    Returns:
								        The list of layers that were created.
								    """
								    nn = Build(name)
								    gate_input = nn.concat(input, state_h)

								    def gate(activation, kernel, bias):
								        # Dense layer on the shared concat, followed by the activation.
								        return activation(nn.mad(gate_input, kernel, bias))

								    i = gate(nn.sigmoid, kernel_i, bias_i)
								    j = gate(nn.tanh, kernel_j, bias_j)
								    f = gate(nn.sigmoid, kernel_f, bias_f)
								    o = gate(nn.sigmoid, kernel_o, bias_o)

								    # new_c = state_c * f' + i' * j'
								    kept = nn.mul(state_c, f)
								    written = nn.mul(i, j)
								    nn.add(kept, written, out=new_state_c)

								    # new_h = o' * tanh(new_c)
								    squashed = nn.tanh(new_state_c)
								    nn.mul(o, squashed, out=new_state_h)

								    return nn.layers


								# Serialize

								class BarracudaWriter:
								    """Binary writer for the primitive values of a Barracuda file.

								    Scalars are packed little-endian with ``struct``. Strings are written
								    as an int32 length followed by their ASCII bytes. The instance can be
								    used as a context manager, which closes the file on exit.
								    """

								    f = None

								    def __init__(self, filename):
								        self.f = open(filename, "wb+")

								    def __enter__(self):
								        return self

								    def __exit__(self, type, value, tb):
								        self.f.close()

								    def _pack(self, fmt, value):
								        """Write ``value`` packed with the ``struct`` format ``fmt``."""
								        self.f.write(struct.pack(fmt, value))

								    def write_array(self, arr):
								        """Write the raw contents of ``arr`` via its ``tofile`` method."""
								        arr.tofile(self.f)

								    def write_str_array(self, array_of_strigs):
								        """Write the number of strings, then each string."""
								        self.write_int32(len(array_of_strigs))
								        for text in array_of_strigs:
								            self.write_str(text)

								    def write_str(self, s):
								        """Write ``len(s)`` as int32, then ``s`` encoded as ASCII."""
								        self.write_int32(len(s))
								        encoded = s.encode("ascii")
								        self.f.write(encoded)

								    def write_float(self, d):
								        """Write a little-endian 32-bit float."""
								        self._pack("<f", d)

								    def write_int32(self, d):
								        """Write a little-endian signed 32-bit integer."""
								        self._pack("<i", d)

								    def write_int64(self, d):
								        """Write a little-endian signed 64-bit integer."""
								        self._pack("<q", d)

								    def write_shape(self, s):
								        """Write the number of dimensions, then each one (``None`` as -1)."""
								        self.write_int32(len(s))
								        for dim in s:
								            self.write_int32(-1 if dim is None else dim)

								    def close(self):
								        """Close the underlying file."""
								        self.f.close()


								def write(model, filename):
								    """Serialize ``model`` to ``filename`` in the Barracuda binary layout.

								    Sections, in the order they are written:
								      1. version (int64),
								      2. inputs: count, then name and shape of each,
								      3. output names (string array),
								      4. memories: number of triples, then shape, input name and output
								         name of each,
								      5. layers: count, then a header and tensor descriptors per layer,
								      6. the data of every tensor, in the order the descriptors were written.

								    Args:
								        model: Object exposing ``inputs`` (mapping with ``items()``),
								            ``outputs``, ``memories`` (flat list read in triples) and
								            ``layers``.
								        filename: Path of the file to create.

								    Raises:
								        AssertionError: If a layer lists itself as an input, a tensor shape
								            does not have 4 dimensions, or a tensor's byte size is not a
								            multiple of 4.
								    """

								    with BarracudaWriter(filename) as w:

								        # VERSION = 0xBA22AC0DA000 + BARRACUDA_VERSION
								        w.write_int64(BARRACUDA_VERSION)

								        # inputs
								        w.write_int32(len(model.inputs))
								        for name, shape in model.inputs.items():
								            w.write_str(name)
								            w.write_shape(shape)
								        # outputs
								        w.write_str_array(model.outputs)

								        # memories
								        # model.memories is flat: [shape, in, out, shape, in, out, ...]
								        w.write_int32(len(model.memories) // 3)
								        for mem_shape, mem_in, mem_out in zip(
								            model.memories[0::3], model.memories[1::3], model.memories[2::3]
								        ):
								            w.write_shape(mem_shape)
								            w.write_str(mem_in)
								            w.write_str(mem_out)

								        # layers
								        # offset: running position of the next tensor inside the data
								        # section, counted in 4-byte units (see `length` below).
								        offset = 0
								        # Tensors are only described here; their data is written after all
								        # layer headers.
								        all_tensors = []

								        w.write_int32(len(model.layers))
								        for l in model.layers:

								            # A layer must not consume its own output.
								            assert l.name not in l.inputs

								            w.write_str(l.name)
								            w.write_int32(l.type)
								            w.write_int32(l.activation)
								            w.write_int32(0)  # dummy
								            w.write_int32(0)  # dummy
								            w.write_shape(l.pads)
								            w.write_shape(l.strides)
								            w.write_shape(l.pool_size)
								            w.write_int32(l.axis)
								            w.write_float(l.alpha)
								            w.write_float(l.beta)
								            w.write_int32(0)  # dummy
								            w.write_str_array(l.inputs)

								            # Tensor descriptors: name, shape, offset, item size, length.
								            w.write_int32(len(l.tensors))
								            for x in l.tensors:
								                assert len(x.shape) == 4
								                assert x.data.nbytes % 4 == 0
								                length = (
								                    x.data.nbytes >> 2
								                )  # length is measured in float32s (at least for now)

								                w.write_str(x.name)
								                w.write_shape(x.shape)
								                w.write_int64(offset)
								                w.write_int32(x.data.itemsize)
								                w.write_int32(length)

								                offset += length
								                all_tensors.append(x)

								        # Data section: raw tensor contents, same order as the descriptors.
								        for x in all_tensors:
								            w.write_array(x.data)


								def print_known_operations(known_classes, known_activations):
								    """Print the supported op names, then the supported activation names.

								    Each group is preceded by a heading and listed one key per line in
								    sorted order.

								    Args:
								        known_classes: Mapping whose keys are the supported op names.
								        known_activations: Mapping whose keys are the supported activation
								            names.
								    """
								    sections = (
								        ("OPS supported by the converter:", known_classes),
								        ("ACTIVATIONS supported by the converter:", known_activations),
								    )
								    for heading, table in sections:
								        print(heading)
								        for key in sorted(table.keys()):
								            print(key)