add some code
@@ -0,0 +1,21 @@
# weight-exchange


## Weight Exchange
Repo for exchanging weights between torch and tensorflow.keras modules, using an intermediate numpy format.

Routines for loading/dumping torch weights are located in exchange/torch and can be loaded with
```
import exchange.torch
```
and routines for loading/dumping tensorflow weights are located in exchange/tf and can be loaded with
```
import exchange.tf
```

Note that `exchange.torch` requires torch to be installed and `exchange.tf` requires tensorflow. To avoid the necessity of installing both torch and tensorflow in the working environment, none of these submodules is imported when calling `import exchange`. Similarly, the requirements listed in `requirements.txt` include neither Tensorflow nor Pytorch.
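For example, a torch layer can be moved to an equivalent keras layer via the intermediate numpy format (a minimal sketch; the import paths assume the `wexchange` package name from `setup.py`):
```
import torch
import tensorflow as tf
from wexchange.torch import dump_torch_weights
from wexchange.tf import load_tf_weights

# dump a torch layer to a directory of .npy files ...
dense = torch.nn.Linear(64, 32)
dump_torch_weights('dense_weights', dense)

# ... and load them into an equivalent keras layer
keras_dense = tf.keras.layers.Dense(32)
keras_dense.build((None, 64))
load_tf_weights('dense_weights', keras_dense)
```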

## C export
The module `exchange.c_export` contains routines to export weights to C files. In the long run it will be possible to call all `dump_...` functions with either a path string or a `CWriter` instance, based on which the export format is chosen. This is currently only implemented for `torch.nn.GRU`, `torch.nn.Linear` and `torch.nn.Conv1d`.
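Passing a `CWriter` instead of a path might look as follows (a sketch under the same package-name assumption):
```
import torch
from wexchange.c_export import CWriter
from wexchange.torch import dump_torch_weights

writer = CWriter("my_model", message="auto-generated by wexchange")
dump_torch_weights(writer, torch.nn.Linear(16, 16), name="dense")
writer.close()  # finalizes and writes my_model.c and my_model.h
```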
@@ -0,0 +1 @@
numpy
@@ -0,0 +1,48 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

#!/usr/bin/env python
import os
from setuptools import setup

lib_folder = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(lib_folder, 'requirements.txt'), 'r') as f:
    install_requires = list(f.read().splitlines())

print(install_requires)

setup(name='wexchange',
      version='1.6',
      author='Jan Buethe',
      author_email='jbuethe@amazon.de',
      description='Weight-exchange library between Pytorch and Tensorflow',
      packages=['wexchange', 'wexchange.tf', 'wexchange.torch', 'wexchange.c_export'],
      install_requires=install_requires
      )
@@ -0,0 +1,30 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

from . import c_export
@@ -0,0 +1,31 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

from .c_writer import CWriter
from .common import print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer, print_vector
@@ -0,0 +1,181 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

import os
from collections import OrderedDict


class CWriter:
    def __init__(self,
                 filename_without_extension,
                 message=None,
                 header_only=False,
                 create_state_struct=False,
                 enable_binary_blob=True,
                 model_struct_name="Model",
                 nnet_header="nnet.h",
                 add_typedef=False):
        """
        Writer class for creating source and header files for weight exports to C

        Parameters:
        -----------

        filename_without_extension: str
            filename from which .c and .h files are created

        message: str, optional
            if given and not None, this message will be printed as a comment in the header file

        header_only: bool, optional
            if True, only a header file is created; defaults to False

        enable_binary_blob: bool, optional
            if True, export is done in binary blob format and a model type is created; defaults to True

        create_state_struct: bool, optional
            if True, a state struct type is created in the header file; if False, state sizes are defined as macros; defaults to False

        model_struct_name: str, optional
            name used for the model struct type; only relevant when enable_binary_blob is True; defaults to "Model"

        nnet_header: str, optional
            name of the nnet header file; defaults to nnet.h
        """

        self.header_only = header_only
        self.enable_binary_blob = enable_binary_blob
        self.create_state_struct = create_state_struct
        self.model_struct_name = model_struct_name
        self.add_typedef = add_typedef

        # for binary blob format: key=<layer name>, value=(<layer type>, <init call>)
        self.layer_dict = OrderedDict()

        # names of weight arrays written so far (checked for duplicates on finalize)
        self.weight_arrays = []

        # for the state struct: key=<state name>, value=<number of elements>
        self.state_dict = OrderedDict()

        self.header = open(filename_without_extension + ".h", "w")
        header_name = os.path.basename(filename_without_extension) + '.h'

        if message is not None:
            self.header.write(f"/* {message} */\n\n")

        self.header_guard = os.path.basename(filename_without_extension).upper() + "_H"
        self.header.write(
f'''
#ifndef {self.header_guard}
#define {self.header_guard}

#include "{nnet_header}"

'''
        )

        if not self.header_only:
            self.source = open(filename_without_extension + ".c", "w")
            if message is not None:
                self.source.write(f"/* {message} */\n\n")

            self.source.write(
f"""
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

""")
            self.source.write(f'#include "{header_name}"\n\n')

    def _finalize_header(self):

        # create model type
        if self.add_typedef:
            self.header.write("\ntypedef struct {")
        else:
            self.header.write(f"\nstruct {self.model_struct_name} {{")
        for name, data in self.layer_dict.items():
            layer_type = data[0]
            self.header.write(f"\n    {layer_type} {name};")
        if self.add_typedef:
            self.header.write(f"\n}} {self.model_struct_name};\n")
        else:
            self.header.write("\n};\n")

        init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
        self.header.write(f"\n{init_prototype};\n")

        self.header.write(f"\n#endif /* {self.header_guard} */\n")

    def _finalize_source(self):

        # create weight array
        if len(set(self.weight_arrays)) != len(self.weight_arrays):
            raise ValueError("error: detected duplicates in weight arrays")

        if self.enable_binary_blob:
            self.source.write("\n#ifndef USE_WEIGHTS_FILE\n")
        self.source.write(f"const WeightArray {self.model_struct_name.lower()}_arrays[] = {{\n")
        for name in self.weight_arrays:
            self.source.write(f"#ifdef WEIGHTS_{name}_DEFINED\n")
            self.source.write(f'    {{"{name}", WEIGHTS_{name}_TYPE, sizeof({name}), {name}}},\n')
            self.source.write("#endif\n")
        self.source.write("    {NULL, 0, 0, NULL}\n")
        self.source.write("};\n")

        if self.enable_binary_blob:
            self.source.write("#endif /* USE_WEIGHTS_FILE */\n")

        # create init function definition
        init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
        if self.enable_binary_blob:
            self.source.write("\n#ifndef DUMP_BINARY_WEIGHTS\n")
        self.source.write(f"{init_prototype} {{\n")
        for name, data in self.layer_dict.items():
            self.source.write(f"    if ({data[1]}) return 1;\n")
        self.source.write("    return 0;\n")
        self.source.write("}\n")
        if self.enable_binary_blob:
            self.source.write("#endif /* DUMP_BINARY_WEIGHTS */\n")

    def close(self):

        if not self.header_only:
            self._finalize_source()
            self.source.close()

        self._finalize_header()
        self.header.close()

    def __del__(self):
        try:
            self.close()
        except:
            pass
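As a sketch of what the finalize step emits, a header-only export with `add_typedef` produces a typedef'd model struct (the layer entry below is a hypothetical stand-in for what `print_linear_layer` in `common.py` records):
```
from wexchange.c_export import CWriter

writer = CWriter("toy", header_only=True, add_typedef=True, model_struct_name="MyModel")
writer.layer_dict["dense"] = ("LinearLayer", 'linear_init(...)')  # hypothetical entry
writer.close()

# toy.h now contains (inside the include guard):
#   typedef struct {
#       LinearLayer dense;
#   } MyModel;
#   int init_mymodel(MyModel *model, const WeightArray *arrays);
```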
@@ -0,0 +1,386 @@
'''Copyright (c) 2017-2018 Mozilla
   Copyright (c) 2022 Amazon

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

import numpy as np

from .c_writer import CWriter


def print_vector(writer, vector, name, dtype='float', reshape_8x4=False, static=True, debug_float=False):

    if isinstance(writer, CWriter):
        f = writer.source
        binary_blob = writer.enable_binary_blob
    else:
        f = writer
        binary_blob = False

    dtype_suffix = {
        'float'       : 'float',
        'opus_uint8'  : 'uint8',
        'opus_int8'   : 'int8',
        'opus_uint16' : 'uint16',
        'opus_int16'  : 'int16',
        'int'         : 'int',
        'qweight'     : 'qweight'
    }

    if binary_blob:
        f.write(
f'''
#ifndef USE_WEIGHTS_FILE
'''
        )
        writer.weight_arrays.append(name)

    if reshape_8x4:
        # reorder into 4-row x 8-column blocks
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))

    v = np.reshape(vector, (-1))

    if debug_float:
        f.write('#ifndef DISABLE_DEBUG_FLOAT\n')

    f.write(
f'''
#define WEIGHTS_{name}_DEFINED
#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{dtype_suffix[dtype]}
'''
    )

    if static:
        f.write('static ')

    f.write(f'const {dtype} {name}[{len(v)}] = {{\n    ')

    for i in range(0, len(v)):

        f.write(f'{v[i]}')

        if (i!=len(v)-1):
            f.write(',')
        else:
            break

        if (i%8==7):
            f.write("\n    ")
        else:
            f.write(" ")

    f.write('\n};\n\n')
    if debug_float:
        f.write('#endif /*DISABLE_DEBUG_FLOAT*/\n')

    if binary_blob:
        f.write(
f'''
#endif /* USE_WEIGHTS_FILE */
'''
        )

    return vector


def extract_diagonal(A):
    """ input shape is (N, k*N) """

    N, M = A.shape
    B = A.copy()
    assert M % N == 0
    k = M // N

    diags = []
    for l in range(k):
        diag = np.diag(B[:, l * N : (l+1) * N]).copy()
        B[:, l * N : (l+1) * N] -= np.diag(diag)
        diags.append(diag)

    diag = np.concatenate(diags)

    return diag, B
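A quick worked example of `extract_diagonal` (a sketch using the function above):
```
import numpy as np

# A has shape (N, k*N) with N=4, k=3
A = np.arange(48, dtype=np.float32).reshape(4, 12)
diag, B = extract_diagonal(A)

assert diag.shape == (12,)                    # k diagonals of length N, concatenated
assert np.allclose(np.diag(B[:, 0:4]), 0.0)   # each block of B has its diagonal zeroed
assert np.allclose(A, B + np.concatenate([np.diag(d) for d in diag.reshape(3, 4)], axis=1))
```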
def quantize_weight(weight, scale):
    scale = scale + 1e-30
    Aq = np.round(weight / scale).astype('int')
    if Aq.max() > 127 or Aq.min() <= -128:
        raise ValueError("value out of bounds in quantize_weight")
    Aq = np.clip(np.round(weight / scale).astype('int'), -128, 127)
    return Aq


def print_sparse_weight(writer, A, name, scale=1/128, have_diag=True, quantize=False):
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))

    if have_diag:
        diag, A = extract_diagonal(A)
        print_vector(writer, diag, name + '_diag')

    if quantize:
        Aq = quantize_weight(A, scale)
    else:
        Aq = A

    # extract blocks; idx layout per 8-column band: <number of nonzero 4x8 blocks>,
    # followed by the starting row of each nonzero block
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = Aq[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero

    if quantize:
        print_vector(writer, W, name + '_int8', reshape_8x4=False, dtype='opus_int8')
    print_vector(writer, W0, name + '_float', reshape_8x4=False, dtype='float', debug_float=quantize)
    print_vector(writer, idx, name + '_idx', reshape_8x4=False, dtype='int')

    return Aq
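The emitted index format can be inspected by writing to a plain file object instead of a `CWriter` (a sketch):
```
import io
import numpy as np

A = np.zeros((8, 16), dtype=np.float32)
A[0:4, 0:8] = 1.0                        # a single nonzero 4x8 block

buf = io.StringIO()
print_sparse_weight(buf, A, 'w', have_diag=False, quantize=False)

# buf now holds 'w_float' (the nonzero block) and 'w_idx' = {1, 0, 0}:
# the first 8-column band has one block starting at row 0, the second has none
print(buf.getvalue())
```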
def compute_scaling(weight):
    """ computes optimal scaling vector for weight of shape (features_in, features_out) """

    n_in, n_out = weight.shape
    assert n_in % 4 == 0 and n_out % 8 == 0

    weight_max_abs = np.max(np.abs(weight), axis=0)
    weight_max_sum = np.max(np.abs(weight[0 : n_in : 2] + weight[1 : n_in : 2]), axis=0)
    scale_max = weight_max_abs / 127
    scale_sum = weight_max_sum / 129

    scale = np.maximum(scale_max, scale_sum)

    return scale
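Together with `quantize_weight`, this guarantees an int8-safe quantization per output column (a sketch):
```
import numpy as np

rng = np.random.default_rng(0)
W = rng.standard_normal((16, 8)).astype(np.float32)  # (features_in, features_out)

scale = compute_scaling(W)      # one scale per output column
Wq = quantize_weight(W, scale)  # never raises: |W| / scale <= 127 by construction

assert Wq.max() <= 127 and Wq.min() >= -128
```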
def qn(string):
    if string == "NULL":
        return string
    else:
        return '"' + string + '"'


def print_linear_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale : np.ndarray = None,
                       sparse : bool = False,
                       diagonal : bool = False,
                       quantize : bool = True):

    """ prints linear layer

    Parameters:
    -----------
    name : str
        layer name
    weight: np.ndarray
        ...
    scale: np.ndarray or None
        If None, auto scaling will be applied. Otherwise, output channels will be multiplied by scale (the usual broadcasting rules apply).
    """

    if len(weight.shape) != 2:
        raise ValueError('expecting 2-dim weight array in print_linear_layer')

    bias_name         = "NULL" if bias is None else name + "_bias"
    subias_name       = name + "_subias" if quantize else "NULL"
    scale_name        = name + "_scale" if quantize else "NULL"
    idx_name          = name + "_weights_idx" if sparse else "NULL"
    float_weight_name = name + "_weights_float"
    int_weight_name   = name + "_weights_int8" if quantize else "NULL"
    diag_name         = name + "_weights_diag" if sparse and diagonal else "NULL"

    nb_inputs, nb_outputs = weight.shape

    if scale is None and quantize:
        scale = compute_scaling(weight)

    if sparse:
        weight_q = print_sparse_weight(writer, weight, name + "_weights", scale=scale, have_diag=diagonal, quantize=quantize)
    else:
        if quantize:
            weight_q = quantize_weight(weight, scale)
            print_vector(writer, weight_q, name + "_weights_int8", dtype='opus_int8', reshape_8x4=True)

        print_vector(writer, weight, name + "_weights_float", dtype='float', reshape_8x4=False, debug_float=quantize)

    if quantize:
        subias = (np.zeros(nb_outputs) if bias is None else bias) - np.sum(weight_q * scale, axis=0)
        print_vector(writer, subias, name + "_subias")

        final_scale = scale / 127 * np.ones(nb_outputs)
        print_vector(writer, final_scale, name + "_scale")

    if bias is not None:
        print_vector(writer, bias, name + "_bias")

    init_call = f'linear_init(&model->{name}, arrays, {qn(bias_name)}, {qn(subias_name)}, {qn(int_weight_name)},' \
        + f'{qn(float_weight_name)}, {qn(idx_name)}, {qn(diag_name)}, {qn(scale_name)}, {nb_inputs}, {nb_outputs})'

    writer.layer_dict[name] = ('LinearLayer', init_call)


def print_dense_layer(writer : CWriter,
                      name : str,
                      weight : np.ndarray,
                      bias : np.ndarray,
                      scale=1/128,
                      format : str = 'torch',
                      sparse=False,
                      diagonal=False,
                      quantize=False):

    if format == 'torch':
        weight = weight.transpose()

    print_linear_layer(writer, name, weight, bias, scale=scale, sparse=sparse, diagonal=diagonal, quantize=quantize)

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[1]}\n")


def print_conv1d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale=1/128,
                       format : str = 'torch',
                       quantize=False,
                       sparse=False):

    if format == "torch":
        # convert to channels last
        weight = np.transpose(weight, (2, 1, 0))

    lin_weight = np.reshape(weight, (-1, weight.shape[-1]))
    print_linear_layer(writer, name, lin_weight, bias, scale=scale, sparse=sparse, diagonal=False, quantize=quantize)

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[2]}\n")
    writer.header.write(f"\n#define {name.upper()}_IN_SIZE {weight.shape[1]}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE ({weight.shape[1]} * ({weight.shape[0] - 1}))\n")

    return weight.shape[0] * weight.shape[1]


def print_conv2d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale : float=1/128,
                       quantize : bool=False):

    if quantize:
        print("[print_conv2d_layer] warning: quantize argument ignored")

    bias_name = name + "_bias"
    float_weight_name = name + "_weight_float"

    print_vector(writer, weight, float_weight_name)
    print_vector(writer, bias, bias_name)

    # init function
    out_channels, in_channels, ksize1, ksize2 = weight.shape
    init_call = f'conv2d_init(&model->{name}, arrays, "{bias_name}", "{float_weight_name}", {in_channels}, {out_channels}, {ksize1}, {ksize2})'

    writer.layer_dict[name] = ('Conv2dLayer', init_call)


def print_gru_layer(writer : CWriter,
                    name : str,
                    weight : np.ndarray,
                    recurrent_weight : np.ndarray,
                    bias : np.ndarray,
                    recurrent_bias : np.ndarray,
                    format : str = 'torch',
                    quantize : bool = False,
                    input_sparse : bool = False,
                    recurrent_sparse : bool = False,
                    scale=1/128,
                    recurrent_scale=1/128
                    ):

    if format == "torch":
        # change gate ordering from rzn to zrn

        N = weight.shape[0] // 3
        for x in [weight, recurrent_weight, bias, recurrent_bias]:
            if x is None:
                continue
            tmp = x[0:N].copy()
            x[0:N] = x[N:2*N]
            x[N:2*N] = tmp

        weight = weight.transpose()
        recurrent_weight = recurrent_weight.transpose()
    else:
        N = weight.shape[1] // 3

    print_linear_layer(writer, name + "_input", weight, bias, scale=scale, sparse=input_sparse, quantize=quantize)
    print_linear_layer(writer, name + "_recurrent", recurrent_weight, recurrent_bias, scale=recurrent_scale, sparse=recurrent_sparse, diagonal=recurrent_sparse, quantize=quantize)

    # wrapping it up
    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {N}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE {N}\n")

    return N


def print_tconv1d_layer(writer : CWriter,
                        name : str,
                        weight : np.ndarray,
                        bias : np.ndarray,
                        stride: int,
                        scale=1/128,
                        quantize=False,
                        sparse=False):

    in_channels, out_channels, kernel_size = weight.shape

    linear_weight = weight.transpose(2, 1, 0).reshape(kernel_size * out_channels, in_channels).transpose(1, 0)
    linear_bias = np.repeat(bias[np.newaxis, :], kernel_size, 0).flatten()

    print_linear_layer(writer, name, linear_weight, linear_bias, scale=scale, quantize=quantize, sparse=sparse)

    writer.header.write(f"\n#define {name.upper()}_KERNEL_SIZE {kernel_size}\n")
    writer.header.write(f"\n#define {name.upper()}_STRIDE {stride}\n")
    writer.header.write(f"\n#define {name.upper()}_IN_CHANNELS {in_channels}\n")
    writer.header.write(f"\n#define {name.upper()}_OUT_CHANNELS {out_channels}\n")
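Note that `print_conv1d_layer` reuses the linear-layer path: the torch kernel of shape (out, in, k) becomes a ((k*in) x out) dense matrix applied to a rolling history of in*(k-1) past samples, which is what the `_STATE_SIZE` macro records. A quick shape check (a sketch):
```
import numpy as np

out_channels, in_channels, kernel_size = 16, 8, 3
w = np.zeros((out_channels, in_channels, kernel_size))  # torch conv1d layout

w = np.transpose(w, (2, 1, 0))             # to (k, in, out), channels last
lin_w = np.reshape(w, (-1, w.shape[-1]))   # ((k*in), out) dense weight
assert lin_w.shape == (24, 16)

state_size = in_channels * (kernel_size - 1)  # samples carried between frames
assert state_size == 16
```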
@@ -0,0 +1,5 @@
from .tf import dump_tf_conv1d_weights, load_tf_conv1d_weights
from .tf import dump_tf_dense_weights, load_tf_dense_weights
from .tf import dump_tf_embedding_weights, load_tf_embedding_weights
from .tf import dump_tf_gru_weights, load_tf_gru_weights
from .tf import dump_tf_weights, load_tf_weights
@@ -0,0 +1,188 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

import os

import tensorflow as tf
import numpy as np

from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer


def dump_tf_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):

    assert gru.activation == tf.keras.activations.tanh
    assert gru.recurrent_activation == tf.keras.activations.sigmoid
    assert gru.reset_after == True

    w_ih = gru.weights[0].numpy().transpose().copy()
    w_hh = gru.weights[1].numpy().transpose().copy()
    b_ih = gru.weights[2].numpy()[0].copy()
    b_hh = gru.weights[2].numpy()[1].copy()

    if isinstance(where, CWriter):
        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='tf', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
    else:
        os.makedirs(where, exist_ok=True)

        # zrn => rzn
        N = w_ih.shape[0] // 3
        for x in [w_ih, w_hh, b_ih, b_hh]:
            tmp = x[0:N].copy()
            x[0:N] = x[N:2*N]
            x[N:2*N] = tmp

        np.save(os.path.join(where, 'weight_ih_rzn.npy'), w_ih)
        np.save(os.path.join(where, 'weight_hh_rzn.npy'), w_hh)
        np.save(os.path.join(where, 'bias_ih_rzn.npy'), b_ih)
        np.save(os.path.join(where, 'bias_hh_rzn.npy'), b_hh)


def load_tf_gru_weights(path, gru):

    assert gru.activation == tf.keras.activations.tanh
    assert gru.recurrent_activation == tf.keras.activations.sigmoid
    assert gru.reset_after == True

    w_ih = np.load(os.path.join(path, 'weight_ih_rzn.npy'))
    w_hh = np.load(os.path.join(path, 'weight_hh_rzn.npy'))
    b_ih = np.load(os.path.join(path, 'bias_ih_rzn.npy'))
    b_hh = np.load(os.path.join(path, 'bias_hh_rzn.npy'))

    # rzn => zrn
    N = w_ih.shape[0] // 3
    for x in [w_ih, w_hh, b_ih, b_hh]:
        tmp = x[0:N].copy()
        x[0:N] = x[N:2*N]
        x[N:2*N] = tmp

    gru.weights[0].assign(tf.convert_to_tensor(w_ih.transpose()))
    gru.weights[1].assign(tf.convert_to_tensor(w_hh.transpose()))
    gru.weights[2].assign(tf.convert_to_tensor(np.vstack((b_ih, b_hh))))


def dump_tf_dense_weights(where, dense, name='dense', scale=1/128, sparse=False, diagonal=False, quantize=False):

    w = dense.weights[0].numpy()
    if dense.bias is None:
        b = np.zeros(dense.units, dtype=w.dtype)
    else:
        b = dense.bias.numpy()

    if isinstance(where, CWriter):
        return print_dense_layer(where, name, w, b, scale=scale, format='tf', sparse=sparse, diagonal=diagonal, quantize=quantize)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight.npy'), w.transpose())
        np.save(os.path.join(where, 'bias.npy'), b)


def load_tf_dense_weights(path, dense):

    w = np.load(os.path.join(path, 'weight.npy')).transpose()
    b = np.load(os.path.join(path, 'bias.npy'))

    dense.weights[0].assign(tf.convert_to_tensor(w))
    if dense.bias is not None:
        dense.weights[1].assign(tf.convert_to_tensor(b))


def dump_tf_conv1d_weights(where, conv, name='conv', scale=1/128, quantize=False):

    assert conv.data_format == 'channels_last'

    w = conv.weights[0].numpy().copy()
    if conv.bias is None:
        b = np.zeros(conv.filters, dtype=w.dtype)
    else:
        b = conv.bias.numpy()

    if isinstance(where, CWriter):
        return print_conv1d_layer(where, name, w, b, scale=scale, format='tf', quantize=quantize)
    else:
        os.makedirs(where, exist_ok=True)

        w = np.transpose(w, (2, 1, 0))
        np.save(os.path.join(where, 'weight_oik.npy'), w)
        np.save(os.path.join(where, 'bias.npy'), b)


def load_tf_conv1d_weights(path, conv):

    w = np.load(os.path.join(path, 'weight_oik.npy'))
    b = np.load(os.path.join(path, 'bias.npy'))

    w = np.transpose(w, (2, 1, 0))

    conv.weights[0].assign(tf.convert_to_tensor(w))
    if conv.bias is not None:
        conv.weights[1].assign(tf.convert_to_tensor(b))


def dump_tf_embedding_weights(path, emb):
    os.makedirs(path, exist_ok=True)

    w = emb.weights[0].numpy()
    np.save(os.path.join(path, 'weight.npy'), w)


def load_tf_embedding_weights(path, emb):

    w = np.load(os.path.join(path, 'weight.npy'))
    emb.weights[0].assign(tf.convert_to_tensor(w))


def dump_tf_weights(path, module):
    if isinstance(module, tf.keras.layers.Dense):
        dump_tf_dense_weights(path, module)
    elif isinstance(module, tf.keras.layers.GRU):
        dump_tf_gru_weights(path, module)
    elif isinstance(module, tf.keras.layers.Conv1D):
        dump_tf_conv1d_weights(path, module)
    elif isinstance(module, tf.keras.layers.Embedding):
        dump_tf_embedding_weights(path, module)
    else:
        raise ValueError(f'dump_tf_weights: layer of type {type(module)} not supported')


def load_tf_weights(path, module):
    if isinstance(module, tf.keras.layers.Dense):
        load_tf_dense_weights(path, module)
    elif isinstance(module, tf.keras.layers.GRU):
        load_tf_gru_weights(path, module)
    elif isinstance(module, tf.keras.layers.Conv1D):
        load_tf_conv1d_weights(path, module)
    elif isinstance(module, tf.keras.layers.Embedding):
        load_tf_embedding_weights(path, module)
    else:
        raise ValueError(f'load_tf_weights: layer of type {type(module)} not supported')
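For instance, the GRU dump/load pair round-trips a keras GRU through the rzn-ordered numpy files (a minimal sketch):
```
import tensorflow as tf

gru = tf.keras.layers.GRU(32, reset_after=True)
gru.build((None, None, 16))
dump_tf_gru_weights('gru_weights', gru)   # writes weight_ih_rzn.npy, ...

gru2 = tf.keras.layers.GRU(32, reset_after=True)
gru2.build((None, None, 16))
load_tf_gru_weights('gru_weights', gru2)  # gru2 now matches gru
```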
@@ -0,0 +1,37 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

from .torch import dump_torch_conv1d_weights, load_torch_conv1d_weights
from .torch import dump_torch_conv2d_weights, load_torch_conv2d_weights
from .torch import dump_torch_dense_weights, load_torch_dense_weights
from .torch import dump_torch_gru_weights, load_torch_gru_weights
from .torch import dump_torch_grucell_weights
from .torch import dump_torch_embedding_weights, load_torch_embedding_weights
from .torch import dump_torch_weights, load_torch_weights
from .torch import dump_torch_adaptive_conv1d_weights
@@ -0,0 +1,433 @@
"""
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""

import os
import sys

import torch
import numpy as np

# make the optional OSCE layers importable if they are present
sys.path.append(os.path.join(os.path.dirname(__file__), '../osce'))
try:
    import utils.layers as osce_layers
    from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
    from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
    from utils.layers.td_shaper import TDShaper
    has_osce = True
except:
    has_osce = False

from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer
def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):

    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        # pad kernel for quantization
        left_padding = adaconv.padding[0]
        kernel_size = adaconv.kernel_size
        in_channels = adaconv.in_channels
        out_channels = adaconv.out_channels
        feature_dim = adaconv.feature_dim

        if quantize and kernel_size % 8:
            kernel_padding = 8 - (kernel_size % 8)
            w_kernel = np.concatenate(
                (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)),
                dtype=w_kernel.dtype,
                axis=2).reshape(-1, feature_dim)
            b_kernel = np.concatenate(
                (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)),
                dtype=b_kernel.dtype,
                axis=2).reshape(-1)
            left_padding += kernel_padding
            kernel_size += kernel_padding

        # write relevant scalar parameters to header file
        where.header.write(f"""
#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f
#define {name.upper()}_KERNEL_SIZE {kernel_size}
#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
#define {name.upper()}_LEFT_PADDING {left_padding}
#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
#define {name.upper()}_NORM_P {adaconv.norm_p}
#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
"""
        )

        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)

    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
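The quantization path zero-pads each kernel on the left so its length becomes a multiple of 8; the padding arithmetic in isolation (a sketch with illustrative numbers):
```
kernel_size, left_padding = 15, 7

if kernel_size % 8:
    kernel_padding = 8 - (kernel_size % 8)  # -> 1
    left_padding += kernel_padding           # -> 8
    kernel_size += kernel_padding            # -> 16

assert kernel_size % 8 == 0
```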
def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):

    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
    w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy()
    b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        # pad kernel for quantization
        left_padding = adaconv.padding[0]
        kernel_size = adaconv.kernel_size

        if quantize and w_kernel.shape[0] % 8:
            kernel_padding = 8 - (w_kernel.shape[0] % 8)
            w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype)
            b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype)
            left_padding += kernel_padding
            kernel_size += kernel_padding

        # write relevant scalar parameters to header file
        where.header.write(f"""
#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f
#define {name.upper()}_KERNEL_SIZE {kernel_size}
#define {name.upper()}_LEFT_PADDING {left_padding}
#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
#define {name.upper()}_NORM_P {adaconv.norm_p}
#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
#define {name.upper()}_MAX_LAG {adaconv.max_lag}
"""
        )

        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
        print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False)

    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
        np.save(os.path.join(where, 'weight_global_gain.npy'), w_global_gain)
        np.save(os.path.join(where, 'bias_global_gain.npy'), b_global_gain)


def dump_torch_tdshaper(where, shaper, name='tdshaper', quantize=False, scale=1/128):

    if isinstance(where, CWriter):
        where.header.write(f"""
#define {name.upper()}_FEATURE_DIM {shaper.feature_dim}
#define {name.upper()}_FRAME_SIZE {shaper.frame_size}
#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k}
#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0}
#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0}
"""
        )

    dump_torch_conv1d_weights(where, shaper.feature_alpha1_f, name + "_alpha1_f", quantize=quantize, scale=scale)
    dump_torch_conv1d_weights(where, shaper.feature_alpha1_t, name + "_alpha1_t")
    dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2")

    if shaper.innovate:
        dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b")
        dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c")
        dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b")
        dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c")
def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):

    assert gru.num_layers == 1
    assert gru.bidirectional == False

    w_ih = gru.weight_ih_l0.detach().cpu().numpy().copy()
    w_hh = gru.weight_hh_l0.detach().cpu().numpy().copy()
    if hasattr(gru, 'bias_ih_l0'):
        b_ih = gru.bias_ih_l0.detach().cpu().numpy().copy()
    else:
        b_ih = None
    if hasattr(gru, 'bias_hh_l0'):
        b_hh = gru.bias_hh_l0.detach().cpu().numpy().copy()
    else:
        b_hh = None

    if isinstance(where, CWriter):
        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='torch', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_ih_rzn.npy'), w_ih)
        np.save(os.path.join(where, 'weight_hh_rzn.npy'), w_hh)
        np.save(os.path.join(where, 'bias_ih_rzn.npy'), b_ih)
        np.save(os.path.join(where, 'bias_hh_rzn.npy'), b_hh)


def dump_torch_grucell_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):

    w_ih = gru.weight_ih.detach().cpu().numpy().copy()
    w_hh = gru.weight_hh.detach().cpu().numpy().copy()
    if hasattr(gru, 'bias_ih') and gru.bias_ih is not None:
        b_ih = gru.bias_ih.detach().cpu().numpy().copy()
    else:
        b_ih = None
    if hasattr(gru, 'bias_hh') and gru.bias_hh is not None:
        b_hh = gru.bias_hh.detach().cpu().numpy().copy()
    else:
        b_hh = None

    if isinstance(where, CWriter):
        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='torch', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_ih_rzn.npy'), w_ih)
        np.save(os.path.join(where, 'weight_hh_rzn.npy'), w_hh)
        np.save(os.path.join(where, 'bias_ih_rzn.npy'), b_ih)
        np.save(os.path.join(where, 'bias_hh_rzn.npy'), b_hh)


def load_torch_gru_weights(where, gru):

    assert gru.num_layers == 1
    assert gru.bidirectional == False

    w_ih = np.load(os.path.join(where, 'weight_ih_rzn.npy'))
    w_hh = np.load(os.path.join(where, 'weight_hh_rzn.npy'))
    b_ih = np.load(os.path.join(where, 'bias_ih_rzn.npy'))
    b_hh = np.load(os.path.join(where, 'bias_hh_rzn.npy'))

    with torch.no_grad():
        gru.weight_ih_l0.set_(torch.from_numpy(w_ih))
        gru.weight_hh_l0.set_(torch.from_numpy(w_hh))
        gru.bias_ih_l0.set_(torch.from_numpy(b_ih))
        gru.bias_hh_l0.set_(torch.from_numpy(b_hh))


def dump_torch_dense_weights(where, dense, name='dense', scale=1/128, sparse=False, diagonal=False, quantize=False):

    w = dense.weight.detach().cpu().numpy().copy()
    if dense.bias is None:
        b = np.zeros(dense.out_features, dtype=w.dtype)
    else:
        b = dense.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        return print_dense_layer(where, name, w, b, scale=scale, format='torch', sparse=sparse, diagonal=diagonal, quantize=quantize)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight.npy'), w)
        np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_dense_weights(where, dense):

    w = np.load(os.path.join(where, 'weight.npy'))
    b = np.load(os.path.join(where, 'bias.npy'))

    with torch.no_grad():
        dense.weight.set_(torch.from_numpy(w))
        if dense.bias is not None:
            dense.bias.set_(torch.from_numpy(b))
def dump_torch_conv1d_weights(where, conv, name='conv', scale=1/128, quantize=False, sparse=False):

    w = conv.weight.detach().cpu().numpy().copy()
    if conv.bias is None:
        b = np.zeros(conv.out_channels, dtype=w.dtype)
    else:
        b = conv.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        return print_conv1d_layer(where, name, w, b, scale=scale, format='torch', quantize=quantize, sparse=sparse)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_oik.npy'), w)
        np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_conv1d_weights(where, conv):

    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oik.npy'))
        conv.weight.set_(torch.from_numpy(w))
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))


def dump_torch_tconv1d_weights(where, conv, name='conv', scale=1/128, quantize=False, sparse=False):

    w = conv.weight.detach().cpu().numpy().copy()
    if conv.bias is None:
        b = np.zeros(conv.out_channels, dtype=w.dtype)
    else:
        b = conv.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        return print_tconv1d_layer(where, name, w, b, conv.stride[0], scale=scale, quantize=quantize, sparse=sparse)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_oik.npy'), w)
        np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_tconv1d_weights(where, conv):

    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oik.npy'))
        conv.weight.set_(torch.from_numpy(w))
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))


def dump_torch_conv2d_weights(where, conv, name='conv', scale=1/128, quantize=False):
    w = conv.weight.detach().cpu().permute(0, 1, 3, 2).numpy().copy()
    if conv.bias is None:
        b = np.zeros(conv.out_channels, dtype=w.dtype)
    else:
        b = conv.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        return print_conv2d_layer(where, name, w, b, scale=scale, quantize=quantize)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight_oiwh.npy'), w)
        np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_conv2d_weights(where, conv):
    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oiwh.npy'))
        conv.weight.set_(torch.from_numpy(w).permute(0, 1, 3, 2))
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))


def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False):

    w = embed.weight.detach().cpu().numpy().copy().transpose()
    b = np.zeros(w.shape[0], dtype=w.dtype)

    if isinstance(where, CWriter):
        return print_dense_layer(where, name, w, b, scale=scale, format='torch', sparse=sparse, diagonal=diagonal, quantize=quantize)
    else:
        os.makedirs(where, exist_ok=True)

        np.save(os.path.join(where, 'weight.npy'), w)
        np.save(os.path.join(where, 'bias.npy'), b)


def load_torch_embedding_weights(where, emb):

    w = np.load(os.path.join(where, 'weight.npy'))

    with torch.no_grad():
        emb.weight.set_(torch.from_numpy(w))


def dump_torch_weights(where, module, name=None, verbose=False, **kwargs):
    """ generic function for dumping weights of some torch.nn.Module """
    if verbose and name is not None:
        print(f"printing layer {name} of type {type(module)}...")
    if isinstance(module, torch.nn.Linear):
        return dump_torch_dense_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.GRU):
        return dump_torch_gru_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.GRUCell):
        return dump_torch_grucell_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.Conv1d):
        return dump_torch_conv1d_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.Conv2d):
        return dump_torch_conv2d_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.Embedding):
        return dump_torch_embedding_weights(where, module, name, **kwargs)
    elif isinstance(module, torch.nn.ConvTranspose1d):
        return dump_torch_tconv1d_weights(where, module, name, **kwargs)
    else:
        if has_osce:
            if isinstance(module, LimitedAdaptiveConv1d):
                dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs)
            elif isinstance(module, LimitedAdaptiveComb1d):
                dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs)
            elif isinstance(module, TDShaper):
                dump_torch_tdshaper(where, module, name, **kwargs)
            else:
                raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
        else:
            raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')


def load_torch_weights(where, module):
    """ generic function for loading weights of some torch.nn.Module """
    if isinstance(module, torch.nn.Linear):
        load_torch_dense_weights(where, module)
    elif isinstance(module, torch.nn.GRU):
        load_torch_gru_weights(where, module)
    elif isinstance(module, torch.nn.Conv1d):
        load_torch_conv1d_weights(where, module)
    elif isinstance(module, torch.nn.Conv2d):
        load_torch_conv2d_weights(where, module)
    elif isinstance(module, torch.nn.Embedding):
        load_torch_embedding_weights(where, module)
    elif isinstance(module, torch.nn.ConvTranspose1d):
        return load_torch_tconv1d_weights(where, module)
    else:
        raise ValueError(f'load_torch_weights: layer of type {type(module)} not supported')
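The generic dispatchers make round-tripping any supported layer a two-liner (a minimal sketch):
```
import torch

conv = torch.nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3)
dump_torch_weights('conv_weights', conv)   # writes weight_oik.npy and bias.npy

conv2 = torch.nn.Conv1d(8, 16, 3)
load_torch_weights('conv_weights', conv2)
assert torch.allclose(conv.weight, conv2.weight)
```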