add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
import numpy as np
from tensorflow.keras.utils import Sequence
from ulaw import lin2ulaw
def lpc2rc(lpc):
    """Convert LPC coefficients to reflection coefficients.

    Runs the Levinson-Durbin recursion backwards: at each step the
    highest-order coefficient of the current polynomial is the
    reflection coefficient, and the polynomial is down-dated by one
    order.

    lpc: array of shape (batch, time, order), LPCs along the last axis.
    Returns an array of the same shape holding the reflection
    coefficients. The input array is not modified.
    """
    nb_coeffs = lpc.shape[-1]
    refl = 0 * lpc
    for m in range(nb_coeffs, 0, -1):
        # Last coefficient of the order-m polynomial is the m-th RC.
        refl[:, :, m - 1] = lpc[:, :, -1]
        km = refl[:, :, m - 1:m].repeat(m - 1, axis=2)
        # Down-date to the order-(m-1) polynomial.
        lpc = (lpc[:, :, :-1] - km * lpc[:, :, -2::-1]) / (1 - km * km)
    return refl
class LPCNetLoader(Sequence):
    """Keras Sequence serving shuffled LPCNet training batches.

    data:      (N, T, 2) signal array; channel 0 is the network input,
               channel 1 the target output.
    features:  (N, F, nf) conditioning features; the last 16 columns
               hold LPC coefficients, the rest the acoustic features.
    periods:   pitch-period array, first axis aligned with data.
    e2e:       when True, the LPCs are converted to reflection
               coefficients and returned as an extra training target
               (end-to-end model) instead of being fed as an input.
    lookahead: feature lookahead (frames) used when slicing the LPCs.
    """

    def __init__(self, data, features, periods, batch_size, e2e=False, lookahead=2):
        self.batch_size = batch_size
        # Keep only whole batches common to all three arrays.
        nb_items = np.minimum(np.minimum(data.shape[0], features.shape[0]), periods.shape[0])
        self.nb_batches = nb_items // self.batch_size
        kept = self.nb_batches * self.batch_size
        self.data = data[:kept, :]
        self.features = features[:kept, :]
        self.periods = periods[:kept, :]
        self.e2e = e2e
        self.lookahead = lookahead
        self.on_epoch_end()

    def on_epoch_end(self):
        # Fresh random order for each epoch.
        self.indices = np.arange(self.nb_batches * self.batch_size)
        np.random.shuffle(self.indices)

    def __getitem__(self, index):
        # Shuffled row indices belonging to this batch.
        idx = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch = self.data[idx, :, :]
        in_data = batch[:, :, :1]
        out_data = batch[:, :, 1:]
        features = self.features[idx, :, :-16]
        periods = self.periods[idx, :, :]
        outputs = [out_data]
        inputs = [in_data, features, periods]
        # Last 16 feature columns are the LPCs; shift them according to
        # the feature lookahead so they line up with the output samples.
        if self.lookahead > 0:
            lpc = self.features[idx, 4 - self.lookahead:-self.lookahead, -16:]
        else:
            lpc = self.features[idx, 4:, -16:]
        if self.e2e:
            outputs.append(lpc2rc(lpc))
        else:
            inputs.append(lpc)
        return (inputs, outputs)

    def __len__(self):
        return self.nb_batches

View File

@@ -0,0 +1,111 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2018-2019 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
# Decode features from quantized RDO-VAE symbols (this is a dump/decode
# script, not a training script, despite the inherited description text).
import argparse
#from plc_loader import PLCLoader

parser = argparse.ArgumentParser(description='Train a PLC model')
parser.add_argument('bits', metavar='<bits file>', help='binary features file (int16)')
parser.add_argument('output', metavar='<output>', help='output features')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
# NOTE(review): --weights is optional to argparse but required in practice;
# model.load_weights(None) below will fail if it is omitted.
group1.add_argument('--weights', metavar='<input weights>', help='model weights')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
# Help text previously claimed a default of 128 while the actual default is 1.
parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')

args = parser.parse_args()
# Resolve the model definition module named on the command line
# (e.g. "rdovae" -> rdovae.py).
import importlib
rdovae = importlib.import_module(args.model)

import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py
import tensorflow as tf
from rdovae import pvq_quantize
from rdovae import apply_dead_zone

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
# NOTE(review): args.weights may be None (optional flag); load_weights(None)
# will fail — confirm callers always pass --weights.
model.load_weights(args.weights)

lpc_order = 16

nbits=80

bits_file = args.bits
sequence_size = args.seq_length

# Load the float32 encoder symbols; the file holds 40 symbols per frame,
# reshaped below to 80 (=20*4) per two-frame step.
# (The previous comment about 16-bit PCM / mu-law was stale.)
bits = np.memmap(bits_file + "-syms.f32", dtype='float32', mode='r')
nb_sequences = len(bits)//(40*sequence_size)//batch_size*batch_size
bits = bits[:nb_sequences*sequence_size*40]

bits = np.reshape(bits, (nb_sequences, sequence_size//2, 20*4))
print(bits.shape)

# Fixed rate target; map lambda to a quantizer id the same way as training.
lambda_val = 0.001 * np.ones((nb_sequences, sequence_size//2, 1))
quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
quant_id = quant_id[:,:,0]
quant_embed = qembedding(quant_id)

# Per-symbol scale and dead zone derived from the quantizer embedding.
quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])

# Quantize: scale, apply dead zone, round, then undo the scaling.
bits = bits*quant_scale
bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
bits = bits/quant_scale

# Decoder initial state: last state vector of each sequence, PVQ-quantized.
state = np.memmap(bits_file + "-state.f32", dtype='float32', mode='r')
state = np.reshape(state, (nb_sequences, sequence_size//2, 24))
state = state[:,-1,:]
state = pvq_quantize(state, 82)
#state = state/(1e-15+tf.norm(state, axis=-1,keepdims=True))

print("shapes are:")
print(bits.shape)
print(state.shape)

# Keep every other step — presumably the decoder runs at half the symbol
# rate; TODO confirm against the rdovae model definition.
bits = bits[:,1::2,:]

features = decoder.predict([bits, state], batch_size=batch_size)

features.astype('float32').tofile(args.output)

View File

@@ -0,0 +1,49 @@
"""
Modification of Tensorflow's Embedding Layer:
1. Not restricted to be the first layer of a model
2. Differentiable (allows non-integer lookups)
- For non-integer lookups, this layer linearly interpolates between the adjacent embeddings in the following way to preserve gradient flow
- E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x))
"""
import tensorflow as tf
from tensorflow.keras.layers import Layer
class diff_Embed(Layer):
    """Differentiable embedding lookup.

    Unlike the stock Keras Embedding layer this can be used anywhere in a
    model and accepts non-integer indices: for a fractional index x it
    linearly interpolates between the embeddings of floor(x) and
    floor(x)+1, which preserves gradient flow through x:
        E = (1 - frac(x)) * embed(floor(x)) + frac(x) * embed(floor(x)+1)

    Parameters:
    - units: int
        Dimension of the embedding
    - dict_size: int
        Number of embeddings in the lookup table
    - pcm_init: boolean
        When True, use the supplied `initializer` for the embedding
        matrix instead of random-normal initialization
    - initializer: weight initializer, used only when pcm_init is True
    """
    def __init__(self, units=128, dict_size=256, pcm_init=True, initializer=None, **kwargs):
        super(diff_Embed, self).__init__(**kwargs)
        self.units = units
        self.dict_size = dict_size
        self.pcm_init = pcm_init
        self.initializer = initializer

    def build(self, input_shape):
        w_init = tf.random_normal_initializer()
        if self.pcm_init:
            w_init = self.initializer
        self.w = tf.Variable(initial_value=w_init(shape=(self.dict_size, self.units), dtype='float32'), trainable=True)

    def call(self, inputs):
        # Fractional part is the interpolation weight.
        alpha = inputs - tf.math.floor(inputs)
        alpha = tf.expand_dims(alpha, axis=-1)
        alpha = tf.tile(alpha, [1, 1, 1, self.units])
        inputs = tf.cast(inputs, 'int32')
        # Clamp the upper lookup to the actual table size. The previous code
        # hard-coded 255, which was only correct for dict_size == 256;
        # behavior is unchanged for the default table size.
        upper = tf.clip_by_value(inputs + 1, 0, self.dict_size - 1)
        M = (1 - alpha) * tf.gather(self.w, inputs) + alpha * tf.gather(self.w, upper)
        return M

    def get_config(self):
        # Serialize constructor arguments so the layer can be re-created.
        config = super(diff_Embed, self).get_config()
        config.update({"units": self.units})
        config.update({"dict_size": self.dict_size})
        config.update({"pcm_init": self.pcm_init})
        config.update({"initializer": self.initializer})
        return config

View File

@@ -0,0 +1,388 @@
#!/usr/bin/python3
'''Copyright (c) 2017-2018 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import os
import io
import lpcnet
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
from ulaw import ulaw2lin, lin2ulaw
from mdense import MDense
from diffembed import diff_Embed
from parameters import get_parameter
import h5py
import re
import argparse
# No CUDA devices needed: this script only reads weights, never trains.
os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Flag for dumping e2e (differentiable lpc) network weights
flag_e2e = False

# Running maxima collected while dumping layers; emitted as #defines so the
# C code can size its scratch buffers.
max_rnn_neurons = 1
max_conv_inputs = 1
max_mdense_tmp = 1
def printVector(f, vector, name, dtype='float', dotp=False):
    """Emit `vector` into the C source `f` as a static const array `name`.

    With dotp=True the data is first permuted into the 4x8 blocked layout
    used by the DOT_PROD kernels.  Each new array name is recorded in the
    module-level `array_list` so the weight table can be generated later.
    """
    global array_list
    if dotp:
        # Rearrange into 4x8 blocks for the dot-product kernels.
        vector = vector.reshape((vector.shape[0] // 4, 4, vector.shape[1] // 8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    flat = np.reshape(vector, (-1))
    if name not in array_list:
        array_list.append(name)
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(flat)))
    last = len(flat) - 1
    for i, value in enumerate(flat):
        f.write('{}'.format(value))
        if i == last:
            break
        f.write(',')
        # Eight values per output line.
        f.write("\n " if i % 8 == 7 else " ")
    f.write('\n};\n')
    f.write('#endif\n\n')
def printSparseVector(f, A, name, have_diag=True):
    """Emit GRU weight matrix A (N x 3N) in block-sparse form.

    Writes the quantized nonzero 4x8 blocks plus an index table describing
    which blocks are present.  When have_diag is True the three diagonals
    (one per GRU gate) are extracted, written separately, and zeroed out of
    A — note A is modified in place.  Returns the int-quantized matrix AQ.
    """
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        # One diagonal per gate; the C code applies them separately.
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # Quantize to the int8 range with a fixed scale of 128.
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        # Per 8-wide column stripe: first index entry is the count of kept
        # blocks (patched in below), followed by each block's row offset.
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                # DOT_PROD kernels expect the block transposed.
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    # Quantized weights for DOT_PROD builds, float weights otherwise.
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
def dump_layer_ignore(self, f, hf):
    """Default dump hook: layers with no specialized dumper are skipped."""
    print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
    # False: this layer contributes no state slot to NNetState.
    return False
# Installed as the fallback for every Keras layer type.
Layer.dump_layer = dump_layer_ignore
def dump_sparse_gru(self, f, hf):
    """Dump a GRU whose recurrent weights are stored block-sparse (gru_a)."""
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    # Compensate the recurrent bias (row 1 — presumably the recurrent half
    # of the Keras reset_after bias; TODO confirm) for the offset the int8
    # quantization introduces.
    subias = weights[-1].copy()
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # Kernel is (inputs, 3*units): one slice per gate.
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    model_struct.write(' SparseGRULayer {};\n'.format(name));
    model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
        .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    return True
def dump_grub(self, f, hf, gru_a_size):
    """Dump gru_b: only the first gru_a_size input rows are sparse-coded
    here — the remaining rows are dumped as the gru_b_dense_feature layer
    in the main script."""
    global max_rnn_neurons
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)
    # Recurrent weights: quantized for DOT_PROD builds, float otherwise.
    f.write('#ifdef DOT_PROD\n')
    qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
    printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, weights[1], name + '_recurrent_weights')
    f.write('#endif /*DOT_PROD*/\n')
    printVector(f, weights[-1], name + '_bias')
    # Compensate both bias rows for the quantization-induced offsets.
    subias = weights[-1].copy()
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    model_struct.write(' GRULayer {};\n'.format(name));
    model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
        .format(name, name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
    return True
def dump_gru_layer_dummy(self, f, hf):
    """Write only the size #defines for a GRU; gru_a's actual weights are
    emitted via dump_sparse_gru and the fused embedding/dense dumps."""
    name = self.name
    weights = self.get_weights()
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    # True: the GRU still needs a state slot in NNetState.
    return True;
GRU.dump_layer = dump_gru_layer_dummy
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Emit a dense layer (weights + bias) under an explicit name; also used
    to dump synthesized layers that do not exist as Keras objects."""
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    model_struct.write(' DenseLayer {};\n'.format(name));
    model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
        .format(name, name, name, weights.shape[0], weights.shape[1], activation))
def dump_dense_layer(self, f, hf):
    """Dump hook for Keras Dense layers."""
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    activation = self.activation.__name__.upper()
    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
    # False: dense layers are stateless.
    return False
Dense.dump_layer = dump_dense_layer
def dump_mdense_layer(self, f, hf):
    """Dump hook for MDense (multi-head dense) output layers."""
    global max_mdense_tmp
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    # Transpose into the memory order expected by the C mdense kernel.
    printVector(f, np.transpose(weights[0], (0, 2, 1)), name + '_weights')
    printVector(f, np.transpose(weights[1], (1, 0)), name + '_bias')
    printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
    activation = self.activation.__name__.upper()
    # Scratch needed: outputs x heads.
    max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
    model_struct.write(' MDenseLayer {};\n'.format(name));
    model_init.write(' if (mdense_init(&model->{}, arrays, "{}_bias", "{}_weights", "{}_factor", {}, {}, {}, ACTIVATION_{})) return 1;\n'
        .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    return False
MDense.dump_layer = dump_mdense_layer
def dump_conv1d_layer(self, f, hf):
    """Dump hook for Conv1D layers; the state is the (kernel-1)-sample
    input history the C code must keep between calls."""
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    # Kernel shape is (kernel_size, in_channels, out_channels).
    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    model_struct.write(' Conv1DLayer {};\n'.format(name));
    model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
        .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    return True
Conv1D.dump_layer = dump_conv1d_layer
def dump_embedding_layer_impl(name, weights, f, hf):
    """Emit an embedding table under an explicit name; also used for the
    precomputed embedding-times-GRU-weights tables."""
    printVector(f, weights, name + '_weights')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    model_struct.write(' EmbeddingLayer {};\n'.format(name));
    model_init.write(' if (embedding_init(&model->{}, arrays, "{}_weights", {}, {})) return 1;\n'
        .format(name, name, weights.shape[0], weights.shape[1]))
def dump_embedding_layer(self, f, hf):
    """Dump hook for Embedding (and diff_Embed) layers."""
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()[0]
    dump_embedding_layer_impl(name, weights, f, hf)
    # False: embeddings are stateless lookups.
    return False
Embedding.dump_layer = dump_embedding_layer
diff_Embed.dump_layer = dump_embedding_layer
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file', type=str, help='model weight h5 file')
    parser.add_argument('--nnet-header', type=str, help='name of c header file for dumped model', default='nnet_data.h')
    parser.add_argument('--nnet-source', type=str, help='name of c source file for dumped model', default='nnet_data.c')
    parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. If not specified I will attempt to read it from the model file with 1 as default', default=None)
    parser.add_argument('--lookahead', type=float, help='Features lookahead. If not specified I will attempt to read it from the model file with 2 as default', default=None)
    args = parser.parse_args()

    filename = args.model_file
    # Read the stored weight shapes first so a correctly-sized model can be
    # instantiated before load_weights().
    with h5py.File(filename, "r") as f:
        units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
        units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
        cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
        # An 'rc2lpc' layer is only present in end-to-end models.
        e2e = 'rc2lpc' in f['model_weights']

    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
    #model.summary()
    model.load_weights(filename, by_name=True)

    cfile = args.nnet_source
    hfile = args.nnet_header

    f = open(cfile, 'w')
    hf = open(hfile, 'w')
    # Struct/init-function text is buffered and appended at the end, once
    # every layer has been seen.
    model_struct = io.StringIO()
    model_init = io.StringIO()
    model_struct.write('typedef struct {\n')
    model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
    model_init.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays) {\n')
    # Names of all arrays emitted by printVector (used for the weight table).
    array_list = []

    f.write('/*This file is automatically generated from a Keras model*/\n')
    f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
    f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))

    hf.write('/*This file is automatically generated from a Keras model*/\n\n')
    hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')
    if e2e:
        hf.write('/* This is an end-to-end model */\n')
        hf.write('#define END2END\n\n')
    else:
        hf.write('/* This is *not* an end-to-end model */\n')
        hf.write('/* #define END2END */\n\n')

    # LPC weighting factor: CLI override wins, else stored model parameter.
    if type(args.lpc_gamma) == type(None):
        lpc_gamma = get_parameter(model, 'lpc_gamma', 1)
    else:
        lpc_gamma = args.lpc_gamma
    hf.write('/* LPC weighting factor */\n')
    hf.write('#define LPC_GAMMA ' + str(lpc_gamma) +'f\n\n')

    # look-ahead: CLI override wins, else stored model parameter.
    if type(args.lookahead) == type(None):
        lookahead = get_parameter(model, 'lookahead', 2)
    else:
        lookahead = args.lookahead
    hf.write('/* Features look-ahead */\n')
    hf.write('#define FEATURES_DELAY ' + str(lookahead) +'\n\n')

    # Fold the signal embedding E through each third of gru_a's input kernel
    # so the C code does a single table lookup per input (embedding and
    # matmul fused into one precomputed table).
    embed_size = lpcnet.embed_size
    E = model.get_layer('embed_sig').get_weights()[0]
    W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
    dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
    W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
    dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
    W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
    dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
    # Remaining kernel rows take the conditioning features directly.
    W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
    #FIXME: dump only half the biases
    b = model.get_layer('gru_a').get_weights()[2]
    dump_dense_layer_impl('gru_a_dense_feature', W, b[:len(b)//2], 'LINEAR', f, hf)

    # gru_b's feature-input rows, dumped as a dense layer.
    W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
    b = model.get_layer('gru_b').get_weights()[2]
    # Set biases to zero because they'll be included in the GRU input part
    # (we need regular and SU biases)
    dump_dense_layer_impl('gru_b_dense_feature', W, 0*b[:len(b)//2], 'LINEAR', f, hf)
    dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)

    # Dump every layer through its dump_layer hook; layers returning True
    # carry state and get a slot in NNetState.
    layer_list = []
    for i, layer in enumerate(model.layers):
        if layer.dump_layer(f, hf):
            layer_list.append(layer.name)

    dump_sparse_gru(model.get_layer('gru_a'), f, hf)

    # Table of all emitted arrays, used when loading weights from a file.
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('const WeightArray lpcnet_arrays[] = {\n')
    for name in array_list:
        f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
        f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
        f.write('#endif\n')
    f.write(' {NULL, 0, 0, NULL}\n};\n')
    f.write('#endif\n')

    model_init.write(' return 0;\n}\n')
    model_init.write('#endif\n')
    f.write(model_init.getvalue())

    # Buffer-size constants collected while dumping.
    hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
    hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
    hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))

    hf.write('typedef struct {\n')
    for i, name in enumerate(layer_list):
        hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
    hf.write('} NNetState;\n\n')
    model_struct.write('} LPCNetModel;\n\n')
    hf.write(model_struct.getvalue())
    hf.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays);\n\n')

    hf.write('\n\n#endif\n')
    f.close()
    hf.close()

View File

@@ -0,0 +1,296 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2017-2018 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import lpcnet_plc
import io
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
import h5py
import re
# Flag for dumping e2e (differentiable lpc) network weights
flag_e2e = False

# Running maxima gathered while dumping; written out as #defines below.
max_rnn_neurons = 1
max_conv_inputs = 1
def printVector(f, vector, name, dtype='float', dotp=False):
    """Emit `vector` into the C source `f` as a static const array `name`.

    Same as the dump_lpcnet version above: dotp=True permutes the data
    into the 4x8 blocked DOT_PROD layout, and each new name is recorded
    in the module-level `array_list`.
    """
    global array_list
    if dotp:
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    v = np.reshape(vector, (-1));
    #print('static const float ', name, '[', len(v), '] = \n', file=f)
    if name not in array_list:
        array_list.append(name)
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(v[i]))
        if (i!=len(v)-1):
            f.write(',')
        else:
            break;
        # Eight values per output line.
        if (i%8==7):
            f.write("\n ")
        else:
            f.write(" ")
    #print(v, file=f)
    f.write('\n};\n')
    f.write('#endif\n\n')
    return;
def printSparseVector(f, A, name, have_diag=True):
    """Emit GRU weight matrix A (N x 3N) in block-sparse form.

    Same as the dump_lpcnet version above; A is modified in place when
    have_diag is True, and the int-quantized matrix AQ is returned.
    """
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        # One diagonal per GRU gate, written separately and zeroed in A.
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # Quantize to the int8 range with a fixed scale of 128.
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        # Per stripe: nonzero-block count (patched below) then row offsets.
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
def dump_layer_ignore(self, f, hf):
    """Default dump hook: layers with no specialized dumper are skipped."""
    print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
    # False: this layer contributes no state slot to PLCNetState.
    return False
Layer.dump_layer = dump_layer_ignore
def dump_sparse_gru(self, f, hf):
    """Dump a GRU with block-sparse recurrent weights.

    Same as the dump_lpcnet version above.  NOTE(review): defined but not
    attached or called in this script (the call below is commented out).
    """
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    # Compensate the recurrent bias row for the int8 quantization offset.
    subias = weights[-1].copy()
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    model_struct.write(' SparseGRULayer {};\n'.format(name));
    model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
        .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    return True
def dump_gru_layer(self, f, hf):
    """Dump hook for GRU layers: the full input kernel is sparse-coded
    (unlike the LPCNet script, which splits gru_b's kernel)."""
    global max_rnn_neurons
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
    # Recurrent weights: quantized for DOT_PROD builds, float otherwise.
    f.write('#ifdef DOT_PROD\n')
    qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
    printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, weights[1], name + '_recurrent_weights')
    f.write('#endif /*DOT_PROD*/\n')
    printVector(f, weights[-1], name + '_bias')
    # Compensate both bias rows for the quantization-induced offsets.
    subias = weights[-1].copy()
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    model_struct.write(' GRULayer {};\n'.format(name));
    model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
        .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
    return True
GRU.dump_layer = dump_gru_layer
def dump_gru_layer_dummy(self, f, hf):
    """Size-only GRU dump; unused here (attachment below is commented out,
    so dump_gru_layer above handles GRUs in this script)."""
    name = self.name
    weights = self.get_weights()
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    return True;
#GRU.dump_layer = dump_gru_layer_dummy
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Emit a dense layer (weights + bias) under an explicit name."""
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    model_struct.write(' DenseLayer {};\n'.format(name));
    model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
        .format(name, name, name, weights.shape[0], weights.shape[1], activation))
def dump_dense_layer(self, f, hf):
    """Dump hook for Keras Dense layers."""
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    activation = self.activation.__name__.upper()
    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
    # False: dense layers are stateless.
    return False
Dense.dump_layer = dump_dense_layer
def dump_conv1d_layer(self, f, hf):
    """Dump hook for Conv1D layers; state is the (kernel-1)-sample input
    history the C code must keep between calls."""
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    # Kernel shape is (kernel_size, in_channels, out_channels).
    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    model_struct.write(' Conv1DLayer {};\n'.format(name));
    model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
        .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    return True
Conv1D.dump_layer = dump_conv1d_layer
# Usage: dump_plc.py <model.h5> [<c file> <h file>]
filename = sys.argv[1]
# Read the stored weight shapes first so a correctly-sized model can be
# instantiated before load_weights().
with h5py.File(filename, "r") as f:
    units = min(f['model_weights']['plc_gru1']['plc_gru1']['recurrent_kernel:0'].shape)
    # NOTE(review): units2 is read but never used below.
    units2 = min(f['model_weights']['plc_gru2']['plc_gru2']['recurrent_kernel:0'].shape)
    cond_size = f['model_weights']['plc_dense1']['plc_dense1']['kernel:0'].shape[1]

model = lpcnet_plc.new_lpcnet_plc_model(rnn_units=units, cond_size=cond_size)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()
model.load_weights(filename, by_name=True)

# Optional output file names on the command line, with defaults.
if len(sys.argv) > 2:
    cfile = sys.argv[2];
    hfile = sys.argv[3];
else:
    cfile = 'plc_data.c'
    hfile = 'plc_data.h'

f = open(cfile, 'w')
hf = open(hfile, 'w')
# Struct/init-function text is buffered and appended once all layers are seen.
model_struct = io.StringIO()
model_init = io.StringIO()
model_struct.write('typedef struct {\n')
model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
model_init.write('int init_plc_model(PLCModel *model, const WeightArray *arrays) {\n')
# Names of all arrays emitted by printVector (used for the weight table).
array_list = []

f.write('/*This file is automatically generated from a Keras model*/\n')
f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))

hf.write('/*This file is automatically generated from a Keras model*/\n\n')
hf.write('#ifndef PLC_DATA_H\n#define PLC_DATA_H\n\n#include "nnet.h"\n\n')

# Dump every layer through its dump_layer hook; layers returning True carry
# state and get a slot in PLCNetState.
layer_list = []
for i, layer in enumerate(model.layers):
    if layer.dump_layer(f, hf):
        layer_list.append(layer.name)

#dump_sparse_gru(model.get_layer('gru_a'), f, hf)

# Table of all emitted arrays, used when loading weights from a file.
f.write('#ifndef USE_WEIGHTS_FILE\n')
f.write('const WeightArray lpcnet_plc_arrays[] = {\n')
for name in array_list:
    f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
    f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
    f.write('#endif\n')
f.write(' {NULL, 0, 0, NULL}\n};\n')
f.write('#endif\n')

model_init.write(' return 0;\n}\n')
model_init.write('#endif\n')
f.write(model_init.getvalue())

hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
#hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))

hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
    hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
hf.write('} PLCNetState;\n\n')
model_struct.write('} PLCModel;\n\n')
hf.write(model_struct.getvalue())
hf.write('int init_plc_model(PLCModel *model, const WeightArray *arrays);\n\n')

hf.write('\n\n#endif\n')
f.close()
hf.close()

View File

@@ -0,0 +1,306 @@
"""
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import argparse
# NOTE(review): ftplib.parse150 is never used below — this looks like an
# accidental IDE auto-import; confirm and remove.
from ftplib import parse150
import os
# Hide all GPUs: this dump script only reads weights; CPU TF is sufficient.
os.environ['CUDA_VISIBLE_DEVICES'] = ""
# Command-line interface: the weight file is required; the hyper-parameters
# must match the ones used when the model was trained.
parser = argparse.ArgumentParser()
parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
args = parser.parse_args()
# now import the heavy stuff
import tensorflow as tf
import numpy as np
from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer, printVector
from rdovae import new_rdovae_model
def start_header(header_fid, header_name):
    """Write the opening of a generated C header: the include-guard
    #ifndef/#define pair.

    The guard name is the header's basename with its extension stripped,
    upper-cased, with "_H" appended (e.g. "foo_data.h" -> FOO_DATA_H).
    """
    # os.path.splitext is robust to any extension length; the previous
    # [:-2] slice silently mangled names not ending in a two-char ".h".
    header_guard = os.path.splitext(os.path.basename(header_name))[0].upper() + "_H"
    header_fid.write(
f"""
#ifndef {header_guard}
#define {header_guard}
"""
    )
def finish_header(header_fid):
    """Close a header opened with start_header() by emitting the #endif."""
    header_fid.write("\n#endif\n")
def start_source(source_fid, header_name, weight_file):
    """Write the generated C source preamble: a provenance comment naming the
    weight file, the HAVE_CONFIG_H guard, and the matching header include."""
    preamble = (
        "\n/* this source file was automatically generated from weight file "
        + weight_file + " */\n"
        "#ifdef HAVE_CONFIG_H\n"
        '#include "config.h"\n'
        "#endif\n"
        + f'#include "{header_name}"\n'
    )
    source_fid.write(preamble)
def finish_source(source_fid):
    """Placeholder for symmetry with start_source(); nothing to emit."""
    return None
def dump_statistical_model(qembedding, f, fh):
    """Dump the quantizer-embedding ("statistical model") tables as fixed-point
    C arrays: array definitions into `f`, extern declarations into `fh`.

    The embedding matrix packs 6 blocks of N columns per quantization level;
    the blocks used here are: [0,N) quant scales, [N,2N) dead zones, and
    [4N,5N)/[5N,6N) the p0/r rate-model parameters.
    """
    w = qembedding.weights[0].numpy()
    levels, dim = w.shape
    N = dim // 6
    print("dumping statistical model")
    # softplus/sigmoid map the unconstrained embedding weights to positive
    # scales / (0,1) probabilities, mirroring the training-time graph.
    quant_scales = tf.math.softplus(w[:, : N]).numpy()
    dead_zone = 0.05 * tf.math.softplus(w[:, N : 2 * N]).numpy()
    r = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()
    p0 = tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()
    # Reparameterize p0 in terms of r (matches the decoder-side rate model).
    p0 = 1 - r ** (0.5 + 0.5 * p0)
    # Convert to the fixed-point Q-formats expected by the C decoder.
    quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
    dead_zone_q10 = np.round(dead_zone * 2**10).astype(np.uint16)
    r_q15 = np.round(r * 2**15).astype(np.uint16)
    p0_q15 = np.round(p0 * 2**15).astype(np.uint16)
    printVector(f, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)
    printVector(f, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
    printVector(f, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
    printVector(f, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)
    fh.write(
f"""
extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
extern const opus_uint16 dred_r_q15[{levels * N}];
extern const opus_uint16 dred_p0_q15[{levels * N}];
"""
    )
if __name__ == "__main__":
    # Build the full model and load the trained weights; the sub-models
    # (encoder, decoder, qembedding) share those weights.
    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
    model.load_weights(args.weights)

    # encoder: layer names to dump into dred_rdovae_enc_data.{c,h}
    encoder_dense_names = [
        'enc_dense1',
        'enc_dense3',
        'enc_dense5',
        'enc_dense7',
        'enc_dense8',
        'gdense1',
        'gdense2'
    ]
    encoder_gru_names = [
        'enc_dense2',
        'enc_dense4',
        'enc_dense6'
    ]
    encoder_conv1d_names = [
        'bits_dense'
    ]
    source_fid = open("dred_rdovae_enc_data.c", 'w')
    header_fid = open("dred_rdovae_enc_data.h", 'w')
    start_header(header_fid, "dred_rdovae_enc_data.h")
    start_source(source_fid, "dred_rdovae_enc_data.h", os.path.basename(args.weights))
    header_fid.write(
f"""
#include "dred_rdovae_constants.h"
#include "nnet.h"
"""
    )
    # dump GRUs (sparse, dot-product quantized); each dump returns the layer
    # width so the maxima below can size the C-side scratch buffers
    max_rnn_neurons_enc = max(
        [
            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
            for name in encoder_gru_names
        ]
    )
    # dump conv layers
    max_conv_inputs = max(
        [
            dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
            for name in encoder_conv1d_names
        ]
    )
    # dump Dense layers
    for name in encoder_dense_names:
        layer = encoder.get_layer(name)
        dump_dense_layer(layer, source_fid, header_fid)
    # some global constants
    header_fid.write(
f"""
#define DRED_ENC_MAX_RNN_NEURONS {max_rnn_neurons_enc}
#define DRED_ENC_MAX_CONV_INPUTS {max_conv_inputs}
"""
    )
    finish_header(header_fid)
    finish_source(source_fid)
    header_fid.close()
    source_fid.close()

    # statistical model: quantizer tables into dred_rdovae_stats_data.{c,h}
    source_fid = open("dred_rdovae_stats_data.c", 'w')
    header_fid = open("dred_rdovae_stats_data.h", 'w')
    start_header(header_fid, "dred_rdovae_stats_data.h")
    start_source(source_fid, "dred_rdovae_stats_data.h", os.path.basename(args.weights))
    header_fid.write(
"""
#include "opus_types.h"
"""
    )
    dump_statistical_model(qembedding, source_fid, header_fid)
    finish_header(header_fid)
    finish_source(source_fid)
    header_fid.close()
    source_fid.close()

    # decoder: layer names to dump into dred_rdovae_dec_data.{c,h}
    decoder_dense_names = [
        'state1',
        'state2',
        'state3',
        'dec_dense1',
        'dec_dense3',
        'dec_dense5',
        'dec_dense7',
        'dec_dense8',
        'dec_final'
    ]
    decoder_gru_names = [
        'dec_dense2',
        'dec_dense4',
        'dec_dense6'
    ]
    source_fid = open("dred_rdovae_dec_data.c", 'w')
    header_fid = open("dred_rdovae_dec_data.h", 'w')
    start_header(header_fid, "dred_rdovae_dec_data.h")
    start_source(source_fid, "dred_rdovae_dec_data.h", os.path.basename(args.weights))
    header_fid.write(
f"""
#include "dred_rdovae_constants.h"
#include "nnet.h"
"""
    )
    # dump GRUs
    max_rnn_neurons_dec = max(
        [
            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
            for name in decoder_gru_names
        ]
    )
    # dump Dense layers
    for name in decoder_dense_names:
        layer = decoder.get_layer(name)
        dump_dense_layer(layer, source_fid, header_fid)
    # some global constants
    header_fid.write(
f"""
#define DRED_DEC_MAX_RNN_NEURONS {max_rnn_neurons_dec}
"""
    )
    finish_header(header_fid)
    finish_source(source_fid)
    header_fid.close()
    source_fid.close()

    # common constants shared by encoder and decoder builds
    header_fid = open("dred_rdovae_constants.h", 'w')
    start_header(header_fid, "dred_rdovae_constants.h")
    header_fid.write(
f"""
#define DRED_NUM_FEATURES 20
#define DRED_LATENT_DIM {args.latent_dim}
#define DRED_STATE_DIM {24}
#define DRED_NUM_QUANTIZATION_LEVELS {qembedding.weights[0].shape[0]}
#define DRED_MAX_RNN_NEURONS {max(max_rnn_neurons_enc, max_rnn_neurons_dec)}
#define DRED_MAX_CONV_INPUTS {max_conv_inputs}
"""
    )
    finish_header(header_fid)
View File

@@ -0,0 +1,125 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2018-2019 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
# Train an LPCNet model
# (Actually: run an RDOVAE encode/quantize/decode round trip over a feature
# file and dump the intermediate tensors to .f32 files for inspection.)
import argparse
#from plc_loader import PLCLoader

parser = argparse.ArgumentParser(description='Train a PLC model')
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--weights', metavar='<input weights>', help='model weights')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
# NOTE(review): default is 1 but the help text says 128 — confirm which is intended.
parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 128)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
args = parser.parse_args()

# The model module is chosen at run time via --model.
import importlib
rdovae = importlib.import_module(args.model)
from rdovae import apply_dead_zone
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py
import tensorflow as tf
from rdovae import pvq_quantize

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size
model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
model.load_weights(args.weights)
lpc_order = 16
feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
sequence_size = args.seq_length

# u for unquantised, load 16 bit PCM samples and convert to mu-law
# Load and reshape features into (sequences, time, features), truncated to a
# whole number of batches, then drop the trailing LPC coefficients.
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]
features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
print(features.shape)
features = features[:, :, :nb_used_features]
#features = np.random.randn(73600, 1000, 17)

# Run the encoder, then dump the raw decoder state.
bits, gru_state_dec = encoder.predict([features], batch_size=batch_size)
(gru_state_dec).astype('float32').tofile(args.output + "-state.f32")
#dist = rdovae.feat_dist_loss(features, quant_out)
#rate = rdovae.sq1_rate_loss(features, model_bits)
#rate2 = rdovae.sq_rate_metric(features, model_bits)
#print(dist, rate, rate2)
print("shapes are:")
print(bits.shape)
print(gru_state_dec.shape)
features.astype('float32').tofile(args.output + "-input.f32")
#quant_out.astype('float32').tofile(args.output + "-enc_dec.f32")
nbits=80
bits.astype('float32').tofile(args.output + "-syms.f32")

# Fixed lambda -> quantizer id -> per-symbol quantization parameters from the
# quantizer embedding; then emulate scale/dead-zone/round quantization.
lambda_val = 0.0002 * np.ones((nb_sequences, sequence_size//2, 1))
quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
quant_id = quant_id[:,:,0]
quant_embed = qembedding(quant_id)
quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])
bits = bits*quant_scale
bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
bits = bits/quant_scale

# Quantize the decoder initial state on the unit hypersphere and keep only
# the final time step, then decode every other symbol frame.
gru_state_dec = pvq_quantize(gru_state_dec, 82)
#gru_state_dec = gru_state_dec/(1e-15+tf.norm(gru_state_dec, axis=-1,keepdims=True))
gru_state_dec = gru_state_dec[:,-1,:]
dec_out = decoder([bits[:,1::2,:], gru_state_dec])
print(dec_out.shape)
dec_out.numpy().astype('float32').tofile(args.output + "-quant_out.f32")

View File

@@ -0,0 +1,256 @@
"""
/* Copyright (c) 2022 Amazon
Written by Jan Buethe and Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import os
import subprocess
import argparse
import numpy as np
from scipy.io import wavfile
import tensorflow as tf
from rdovae import new_rdovae_model, pvq_quantize, apply_dead_zone, sq_rate_metric
from fec_packets import write_fec_packets, read_fec_packets
# When debug is set, command-line parsing is bypassed and a fixed argument
# set is used (with GPUs disabled).
debug = False

if debug:
    args = type('dummy', (object,),
    {
        'input' : 'item1.wav',
        'weights' : 'testout/rdovae_alignment_fix_1024_120.h5',
        'enc_lambda' : 0.0007,
        'output' : "test_0007.fec",
        'cond_size' : 1024,
        'num_redundancy_frames' : 64,
        'extra_delay' : 0,
        'dump_data' : './dump_data'
    })()
    os.environ['CUDA_VISIBLE_DEVICES']=""
else:
    parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with voip application and 20ms frames')

    parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
    parser.add_argument('weights', metavar='<weights>', help='trained model file (.h5)')
    # parser.add_argument('enc_lambda', metavar='<lambda>', type=float, help='lambda for controlling encoder rate')
    parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
    parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
    parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
    parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 40)", default=40)
    parser.add_argument('--num-redundancy-frames', default=64, type=int, help='number of redundancy frames (20ms) per packet (default 64)')
    parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
    parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
    parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')

    args = parser.parse_args()

model, encoder, decoder, qembedding = new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=1, nb_quant=args.quant_levels, cond_size=args.cond_size)
model.load_weights(args.weights)

lpc_order = 16

## prepare input signal
# SILK frame size is 20ms and LPCNet subframes are 10ms
subframe_size = 160
frame_size = 2 * subframe_size

# 91 samples delay to align with SILK decoded frames
silk_delay = 91

# prepend zeros to have enough history to produce the first package
zero_history = (args.num_redundancy_frames - 1) * frame_size

# dump data has a (feature) delay of 10ms
dump_data_delay = 160

total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay

# load signal
if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'):
    signal = np.fromfile(args.input, dtype='int16')
elif args.input.endswith('.wav'):
    fs, signal = wavfile.read(args.input)
else:
    raise ValueError(f'unknown input signal format: {args.input}')

# fill up last frame with zeros
padded_signal_length = len(signal) + total_delay
tail = padded_signal_length % frame_size
right_padding = (frame_size - tail) % frame_size

signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))

padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
signal.tofile(padded_signal_file)

# write signal and call dump_data to create features
feature_file = os.path.splitext(args.input)[0] + '_features.f32'
command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"
r = subprocess.run(command, shell=True)
if r.returncode != 0:
    raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")

# load features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features

# load features, truncate to an even number of 10ms subframes and keep only
# the non-LPC part
features = np.fromfile(feature_file, dtype='float32')
num_subframes = len(features) // nb_features
num_subframes = 2 * (num_subframes // 2)
num_frames = num_subframes // 2

features = np.reshape(features, (1, -1, nb_features))
features = features[:, :, :nb_used_features]
features = features[:, :num_subframes, :]

#variable quantizer depending on the delay
q0 = 3
q1 = 15
quant_id = np.round(q1 + (q0-q1)*np.arange(args.num_redundancy_frames//2)/args.num_redundancy_frames).astype('int16')
#print(quant_id)

quant_embed = qembedding(quant_id)

# run encoder
print("running fec encoder...")
symbols, gru_state_dec = encoder.predict(features)

# apply quantization
nsymbols = 80
quant_scale = tf.math.softplus(quant_embed[:, :nsymbols]).numpy()
dead_zone = tf.math.softplus(quant_embed[:, nsymbols : 2 * nsymbols]).numpy()
#symbols = apply_dead_zone([symbols, dead_zone]).numpy()
#qsymbols = np.round(symbols)
quant_gru_state_dec = pvq_quantize(gru_state_dec, 82)

# rate estimate
hard_distr_embed = tf.math.sigmoid(quant_embed[:, 4 * nsymbols : ]).numpy()
#rate_input = np.concatenate((qsymbols, hard_distr_embed, enc_lambda), axis=-1)
#rates = sq_rate_metric(None, rate_input, reduce=False).numpy()

# run decoder
input_length = args.num_redundancy_frames // 2
offset = args.num_redundancy_frames - 1

packets = []
packet_sizes = []

sym_batch = np.zeros((num_frames-offset, args.num_redundancy_frames//2, nsymbols), dtype='float32')
quant_state = quant_gru_state_dec[0, offset:num_frames, :]
#pack symbols for batch processing: every other subframe going back
# 2*input_length-1 subframes from frame i
for i in range(offset, num_frames):
    sym_batch[i-offset, :, :] = symbols[0, i - 2 * input_length + 2 : i + 1 : 2, :]

#quantize symbols
sym_batch = sym_batch * quant_scale
sym_batch = apply_dead_zone([sym_batch, dead_zone]).numpy()
sym_batch = np.round(sym_batch)

hard_distr_embed = np.broadcast_to(hard_distr_embed, (sym_batch.shape[0], sym_batch.shape[1], 2*sym_batch.shape[2]))
fake_lambda = np.ones((sym_batch.shape[0], sym_batch.shape[1], 1), dtype='float32')
rate_input = np.concatenate((sym_batch, hard_distr_embed, fake_lambda), axis=-1)
rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
#print(rates.shape)
print("average rate = ", np.mean(rates[args.num_redundancy_frames:,:]))

#sym_batch.tofile('qsyms.f32')
sym_batch = sym_batch / quant_scale
#print(sym_batch.shape, quant_state.shape)
#features = decoder.predict([sym_batch, quant_state])
features = decoder([sym_batch, quant_state])

#for i in range(offset, num_frames):
#    print(f"processing frame {i - offset}...")
#    features = decoder.predict([qsymbols[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_embed_dec[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_gru_state_dec[:, i, :]])
#    packets.append(features)
#    packet_size = 8 * int((np.sum(rates[:, i - 2 * input_length + 2 : i + 1 : 2]) + 7) / 8) + 64
#    packet_sizes.append(packet_size)

# write packets
packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
#write_fec_packets(packet_file, packets, packet_sizes)

#print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")

# Simulate concealment with a loss trace: each lost frame extends the span of
# recovered features taken from the next received packet.
# NOTE(review): `!= None` should be `is not None` (PEP 8); left unchanged here.
if args.lossfile != None:
    loss = np.loadtxt(args.lossfile, dtype='int16')
    fec_out = np.zeros((features.shape[0]*2, features.shape[-1]), dtype='float32')
    foffset = -2
    ptr = 0;
    count = 2;
    for i in range(features.shape[0]):
        if (loss[i] == 0) or (i == features.shape[0]-1):
            fec_out[ptr:ptr+count,:] = features[i, foffset:, :]
            #print("filled ", count)
            foffset = -2
            ptr = ptr+count
            count = 2
        else:
            count = count + 2
            foffset = foffset - 2

    fec_out_full = np.zeros((fec_out.shape[0], nb_features), dtype=np.float32)
    fec_out_full[:, :nb_used_features] = fec_out

    fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')

#create packets array like in the original version for debugging purposes
for i in range(offset, num_frames):
    packets.append(features[i-offset:i-offset+1, :, :])

if args.debug_output:
    import itertools

    #batches = [2, 4]
    batches = [4]
    #offsets = [0, 4, 20]
    offsets = [0, (args.num_redundancy_frames - 2)*2]

    # sanity checks
    # 1. concatenate features at offset 0
    for batch, offset in itertools.product(batches, offsets):
        stop = packets[0].shape[1] - offset
        print(batch, offset, stop)
        test_features = np.concatenate([packet[:,stop - batch: stop, :] for packet in packets[::batch//2]], axis=1)

        test_features_full = np.zeros((test_features.shape[1], nb_features), dtype=np.float32)
        test_features_full[:, :nb_used_features] = test_features[0, :, :]

        print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}")
        test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32')

View File

@@ -0,0 +1,142 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <inttypes.h>
#include "fec_packets.h"
/* Reads one feature subframe from a .fec file written by fec_packets.py.
 *
 * filename:       path to the .fec file
 * features:       output buffer, receives num_features floats
 * packet_index:   which packet to read (0-based)
 * subframe_index: which subframe inside that packet (0-based)
 *
 * Returns 0 on success, 1 on any error.
 */
int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
{
    int16_t version;
    int16_t header_size;
    int16_t num_packets;
    int16_t packet_size;
    int16_t subframe_size;
    int16_t subframes_per_packet;
    int16_t num_features;
    long offset;

    FILE *fid = fopen(filename, "rb");
    /* bug fix: a failed fopen previously fell through to fread(..., NULL)
       and fclose(NULL), both undefined behavior */
    if (fid == NULL)
    {
        fprintf(stderr, "get_fec_frame: could not open file %s\n", filename);
        return 1;
    }

    /* read header */
    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;

    /* check if indices are valid */
    if (packet_index >= num_packets || subframe_index >= subframes_per_packet)
    {
        fprintf(stderr, "get_fec_frame: index out of bounds\n");
        goto error;
    }

    /* calculate offset in file (+ 2 is for rate) */
    offset = header_size + packet_index * packet_size + 2 + subframe_index * subframe_size;
    if (fseek(fid, offset, SEEK_SET) != 0) goto error;

    /* read features */
    if (fread(features, sizeof(*features), num_features, fid) != (size_t) num_features) goto error;

    fclose(fid);
    return 0;

error:
    fclose(fid);
    return 1;
}
/* Reads the rate field stored for one packet in a .fec file written by
 * fec_packets.py.
 *
 * filename:     path to the .fec file
 * packet_index: which packet to query (0-based)
 *
 * Returns the (non-negative) rate on success, -1 on any error.
 */
int get_fec_rate(const char * const filename, int packet_index)
{
    int16_t version;
    int16_t header_size;
    int16_t num_packets;
    int16_t packet_size;
    int16_t subframe_size;
    int16_t subframes_per_packet;
    int16_t num_features;
    long offset;
    int16_t rate;

    FILE *fid = fopen(filename, "rb");
    /* bug fix: a failed fopen previously fell through to fread(..., NULL)
       and fclose(NULL), both undefined behavior */
    if (fid == NULL)
    {
        fprintf(stderr, "get_fec_rate: could not open file %s\n", filename);
        return -1;
    }

    /* read header */
    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;

    /* check if indices are valid */
    if (packet_index >= num_packets)
    {
        fprintf(stderr, "get_fec_rate: index out of bounds\n");
        goto error;
    }

    /* calculate offset in file (+ 2 is for rate) */
    offset = header_size + packet_index * packet_size;
    if (fseek(fid, offset, SEEK_SET) != 0) goto error;

    /* read rate */
    if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;

    fclose(fid);
    return (int) rate;

error:
    fclose(fid);
    return -1;
}
#if 0
/* Ad-hoc smoke test for the readers above: expects ../test.fec to exist;
   prints subframe 127 of packet 0 plus that packet's rate.  Compiled out by
   the surrounding #if 0. */
int main()
{
    float features[20];
    int i;
    if (get_fec_frame("../test.fec", &features[0], 0, 127))
    {
        return 1;
    }

    for (i = 0; i < 20; i ++)
    {
        printf("%d %f\n", i, features[i]);
    }

    printf("rate: %d\n", get_fec_rate("../test.fec", 0));
}
#endif

View File

@@ -0,0 +1,34 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FEC_PACKETS_H
#define FEC_PACKETS_H

/* Read subframe <subframe_index> of packet <packet_index> from <filename>
   into <features>; returns 0 on success, 1 on failure. */
int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
/* Return the rate stored for packet <packet_index>, or -1 on failure. */
int get_fec_rate(const char * const filename, int packet_index);

#endif

View File

@@ -0,0 +1,108 @@
"""
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import numpy as np
def write_fec_packets(filename, packets, rates=None):
    """ writes packets in binary format

    File layout: a 14-byte little-endian int16 header (version, header_size,
    num_packets, packet_size, subframe_size, subframes_per_packet,
    num_features) followed by one record per packet: an int16 rate and the
    float32 features with the subframe axis flipped (most recent first).

    Args:
        filename: output path
        packets:  list of float arrays, all shaped (..., subframes, features)
        rates:    optional per-packet rates (int16); zeros when omitted
    """
    assert np.dtype(np.float32).itemsize == 4
    assert np.dtype(np.int16).itemsize == 2

    # derive some sizes
    num_packets = len(packets)
    subframes_per_packet = packets[0].shape[-2]
    num_features = packets[0].shape[-1]

    # size of float is 4
    subframe_size = num_features * 4
    packet_size = subframe_size * subframes_per_packet + 2  # two bytes for rate

    version = 1
    # header size (version, header_size, num_packets, packet_size, subframe_size, subrames_per_packet, num_features)
    header_size = 14

    with open(filename, 'wb') as f:

        # header
        f.write(np.int16(version).tobytes())
        f.write(np.int16(header_size).tobytes())
        f.write(np.int16(num_packets).tobytes())
        f.write(np.int16(packet_size).tobytes())
        f.write(np.int16(subframe_size).tobytes())
        f.write(np.int16(subframes_per_packet).tobytes())
        f.write(np.int16(num_features).tobytes())

        # packets
        for i, packet in enumerate(packets):
            # idiomatic None test (was: type(rates) == type(None))
            rate = 0 if rates is None else rates[i]
            f.write(np.int16(rate).tobytes())

            features = np.flip(packet, axis=-2)
            f.write(features.astype(np.float32).tobytes())
def read_fec_packets(filename):
    """Read FEC feature packets written by ``write_fec_packets``.

    Parses the 14-byte int16 header, then one record per packet
    (int16 rate + float32 features), un-flipping the subframe axis so
    packets come back in their original orientation.

    Args:
        filename: path of a file produced by ``write_fec_packets``.

    Returns:
        List of float32 arrays shaped (1, subframes_per_packet, num_features).
    """
    assert np.dtype(np.float32).itemsize == 4
    assert np.dtype(np.int16).itemsize == 2

    with open(filename, 'rb') as f:
        # header (version/header_size/packet_size are read to advance the
        # stream; only the shape fields are needed below)
        version = np.frombuffer(f.read(2), dtype=np.int16).item()
        header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
        num_packets = np.frombuffer(f.read(2), dtype=np.int16).item()
        packet_size = np.frombuffer(f.read(2), dtype=np.int16).item()
        subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
        subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
        num_features = np.frombuffer(f.read(2), dtype=np.int16).item()

        dummy_features = np.zeros((1, subframes_per_packet, num_features), dtype=np.float32)

        # packets
        rates = []
        packets = []
        for i in range(num_packets):
            # bug fix: .item was referenced but never called, so rates
            # accumulated bound methods instead of integers
            rate = np.frombuffer(f.read(2), dtype=np.int16).item()
            rates.append(rate)
            # subframe_size is already in bytes (num_features * 4)
            features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
            packet = np.flip(features, axis=-2)
            packets.append(packet)

    return packets

View File

@@ -0,0 +1,189 @@
'''Copyright (c) 2017-2018 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
""" helper functions for dumping some Keras layers to C files """
import numpy as np
def printVector(f, vector, name, dtype='float', dotp=False, static=True):
    """ prints vector as one-dimensional C array

    Writes the flattened contents of `vector` to file object `f` as a
    C array declaration named `name` with element type `dtype`.
    When `dotp` is True, the weights are first reordered into the
    interleaved 4x8 sub-block layout used by the DOT_PROD kernels.
    Returns the (possibly reordered) vector.
    """
    if dotp:
        # regroup rows by 4 and columns by 8, then interleave for dot-product code
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    v = np.reshape(vector, (-1))
    if static:
        f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    else:
        f.write('const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(v[i]))
        if (i!=len(v)-1):
            f.write(',')
        else:
            # last element written: stop before emitting a separator
            break;
        if (i%8==7):
            # wrap the C initializer list every 8 elements
            f.write("\n ")
        else:
            f.write(" ")
    f.write('\n};\n\n')
    return vector
def printSparseVector(f, A, name, have_diag=True):
    """Dump a block-sparse weight matrix as C arrays.

    Emits three arrays to `f`: the quantized nonzero 4x8 blocks (as
    `name`, with a DOT_PROD/non-DOT_PROD variant pair), an index array
    `name_idx` describing which blocks are kept, and (when `have_diag`)
    the three gate diagonals as `name_diag`.

    NOTE: when `have_diag` is True, `A` is modified in place (diagonals
    of its three N-column gate slabs are zeroed).
    Returns the int-quantized matrix AQ (A*128 clamped to [-128, 127]).
    """
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        # extract and strip the diagonal of each of the 3 gate sub-matrices
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # quantize to int8 range on a 1/128 grid
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        # idx layout per 8-wide column stripe: [nb_nonzero, row0, row1, ...]
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                # keep this 4x8 block; record its starting row
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
def dump_sparse_gru(self, f, hf):
    """Dump a sparse GRU layer (recurrent weights only) to C source/header.

    Installed as a method on a Keras GRU layer; `f` receives the weight
    arrays and the SparseGRULayer struct, `hf` receives size #defines and
    the extern declaration. Returns the layer's neuron count (used by the
    caller to size scratch buffers).
    """
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    # subias compensates for the bias shift introduced by int8 quantization
    subias = weights[-1].copy()
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # kernel has 3 stacked gates (z, r, h) -> divide by 3 for neuron count
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = neurons
    f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
    return max_rnn_neurons
def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
    """Dump a GRU layer's input and recurrent weights to C source/header.

    Installed as a method on a Keras GRU layer. `sparse` selects the
    block-sparse format for the input kernel; `dotp` additionally emits
    int8-quantized recurrent weights guarded by #ifdef DOT_PROD.
    Returns the layer's neuron count.
    """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    if sparse:
        qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
    else:
        qweight = printVector(f, weights[0], name + '_weights')
    if dotp:
        f.write('#ifdef DOT_PROD\n')
        qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
        printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
        f.write('#else /*DOT_PROD*/\n')
    else:
        qweight2 = weights[1]
    printVector(f, weights[1], name + '_recurrent_weights')
    if dotp:
        f.write('#endif /*DOT_PROD*/\n')
    printVector(f, weights[-1], name + '_bias')
    # subias compensates for the DC shift introduced by int8 quantization
    # of both the input and recurrent kernels
    subias = weights[-1].copy()
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # kernel has 3 stacked gates (z, r, h) -> divide by 3 for neuron count
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = neurons
    f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {},\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('extern const GRULayer {};\n\n'.format(name));
    return max_rnn_neurons
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Write a dense layer's weights, bias and DenseLayer struct to `f`,
    plus the size #define and extern declaration to `hf`."""
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    hf.write('extern const DenseLayer {};\n\n'.format(name));
def dump_dense_layer(self, f, hf):
    """Dump a Keras Dense layer (installed as a method) via
    dump_dense_layer_impl. Returns False (no RNN neurons contributed)."""
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    activation = self.activation.__name__.upper()
    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
    return False
def dump_conv1d_layer(self, f, hf):
    """Dump a Keras Conv1D layer (installed as a method) to C source/header.

    Emits weights/bias arrays and the Conv1DLayer struct to `f`, and
    OUT_SIZE/STATE_SIZE/DELAY #defines plus the extern declaration to `hf`.
    Returns the number of inputs the conv consumes per output sample.
    """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    # kernel shape is (kernel_size, in_channels, out_channels)
    max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]
    f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
    return max_conv_inputs

View File

@@ -0,0 +1,99 @@
"""
Custom Loss functions and metrics for training/analysis
"""
from tf_funcs import *
import tensorflow as tf
# The following loss functions all expect the lpcnet model to output the lpc prediction
# Computing the excitation by subtracting the lpc prediction from the target, followed by minimizing the cross entropy
def res_from_sigloss():
    """Return a loss computing sparse cross-entropy on the LPC residual.

    Expects y_pred channels: [0] LPC prediction, [2:] 256-way output
    distribution. The excitation is the mu-law of (target - prediction),
    rounded to the nearest integer class.
    """
    def loss(y_true,y_pred):
        p = y_pred[:,:,0:1]
        model_out = y_pred[:,:,2:]
        e_gt = tf_l2u(y_true - p)
        e_gt = tf.round(e_gt)
        e_gt = tf.cast(e_gt,'int32')
        sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out)
        return sparse_cel
    return loss
# Interpolated and Compensated Loss (In case of end to end lpcnet)
# Interpolates between adjacent embeddings based on the fractional value of the excitation computed (similar to the embedding interpolation)
# Also adds a probability compensation (to account for matching cross entropy in the linear domain), weighted by gamma
def interp_mulaw(gamma = 1):
    """Interpolated + compensated mu-law cross-entropy loss (end-to-end LPCNet).

    Interpolates between adjacent output classes using the fractional part
    of the mu-law excitation, adds a probability compensation term to match
    cross entropy in the linear domain, and a regularization term on the
    unweighted-LPC excitation weighted by `gamma`.
    """
    def loss(y_true,y_pred):
        y_true = tf.cast(y_true, 'float32')
        p = y_pred[:,:,0:1]          # gamma-weighted LPC prediction
        real_p = y_pred[:,:,1:2]     # unweighted LPC prediction
        model_out = y_pred[:,:,2:]   # 256-way distribution
        e_gt = tf_l2u(y_true - p)
        exc_gt = tf_l2u(y_true - real_p)
        prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
        regularization = tf.squeeze((K.abs(exc_gt - 128)/128.0)*K.log(256.0))
        # alpha = fractional part of the excitation -> interpolation weight
        alpha = e_gt - tf.math.floor(e_gt)
        alpha = tf.tile(alpha,[1,1,256])
        e_gt = tf.cast(e_gt,'int32')
        e_gt = tf.clip_by_value(e_gt,0,254)
        interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
        sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
        loss_mod = sparse_cel + prob_compensation + gamma*regularization
        return loss_mod
    return loss
# Same as above, except a metric
def metric_oginterploss(y_true,y_pred):
    """Metric form of the interpolated mu-law loss with probability
    compensation (no regularization term)."""
    p = y_pred[:,:,0:1]
    model_out = y_pred[:,:,2:]
    e_gt = tf_l2u(y_true - p)
    prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
    alpha = e_gt - tf.math.floor(e_gt)
    alpha = tf.tile(alpha,[1,1,256])
    e_gt = tf.cast(e_gt,'int32')
    e_gt = tf.clip_by_value(e_gt,0,254)
    interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
    sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
    loss_mod = sparse_cel + prob_compensation
    return loss_mod
# Interpolated cross entropy loss metric
def metric_icel(y_true, y_pred):
    """Interpolated cross-entropy metric on the mu-law excitation."""
    p = y_pred[:,:,0:1]
    model_out = y_pred[:,:,2:]
    e_gt = tf_l2u(y_true - p)
    alpha = e_gt - tf.math.floor(e_gt)
    alpha = tf.tile(alpha,[1,1,256])
    e_gt = tf.cast(e_gt,'int32')
    e_gt = tf.clip_by_value(e_gt,0,254) #Check direction
    interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
    sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
    return sparse_cel
# Non-interpolated (rounded) cross entropy loss metric
def metric_cel(y_true, y_pred):
    """Non-interpolated (rounded) cross-entropy metric on the mu-law
    excitation, clipped to the valid class range [0, 255]."""
    y_true = tf.cast(y_true, 'float32')
    p = y_pred[:,:,0:1]
    model_out = y_pred[:,:,2:]
    e_gt = tf_l2u(y_true - p)
    e_gt = tf.round(e_gt)
    e_gt = tf.cast(e_gt,'int32')
    e_gt = tf.clip_by_value(e_gt,0,255)
    sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out)
    return sparse_cel
# Variance metric of the output excitation
def metric_exc_sd(y_true,y_pred):
    """Variance-style metric: mean squared deviation of the mu-law
    excitation from the mu-law zero level (128)."""
    p = y_pred[:,:,0:1]
    e_gt = tf_l2u(y_true - p)
    sd_egt = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)(e_gt,128)
    return sd_egt
def loss_matchlar():
    """Return a loss matching predicted reflection coefficients to the
    targets via squared log-area-ratio distance (1.01 guards the log
    against |rc| reaching 1)."""
    def loss(y_true,y_pred):
        model_rc = y_pred[:,:,:16]
        #y_true = lpc2rc(y_true)
        loss_lar_diff = K.log((1.01 + model_rc)/(1.01 - model_rc)) - K.log((1.01 + y_true)/(1.01 - y_true))
        loss_lar_diff = tf.square(loss_lar_diff)
        return tf.reduce_mean(loss_lar_diff, axis=-1)
    return loss

View File

@@ -0,0 +1,339 @@
#!/usr/bin/python3
'''Copyright (c) 2018 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import math
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.initializers import Initializer
from tensorflow.keras.callbacks import Callback
from mdense import MDense
import numpy as np
import h5py
import sys
from tf_funcs import *
from diffembed import diff_Embed
from parameters import set_parameter
frame_size = 160
pcm_bits = 8
embed_size = 128
pcm_levels = 2**pcm_bits
def interleave(p, samples):
    """Expand one level of binary-tree split probabilities into a
    per-sample factor over all pcm_levels outputs by tiling (1-p, p)
    pairs across the level's span."""
    p2=tf.expand_dims(p, 3)
    nb_repeats = pcm_levels//(2*p.shape[2])
    p3 = tf.reshape(tf.repeat(tf.concat([1-p2, p2], 3), nb_repeats), (-1, samples, pcm_levels))
    return p3
def tree_to_pdf(p, samples):
    """Convert the 8-level binary-tree probabilities (256 sigmoid outputs)
    into a full 256-way pdf by multiplying the per-level factors."""
    return interleave(p[:,:,1:2], samples) * interleave(p[:,:,2:4], samples) * interleave(p[:,:,4:8], samples) * interleave(p[:,:,8:16], samples) \
        * interleave(p[:,:,16:32], samples) * interleave(p[:,:,32:64], samples) * interleave(p[:,:,64:128], samples) * interleave(p[:,:,128:256], samples)
def tree_to_pdf_train(p):
    """Training-time tree_to_pdf with a fixed sequence length."""
    #FIXME: try not to hardcode the 2400 samples (15 frames * 160 samples/frame)
    return tree_to_pdf(p, 2400)
def tree_to_pdf_infer(p):
    """Inference-time tree_to_pdf: one sample at a time."""
    return tree_to_pdf(p, 1)
def quant_regularizer(x):
    """Regularizer pushing weights toward exact multiples of 1/128
    (the int8 quantization grid used at inference time)."""
    Q = 128
    Q_1 = 1./Q
    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
class Sparsify(Callback):
    """Keras callback that progressively sparsifies (and optionally
    quantizes) the recurrent weights of the 'gru_a' layer.

    Between batches t_start and t_end, every `interval` batches the
    recurrent matrix of each gate is pruned in 4x8 blocks down to a
    density that anneals cubically toward `density` (one target per
    gate). With quantize=True, weights close to the 1/128 grid are
    additionally snapped to it with a growing capture threshold.
    """
    def __init__(self, t_start, t_end, interval, density, quantize=False):
        super(Sparsify, self).__init__()
        self.batch = 0              # batches seen so far
        self.t_start = t_start      # first batch at which pruning starts
        self.t_end = t_end          # batch at which final density is reached
        self.interval = interval    # apply the constraint every N batches
        self.final_density = density  # per-gate target densities
        self.quantize = quantize
    def on_batch_end(self, batch, logs=None):
        #print("batch number", self.batch)
        self.batch += 1
        if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
            #print("constrain");
            layer = self.model.get_layer('gru_a')
            w = layer.get_weights()
            p = w[1]  # recurrent kernel, shape (N, nb*N)
            nb = p.shape[1]//p.shape[0]
            N = p.shape[0]
            #print("nb = ", nb, ", N = ", N);
            for k in range(nb):
                density = self.final_density[k]
                if self.batch < self.t_end and not self.quantize:
                    # cubic annealing from density 1 toward the final target
                    r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
                    density = 1 - (1-self.final_density[k])*(1 - r*r*r)
                A = p[:, k*N:(k+1)*N]
                # diagonal is always kept; remove it before ranking blocks
                A = A - np.diag(np.diag(A))
                #This is needed because of the CuDNNGRU strange weight ordering
                A = np.transpose(A, (1, 0))
                # rank 4x8 blocks by their energy and keep the strongest
                L=np.reshape(A, (N//4, 4, N//8, 8))
                S=np.sum(L*L, axis=-1)
                S=np.sum(S, axis=1)
                SS=np.sort(np.reshape(S, (-1,)))
                thresh = SS[round(N*N//32*(1-density))]
                mask = (S>=thresh).astype('float32')
                mask = np.repeat(mask, 4, axis=0)
                mask = np.repeat(mask, 8, axis=1)
                # re-add the diagonal to the keep mask
                mask = np.minimum(1, mask + np.diag(np.ones((N,))))
                #This is needed because of the CuDNNGRU strange weight ordering
                mask = np.transpose(mask, (1, 0))
                p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
                #print(thresh, np.mean(mask))
            if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
                if self.batch < self.t_end:
                    # capture threshold grows linearly up to half a grid step
                    threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
                else:
                    threshold = .5
                quant = np.round(p*128.)
                res = p*128.-quant
                mask = (np.abs(res) <= threshold).astype('float32')
                p = mask/128.*quant + (1-mask)*p
            w[1] = p
            layer.set_weights(w)
class SparsifyGRUB(Callback):
    """Keras callback that sparsifies the *input* kernel of 'gru_b'.

    Same annealed 4x8 block pruning / optional quantization as Sparsify,
    but applied to the first `grua_units` input rows of gru_b's kernel
    (the part fed by gru_a's output); the remaining rows are left dense.
    """
    def __init__(self, t_start, t_end, interval, grua_units, density, quantize=False):
        super(SparsifyGRUB, self).__init__()
        self.batch = 0
        self.t_start = t_start
        self.t_end = t_end
        self.interval = interval
        self.final_density = density   # per-gate target densities
        self.grua_units = grua_units   # number of input rows to sparsify
        self.quantize = quantize
    def on_batch_end(self, batch, logs=None):
        #print("batch number", self.batch)
        self.batch += 1
        if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
            #print("constrain");
            layer = self.model.get_layer('gru_b')
            w = layer.get_weights()
            p = w[0]  # input kernel, shape (N, 3*M)
            N = p.shape[0]
            M = p.shape[1]//3
            for k in range(3):
                density = self.final_density[k]
                if self.batch < self.t_end and not self.quantize:
                    # cubic annealing from density 1 toward the final target
                    r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
                    density = 1 - (1-self.final_density[k])*(1 - r*r*r)
                A = p[:, k*M:(k+1)*M]
                #This is needed because of the CuDNNGRU strange weight ordering
                A = np.reshape(A, (M, N))
                A = np.transpose(A, (1, 0))
                # only the first grua_units rows (gru_a outputs) are pruned
                N2 = self.grua_units
                A2 = A[:N2, :]
                L=np.reshape(A2, (N2//4, 4, M//8, 8))
                S=np.sum(L*L, axis=-1)
                S=np.sum(S, axis=1)
                SS=np.sort(np.reshape(S, (-1,)))
                thresh = SS[round(M*N2//32*(1-density))]
                mask = (S>=thresh).astype('float32')
                mask = np.repeat(mask, 4, axis=0)
                mask = np.repeat(mask, 8, axis=1)
                A = np.concatenate([A2*mask, A[N2:,:]], axis=0)
                #This is needed because of the CuDNNGRU strange weight ordering
                A = np.transpose(A, (1, 0))
                A = np.reshape(A, (N, M))
                p[:, k*M:(k+1)*M] = A
                #print(thresh, np.mean(mask))
            if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
                if self.batch < self.t_end:
                    # capture threshold grows linearly up to half a grid step
                    threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
                else:
                    threshold = .5
                quant = np.round(p*128.)
                res = p*128.-quant
                mask = (np.abs(res) <= threshold).astype('float32')
                p = mask/128.*quant + (1-mask)*p
            w[0] = p
            layer.set_weights(w)
class PCMInit(Initializer):
    """Initializer for the signal embedding: uniform noise plus a linear
    per-row ramp, so embedding rows start out roughly ordered by PCM level."""

    def __init__(self, gain=.1, seed=None):
        self.gain = gain
        self.seed = seed

    def __call__(self, shape, dtype=None):
        # Collapse all leading dimensions into rows, keep the last as columns.
        n_rows = 1
        for dim in shape[:-1]:
            n_rows *= dim
        n_cols = shape[-1]
        if self.seed is not None:
            np.random.seed(self.seed)
        # Uniform noise with unit variance (sqrt(3) bounds).
        a = np.random.uniform(-1.7321, 1.7321, (n_rows, n_cols))
        # Add a monotone ramp over rows so row index correlates with value.
        ramp = math.sqrt(12)*np.arange(-.5*n_rows+.5,.5*n_rows-.4)/n_rows
        a = a + np.reshape(ramp, (n_rows, 1))
        return self.gain * a.astype("float32")

    def get_config(self):
        return {
            'gain': self.gain,
            'seed': self.seed
        }
class WeightClip(Constraint):
    '''Clips the weights incident to each hidden unit to be inside a range
    '''
    def __init__(self, c=2):
        self.c = c
    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
        #return K.clip(p, -self.c, self.c)
    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}
# Shared pairwise weight-clipping constraint applied to the GRU kernels
# below; 0.992 keeps quantized adjacent-weight pairs below the int8
# saturation limit (see WeightClip).
constraint = WeightClip(0.992)
def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128, lpc_order=16, lpc_gamma=1., lookahead=2):
    """Build the LPCNet model plus its encoder/decoder sub-models.

    Returns (model, encoder, decoder). With flag_e2e the LPC coefficients
    are derived from the conditioning network (end-to-end); otherwise they
    are an extra model input. `training` selects the CuDNN GRU variants,
    `quantize` enables the 1/128-grid regularizer, `adaptation` freezes
    the sample-rate network for fine-tuning.
    """
    # sample-rate inputs
    pcm = Input(shape=(None, 1), batch_size=batch_size)
    dpcm = Input(shape=(None, 3), batch_size=batch_size)
    # frame-rate inputs
    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
    pitch = Input(shape=(None, 1), batch_size=batch_size)
    # decoder-only inputs (streaming inference)
    dec_feat = Input(shape=(None, cond_size))
    dec_state1 = Input(shape=(rnn_units1,))
    dec_state2 = Input(shape=(rnn_units2,))
    # 'valid' during training (lookahead handled by the loader), 'same' otherwise
    padding = 'valid' if training else 'same'
    # frame-rate conditioning network: pitch embedding + 2 convs + 2 dense
    fconv1 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv1')
    fconv2 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv2')
    pembed = Embedding(256, 64, name='embed_pitch')
    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
    cfeat = fconv2(fconv1(cat_feat))
    fdense1 = Dense(cond_size, activation='tanh', name='feature_dense1')
    fdense2 = Dense(cond_size, activation='tanh', name='feature_dense2')
    if flag_e2e and quantize:
        # freeze the conditioning network while quantizing the e2e model
        fconv1.trainable = False
        fconv2.trainable = False
        fdense1.trainable = False
        fdense2.trainable = False
    cfeat = fdense2(fdense1(cfeat))
    # past prediction error (mu-law), delayed by one sample
    error_calc = Lambda(lambda x: tf_l2u(x[0] - tf.roll(x[1],1,axis = 1)))
    if flag_e2e:
        # derive LPC from reflection coefficients predicted by the cond net
        lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
    else:
        lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size)
    real_preds = diff_pred(name = "real_lpc2preds")([pcm,lpcoeffs])
    # bandwidth-expansion weighting of the LPC (gamma^k)
    weighting = lpc_gamma ** np.arange(1, 17).astype('float32')
    weighted_lpcoeffs = Lambda(lambda x: x[0]*x[1])([lpcoeffs, weighting])
    tensor_preds = diff_pred(name = "lpc2preds")([pcm,weighted_lpcoeffs])
    past_errors = error_calc([pcm,tensor_preds])
    embed = diff_Embed(name='embed_sig',initializer = PCMInit())
    # sample-rate input: (signal, prediction, past error), all mu-law
    cpcm = Concatenate()([tf_l2u(pcm),tf_l2u(tensor_preds),past_errors])
    cpcm = GaussianNoise(.3)(cpcm)
    cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
    cpcm_decoder = Reshape((-1, embed_size*3))(embed(dpcm))
    # upsample frame-rate conditioning to the sample rate
    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
    quant = quant_regularizer if quantize else None
    if training:
        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a', stateful=True,
                       recurrent_constraint = constraint, recurrent_regularizer=quant)
        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b', stateful=True,
                        kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    else:
        rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a', stateful=True,
                  recurrent_constraint = constraint, recurrent_regularizer=quant)
        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b', stateful=True,
                   kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    rnn_in = Concatenate()([cpcm, rep(cfeat)])
    md = MDense(pcm_levels, activation='sigmoid', name='dual_fc')
    gru_out1, _ = rnn(rnn_in)
    gru_out1 = GaussianNoise(.005)(gru_out1)
    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
    # dual_fc output feeds the hierarchical (binary-tree) pdf
    ulaw_prob = Lambda(tree_to_pdf_train)(md(gru_out2))
    if adaptation:
        # speaker adaptation: freeze the sample-rate network
        rnn.trainable=False
        rnn2.trainable=False
        md.trainable=False
        embed.Trainable=False
    m_out = Concatenate(name='pdf')([tensor_preds,real_preds,ulaw_prob])
    if not flag_e2e:
        model = Model([pcm, feat, pitch, lpcoeffs], m_out)
    else:
        model = Model([pcm, feat, pitch], [m_out, cfeat])
    model.rnn_units1 = rnn_units1
    model.rnn_units2 = rnn_units2
    model.nb_used_features = nb_used_features
    model.frame_size = frame_size
    if not flag_e2e:
        encoder = Model([feat, pitch], cfeat)
        dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat])
    else:
        encoder = Model([feat, pitch], [cfeat,lpcoeffs])
        dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat])
    # streaming decoder: one step with explicit GRU states
    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
    dec_ulaw_prob = Lambda(tree_to_pdf_infer)(md(dec_gru_out2))
    if flag_e2e:
        decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
    else:
        decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
    # add parameters to model
    set_parameter(model, 'lpc_gamma', lpc_gamma, dtype='float64')
    set_parameter(model, 'flag_e2e', flag_e2e, dtype='bool')
    set_parameter(model, 'lookahead', lookahead, dtype='int32')
    return model, encoder, decoder

View File

@@ -0,0 +1,101 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2018-2019 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import math
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.initializers import Initializer
from tensorflow.keras.callbacks import Callback
import numpy as np
def quant_regularizer(x):
    """Regularizer pushing weights toward exact multiples of 1/128
    (the int8 quantization grid used at inference time)."""
    Q = 128
    Q_1 = 1./Q
    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
class WeightClip(Constraint):
    '''Clips the weights incident to each hidden unit to be inside a range
    '''
    def __init__(self, c=2):
        self.c = c
    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
        #return K.clip(p, -self.c, self.c)
    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}
# Shared pairwise weight-clipping constraint for the PLC GRUs; 0.992
# keeps quantized adjacent-weight pairs below the int8 saturation limit.
constraint = WeightClip(0.992)
def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, nb_burg_features=36, batch_size=128, training=False, adaptation=False, quantize=False, cond_size=128):
    """Build the packet-loss-concealment model: a dense layer followed by
    two stateful GRUs predicting the next frame's features.

    Inputs are the concatenated acoustic + Burg features plus a per-frame
    `lost` flag; output is the predicted feature vector. `training`
    selects CuDNN GRUs; `quantize` enables the 1/128-grid regularizer.
    NOTE: `adaptation` is currently accepted but unused here.
    """
    feat = Input(shape=(None, nb_used_features+nb_burg_features), batch_size=batch_size)
    lost = Input(shape=(None, 1), batch_size=batch_size)
    fdense1 = Dense(cond_size, activation='tanh', name='plc_dense1')
    cfeat = Concatenate()([feat, lost])
    cfeat = fdense1(cfeat)
    #cfeat = Conv1D(cond_size, 3, padding='causal', activation='tanh', name='plc_conv1')(cfeat)
    quant = quant_regularizer if quantize else None
    if training:
        rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru1', stateful=True,
                       kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
        rnn2 = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru2', stateful=True,
                        kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    else:
        rnn = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru1', stateful=True,
                  kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
        rnn2 = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru2', stateful=True,
                   kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    gru_out1, _ = rnn(cfeat)
    gru_out1 = GaussianNoise(.005)(gru_out1)
    gru_out2, _ = rnn2(gru_out1)
    out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
    plc_out = out_dense(gru_out2)
    model = Model([feat, lost], plc_out)
    model.rnn_units = rnn_units
    model.cond_size = cond_size
    model.nb_used_features = nb_used_features
    model.nb_burg_features = nb_burg_features
    return model

View File

@@ -0,0 +1,95 @@
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras import activations
from tensorflow.keras import initializers, regularizers, constraints
import numpy as np
import math
class MDense(Layer):
    """Multi-channel ("dual FC") dense layer used for LPCNet's output.

    Computes `channels` parallel dense projections, applies tanh to each,
    scales each by a learned per-unit factor, and sums over channels
    before the final activation:
        out = activation(sum_c factor[:,c] * tanh(x @ kernel[:,:,c] + bias[:,c]))
    """
    def __init__(self, outputs,
                 channels=2,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(MDense, self).__init__(**kwargs)
        self.units = outputs
        self.channels = channels
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True
    def build(self, input_shape):
        """Create kernel (units, input_dim, channels), bias and per-channel
        factor weights; lock the input spec to the observed input_dim."""
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]
        self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units, self.channels),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # per-unit, per-channel output gain; initialized to ones
        self.factor = self.add_weight(shape=(self.units, self.channels),
                                      initializer='ones',
                                      name='factor',
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint)
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True
    def call(self, inputs):
        # (..., input_dim) x (units, input_dim, channels) -> (..., units, channels)
        output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = output + self.bias
        output = K.tanh(output) * self.factor
        # sum the tanh channels, then apply the final activation
        output = K.sum(output, axis=-1)
        if self.activation is not None:
            output = self.activation(output)
        return output
    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)
    def get_config(self):
        config = {
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(MDense, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

View File

@@ -0,0 +1,70 @@
# Optimizing a rational function to optimize a tanh() approximation
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam, SGD
def my_loss1(y_true, y_pred):
    """Equal-weight blend of mean squared error and per-sample max squared error."""
    sq_err = K.square(y_true - y_pred)
    return 1*K.mean(sq_err) + 1*K.max(sq_err, axis=1)
def my_loss2(y_true, y_pred):
    """Blend weighting worst-case error more: 0.1*MSE + per-sample max squared error."""
    sq_err = K.square(y_true - y_pred)
    return .1*K.mean(sq_err) + 1*K.max(sq_err, axis=1)
def my_loss3(y_true, y_pred):
    """Almost pure worst-case loss: 0.01*MSE + per-sample max squared error."""
    sq_err = K.square(y_true - y_pred)
    return .01*K.mean(sq_err) + 1*K.max(sq_err, axis=1)
# Initializers that seed the rational fit with a reasonable starting point
# (coefficients of a Pade-style expansion of tanh).
def num_init(shape, dtype=None):
    """Initial numerator coefficients [945, 105, 1] for the even-power basis."""
    coeffs = tf.constant([[945], [105], [1]], dtype=dtype)
    #coeffs = tf.constant([[946.56757], [98.01368], [0.66841]], dtype=dtype)
    print(coeffs)
    return coeffs
def den_init(shape, dtype=None):
    """Initial denominator coefficients [945, 420, 15] for the even-power basis."""
    coeffs = tf.constant([[945], [420], [15]], dtype=dtype)
    #coeffs = tf.constant([[946.604], [413.342], [12.465]], dtype=dtype)
    print(coeffs)
    return coeffs
# Fit an odd rational approximation of tanh() on a dense grid of [-10, 10).
x = np.arange(-10, 10, .01)
N = len(x)
x = np.reshape(x, (1, -1, 1))
x2 = x*x
# Even-power basis [1, x^2, x^4] shared by numerator and denominator.
x2in = np.concatenate([x2*0 + 1, x2, x2*x2], axis=2)
yout = np.tanh(x)
model_x = Input(shape=(None, 1,))
model_x2 = Input(shape=(None, 3,))
# The polynomial coefficients are the (bias-free) Dense weights being trained.
num = Dense(1, name='num', use_bias=False, kernel_initializer=num_init)
den = Dense(1, name='den', use_bias=False, kernel_initializer=den_init)
def ratio(x):
    # tanh(x) ~= x*num(x^2)/den(x^2), clamped to tanh's range [-1, 1].
    return tf.minimum(1., tf.maximum(-1., x[0]*x[1]/x[2]))
out_layer = Lambda(ratio)
output = out_layer([model_x, num(model_x2), den(model_x2)])
model = Model([model_x, model_x2], output)
model.summary()
# Coarse MSE fit first, then progressively smaller learning rates with the
# my_loss1..3 losses that weight the worst-case error more and more heavily.
model.compile(Adam(0.05, beta_1=0.9, beta_2=0.9, decay=2e-5), loss='mean_squared_error')
model.fit([x, x2in], yout, batch_size=1, epochs=500000, validation_split=0.0)
model.compile(Adam(0.001, beta_2=0.9, decay=1e-4), loss=my_loss1)
model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)
model.compile(Adam(0.0001, beta_2=0.9, decay=1e-4), loss=my_loss2)
model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)
model.compile(Adam(0.00001, beta_2=0.9, decay=1e-4), loss=my_loss3)
model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)
model.save_weights('tanh.h5')

View File

@@ -0,0 +1,29 @@
""" module for handling extra model parameters for tf.keras models """
import tensorflow as tf
def set_parameter(model, parameter_name, parameter_value, dtype='float32'):
    """ stores parameter_value as non-trainable weight with name parameter_name:0 """
    target = parameter_name + ":0"
    matches = [w for w in model.weights if w.name == target]
    if not matches:
        # Parameter not present yet: create it as a constant, non-trainable weight.
        model.add_weight(parameter_name, trainable=False, initializer=tf.keras.initializers.Constant(parameter_value), dtype=dtype)
    elif len(matches) == 1:
        matches[0].assign(parameter_value)
    else:
        raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
def get_parameter(model, parameter_name, default=None):
    """ returns parameter value if parameter is present in model and otherwise default """
    target = parameter_name + ":0"
    matches = [w for w in model.weights if w.name == target]
    if len(matches) > 1:
        raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
    if not matches:
        return default
    return matches[0].numpy().item()

View File

@@ -0,0 +1,73 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import numpy as np
from tensorflow.keras.utils import Sequence
class PLCLoader(Sequence):
    """Batch generator for packet-loss-concealment training.

    Pairs feature sequences with simulated packet-loss traces: input features
    are zeroed where packets are "lost", the Burg-estimated features are
    additionally dropped at random, and the target includes a not-lost flag.
    """
    def __init__(self, features, lost, nb_burg_features, batch_size):
        # features: (nb_sequences, frames_per_seq, nb_features) array.
        # lost: 1-D loss trace (nonzero = received) sampled at frame rate.
        self.batch_size = batch_size
        self.nb_batches = features.shape[0]//self.batch_size
        self.features = features[:self.nb_batches*self.batch_size, :, :]
        self.lost = lost.astype('float')
        # Trim the trace so that any window of one sequence length fits.
        self.lost = self.lost[:(len(self.lost)//features.shape[1]-1)*features.shape[1]]
        self.nb_burg_features = nb_burg_features
        self.on_epoch_end()
    def on_epoch_end(self):
        """Reshuffle sequences and re-slice the loss trace at a random offset."""
        self.indices = np.arange(self.nb_batches*self.batch_size)
        np.random.shuffle(self.indices)
        offset = np.random.randint(0, high=self.features.shape[1])
        # View of the trace as rows of one sequence length, starting at offset.
        self.lost_offset = np.reshape(self.lost[offset:-self.features.shape[1]+offset], (-1, self.features.shape[1]))
        self.lost_indices = np.random.randint(0, high=self.lost_offset.shape[0], size=self.nb_batches*self.batch_size)
    def __getitem__(self, index):
        """Return ([masked features, loss flags], [target features]) for one batch."""
        features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
        # Independently drop the Burg features ~10% of the time.
        burg_lost = (np.random.rand(features.shape[0], features.shape[1]) > .1).astype('float')
        burg_lost = np.reshape(burg_lost, (features.shape[0], features.shape[1], 1))
        burg_mask = np.tile(burg_lost, (1,1,self.nb_burg_features))
        # Per-sequence packet-loss pattern drawn from the real trace.
        lost = self.lost_offset[self.lost_indices[index*self.batch_size:(index+1)*self.batch_size], :]
        lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
        lost_mask = np.tile(lost, (1,1,features.shape[2]))
        in_features = features*lost_mask
        in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask
        #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
        #in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
        out_lost = np.copy(lost)
        #out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]
        # Target: non-Burg features plus a "frame was lost" indicator channel.
        out_features = np.concatenate([features[:,:,self.nb_burg_features:], 1.-out_lost], axis=-1)
        burg_sign = 2*burg_lost - 1
        # last dim is 1 for received packet, 0 for lost packet, and -1 when just the Burg info is missing
        inputs = [in_features*lost_mask, lost*burg_sign]
        outputs = [out_features]
        return (inputs, outputs)
    def __len__(self):
        # Number of batches per epoch.
        return self.nb_batches

View File

@@ -0,0 +1,372 @@
#!/usr/bin/python3
'''Copyright (c) 2022 Amazon
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import math
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise, AveragePooling1D, RepeatVector
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.initializers import Initializer
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.regularizers import l1
import numpy as np
import h5py
from uniform_noise import UniformNoise
class WeightClip(Constraint):
    '''Clips the weights incident to each hidden unit to be inside a range
    '''
    def __init__(self, c=2):
        self.c = c
    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        pair_sum = tf.abs(p[:, 1::2]) + tf.abs(p[:, 0::2])
        limit = tf.repeat(pair_sum, 2, axis=1)
        return self.c * p / tf.maximum(self.c, limit)
        #return K.clip(p, -self.c, self.c)
    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}
# Shared constraint instance for all kernel/recurrent weights below; 0.496
# keeps adjacent-weight pair sums small enough for the saturation bound
# described in WeightClip.__call__.
constraint = WeightClip(0.496)
def soft_quantize(x):
    """Differentiable quantization warp — currently the identity.

    The sine-based soft rounding below is kept for reference but disabled.
    """
    #x = 4*x
    #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
    #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
    #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
    return x
def noise_quantize(x):
    """Train-time quantization proxy: add uniform(-.5, .5) noise before the
    soft quantizer (shape hard-coded to the training batch 128x16x80)."""
    noise = K.random_uniform((128, 16, 80)) - .5
    return soft_quantize(x + noise)
def hard_quantize(x):
    """Round to integers with a straight-through gradient estimator."""
    x = soft_quantize(x)
    rounded = tf.round(x)
    # Forward pass uses the rounded value; backward pass sees identity.
    return x + tf.stop_gradient(rounded - x)
def apply_dead_zone(x):
    """Soft dead-zone: shrink values near zero.

    x = [signal, dead_zone_param]; the learned parameter (scaled by .05)
    controls how aggressively small values are pulled toward zero.
    """
    signal = x[0]
    width = x[1]*.05
    return signal - width*tf.math.tanh(signal/(.1+width))
def rate_loss(y_true,y_pred):
    """Differentiable rate estimate (in bits) of the latent vector y_pred,
    based on the L1 norm k of the latents; y_true is unused."""
    log2_e = 1.4427
    n = y_pred.shape[-1]
    # np.math was a deprecated alias of the math module and was removed in
    # NumPy 1.25; use math.lgamma (== log(gamma(n)), and overflow-safe for
    # large n) instead of np.math.log(np.math.gamma(n)).
    C = n - log2_e*math.lgamma(n)
    k = K.sum(K.abs(y_pred), axis=-1)
    p = 1.5  # exponent used by the commented-out alternative below
    #rate = C + (n-1)*log2_e*tf.math.log((k**p + (n/5)**p)**(1/p))
    rate = C + (n-1)*log2_e*tf.math.log(k + .112*n**2/(n/1.8+k) )
    return K.mean(rate)
eps=1e-6
def safelog2(x):
    """Base-2 logarithm with an epsilon floor (avoids -inf at x == 0)."""
    return 1.4427*tf.math.log(x + eps)
def feat_dist_loss(y_true,y_pred):
    """Feature-domain distortion loss weighted by 1/sqrt(lambda).

    y_pred is rank 4 (stacked decoder passes) with the per-sequence lambda
    appended as the last channel; y_true is rank 3.  Layout assumed:
    indices 0-17 cepstrum, 18 pitch, 19 correlation — TODO confirm against
    the feature extractor.
    """
    # Rate/distortion trade-off: larger lambda -> distortion weighted less.
    lambda_1 = 1./K.sqrt(y_pred[:,:,:,-1])
    y_pred = y_pred[:,:,:,:-1]
    ceps = y_pred[:,:,:,:18] - y_true[:,:,:18]
    # Relative pitch error; the +2 offset keeps the denominator away from zero.
    pitch = 2*(y_pred[:,:,:,18:19] - y_true[:,:,18:19])/(y_true[:,:,18:19] + 2)
    corr = y_pred[:,:,:,19:] - y_true[:,:,19:]
    # Pitch errors weighted up when the correlation feature is high (voiced).
    pitch_weight = K.square(K.maximum(0., y_true[:,:,19:]+.5))
    return K.mean(lambda_1*K.mean(K.square(ceps) + 10*(1/18.)*K.abs(pitch)*pitch_weight + (1/18.)*K.square(corr), axis=-1))
def sq1_rate_loss(y_true,y_pred):
    """Rate loss (bits) for the soft-quantized latents, scaled by lambda.

    y_pred packs [latents (n) | p0 params (n) | decay r (n) | lambda] along
    the last axis; y_true is unused.  The rate model is a two-sided
    geometric distribution with per-dimension decay r.
    """
    lambda_val = K.sqrt(y_pred[:,:,-1])
    y_pred = y_pred[:,:,:-1]
    n = y_pred.shape[-1]//3
    r = (y_pred[:,:,2*n:])
    y_pred = soft_quantize(y_pred[:,:,:n])
    # Earlier experimental estimates (dead code in the original — the value
    # of `rate` was overwritten before use), kept here for reference:
    #   p0 = 1-r**(.5+.5*y_pred[:,:,n:2*n])
    #   y0 = K.maximum(0., 1. - K.abs(y_pred))**2
    #   rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
    #   rate = -safelog2(-.5*tf.math.log(r)*r**K.abs(y_pred))
    #   rate = -safelog2(- tf.math.sinh(.5*tf.math.log(r))* r**K.abs(y_pred) - tf.math.cosh(K.maximum(0., .5 - K.abs(y_pred))*tf.math.log(r)) + 1)
    rate = -safelog2((1-r)/(1+r)*r**K.abs(y_pred))
    rate = lambda_val*K.sum(rate, axis=-1)
    return K.mean(rate)
def sq2_rate_loss(y_true,y_pred):
    """Rate loss (bits) for the *hard*-quantized latents, scaled by lambda.

    Same packing as sq1_rate_loss: [latents (n) | p0 params (n) | decay r (n)
    | lambda] along the last axis; y_true is unused.
    """
    lambda_val = K.sqrt(y_pred[:,:,-1])
    y_pred = y_pred[:,:,:-1]
    log2_e = 1.4427
    n = y_pred.shape[-1]//3
    r = y_pred[:,:,2*n:]
    # Probability of the zero symbol, parameterized via the embedding output.
    p0 = y_pred[:,:,n:2*n]
    p0 = 1-r**(.5+.5*p0)
    #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
    #p0 = 1-r**theta
    y_pred = tf.round(y_pred[:,:,:n])
    # Soft indicator of "symbol == 0" (exactly 1 at 0, fading to 0 at +-1).
    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
    rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
    rate = lambda_val*K.sum(rate, axis=-1)
    return K.mean(rate)
def sq_rate_metric(y_true,y_pred, reduce=True):
    """Estimated rate (bits) of the hard-quantized latents, without the
    lambda weighting, so it can be tracked as a metric.

    Set reduce=False to get per-sequence rates instead of the batch mean.
    """
    y_pred = y_pred[:,:,:-1]
    log2_e = 1.4427
    n = y_pred.shape[-1]//3
    r = y_pred[:,:,2*n:]
    p0 = y_pred[:,:,n:2*n]
    p0 = 1-r**(.5+.5*p0)
    #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
    #p0 = 1-r**theta
    y_pred = tf.round(y_pred[:,:,:n])
    # Soft indicator of "symbol == 0".
    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
    rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
    rate = K.sum(rate, axis=-1)
    if reduce:
        rate = K.mean(rate)
    return rate
def pvq_quant_search(x, k):
    """PVQ codebook search: find integer pulse vector y with sum(|y|) == k.

    L1-normalizes x, then iteratively rescales and rounds so the rounded
    pulses sum to exactly k (up to 10 refinement iterations).
    """
    x = x/tf.reduce_sum(tf.abs(x), axis=-1, keepdims=True)
    kx = k*x
    y = tf.round(kx)
    newk = k
    for j in range(10):
        #print("y = ", y)
        #print("iteration ", j)
        abs_y = tf.abs(y)
        abs_kx = tf.abs(kx)
        # Current pulse count per vector.
        kk=tf.reduce_sum(abs_y, axis=-1)
        #print("sums = ", kk)
        # Smallest scale-up that adds a pulse / largest scale-down that drops one.
        plus = 1.000001*tf.reduce_min((abs_y+.5)/(abs_kx+1e-15), axis=-1)
        minus = .999999*tf.reduce_max((abs_y-.5)/(abs_kx+1e-15), axis=-1)
        #print("plus = ", plus)
        #print("minus = ", minus)
        # Shrink if over budget, grow if under, keep scale once exact.
        factor = tf.where(kk>k, minus, plus)
        factor = tf.where(kk==k, tf.ones_like(factor), factor)
        #print("scale = ", factor)
        factor = tf.expand_dims(factor, axis=-1)
        #newk = newk * (k/kk)**.2
        newk = newk*factor
        kx = newk*x
        #print("newk = ", newk)
        #print("unquantized = ", newk*x)
        y = tf.round(kx)
        #print(y)
        #print(K.mean(K.sum(K.abs(y), axis=-1)))
    return y
def pvq_quantize(x, k):
    """Quantize x on the unit sphere via PVQ with k pulses, using a
    straight-through gradient estimator."""
    unit = x/(1e-15+tf.norm(x, axis=-1,keepdims=True))
    pulses = pvq_quant_search(unit, k)
    quantized = pulses/(1e-15+tf.norm(pulses, axis=-1,keepdims=True))
    # Forward pass: quantized point; backward pass: identity on `unit`.
    return unit + tf.stop_gradient(quantized - unit)
def var_repeat(x):
    """Repeat x[0] along a new axis 1 so its length matches the (dynamic)
    time dimension of x[1]."""
    expanded = tf.expand_dims(x[0], 1)
    return tf.repeat(expanded, K.shape(x[1])[1], axis=1)
# Width of the transmitted decoder-state vector produced by the encoder
# ('gdense2') and consumed by the decoder's state1..state3 projections.
nb_state_dim = 24
def new_rdovae_encoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
    """Build the RDO-VAE encoder model.

    Maps a feature sequence to (per-step latents of width nb_bits, global
    nb_state_dim state vector for initializing the decoder).
    """
    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
    # CuDNNGRU for faster GPU training; plain GRU otherwise.
    gru = CuDNNGRU if training else GRU
    enc_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense1')
    enc_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense2')
    enc_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense3')
    enc_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense4')
    enc_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense5')
    enc_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense6')
    enc_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense7')
    enc_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense8')
    #bits_dense = Dense(nb_bits, activation='linear', name='bits_dense')
    # Causal conv over the concatenated stack outputs produces the latents.
    bits_dense = Conv1D(nb_bits, 4, padding='causal', activation='linear', name='bits_dense')
    zero_out = Lambda(lambda x: 0*x)
    # Pair consecutive frames: halves the time axis, doubles the channels.
    inputs = Reshape((-1, 2*nb_used_features))(feat)
    d1 = enc_dense1(inputs)
    d2 = enc_dense2(d1)
    d3 = enc_dense3(d2)
    d4 = enc_dense4(d3)
    d5 = enc_dense5(d4)
    d6 = enc_dense6(d5)
    d7 = enc_dense7(d6)
    d8 = enc_dense8(d7)
    # Dense/GRU stack with all intermediate activations exposed to the output.
    pre_out = Concatenate()([d1, d2, d3, d4, d5, d6, d7, d8])
    enc_out = bits_dense(pre_out)
    # Side branch producing the per-step decoder initial-state vector.
    global_dense1 = Dense(128, activation='tanh', name='gdense1')
    global_dense2 = Dense(nb_state_dim, activation='tanh', name='gdense2')
    global_bits = global_dense2(global_dense1(pre_out))
    encoder = Model([feat], [enc_out, global_bits], name='encoder')
    return encoder
def new_rdovae_decoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
    """Build the RDO-VAE decoder model.

    Maps (latent sequence, nb_state_dim state vector) back to features.
    The sequence is processed in reverse time order (time_reverse applied to
    both input and output), and the three GRUs are initialized from dense
    projections of the transmitted state.
    """
    bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits")
    gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state")
    # CuDNNGRU for faster GPU training; plain GRU otherwise.
    gru = CuDNNGRU if training else GRU
    dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1')
    dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2')
    dec_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense3')
    dec_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense4')
    dec_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense5')
    dec_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense6')
    dec_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense7')
    dec_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense8')
    # Each decoded step expands back to `bunch` feature frames.
    dec_final = Dense(bunch*nb_used_features, activation='linear', name='dec_final')
    time_reverse = Lambda(lambda x: K.reverse(x, 1))
    #time_reverse = Lambda(lambda x: x)
    #gru_state_rep = RepeatVector(64//bunch)(gru_state_input)
    #gru_state_rep = Lambda(var_repeat, output_shape=(None, nb_state_dim)) ([gru_state_input, bits_input])
    # Per-GRU initial states derived from the transmitted state vector.
    gru_state1 = Dense(cond_size, name="state1", activation='tanh')(gru_state_input)
    gru_state2 = Dense(cond_size, name="state2", activation='tanh')(gru_state_input)
    gru_state3 = Dense(cond_size, name="state3", activation='tanh')(gru_state_input)
    dec1 = dec_dense1(time_reverse(bits_input))
    dec2 = dec_dense2(dec1, initial_state=gru_state1)
    dec3 = dec_dense3(dec2)
    dec4 = dec_dense4(dec3, initial_state=gru_state2)
    dec5 = dec_dense5(dec4)
    dec6 = dec_dense6(dec5, initial_state=gru_state3)
    dec7 = dec_dense7(dec6)
    dec8 = dec_dense8(dec7)
    output = Reshape((-1, nb_used_features))(dec_final(Concatenate()([dec1, dec2, dec3, dec4, dec5, dec6, dec7, dec8])))
    decoder = Model([bits_input, gru_state_input], time_reverse(output), name='decoder')
    # Stash the geometry on the model for new_split_decoder().
    decoder.nb_bits = nb_bits
    decoder.bunch = bunch
    return decoder
def new_split_decoder(decoder):
    """Wrap `decoder` so a long latent sequence is decoded in independent
    100-frame segments.

    Each segment uses the encoder state taken at its *last* step as the
    decoder's initial state (the decoder runs time-reversed), and the
    decoded segments are concatenated back along the time axis.
    """
    nb_bits = decoder.nb_bits
    bunch = decoder.bunch
    bits_input = Input(shape=(None, nb_bits), name="split_bits")
    gru_state_input = Input(shape=(None,nb_state_dim), name="split_state")
    range_select = Lambda(lambda x: x[0][:,x[1]:x[2],:])
    elem_select = Lambda(lambda x: x[0][:,x[1],:])
    # Segment boundaries in (pre-bunch) frames.
    points = [0, 100, 200, 300, 400]
    outputs = []
    for i in range(len(points)-1):
        begin = points[i]//bunch
        end = points[i+1]//bunch
        state = elem_select([gru_state_input, end-1])
        bits = range_select([bits_input, begin, end])
        outputs.append(decoder([bits, state]))
    output = Concatenate(axis=1)(outputs)
    split = Model([bits_input, gru_state_input], output, name="split")
    return split
def tensor_concat(x):
    """Stack the list of decoder outputs x along a new leading axis.

    n is hard-coded to 2 (the bunch//2 decoder passes).  Earlier entries are
    shifted by 2*(n-1-i) frames and padded with the tail of the last entry
    so that all stacked outputs are time-aligned — TODO confirm intent.
    """
    #n = x[1]//2
    #x = x[0]
    n=2
    y = []
    for i in range(n-1):
        offset = 2 * (n-1-i)
        tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2)
        y.append(tf.expand_dims(tmp, axis=0))
    y.append(tf.expand_dims(x[-1], axis=0))
    return Concatenate(axis=0)(y)
def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
    """Assemble the end-to-end rate-distortion-optimized VAE.

    Inputs: features, a quantizer id (indexes per-level embeddings holding
    scale / dead-zone / probability-model parameters) and the R/D lambda.
    Outputs: decoded features for hard-quantized and noise-quantized
    latents, plus the "soft_bits"/"hard_bits" tensors consumed by the rate
    losses.  Returns (model, encoder, decoder, qembedding).
    """
    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
    quant_id = Input(shape=(None,), batch_size=batch_size)
    lambda_val = Input(shape=(None, 1), batch_size=batch_size)
    # NOTE(review): lambda_bunched is currently unused below.
    lambda_bunched = AveragePooling1D(pool_size=bunch//2, strides=bunch//2, padding="valid")(lambda_val)
    lambda_up = Lambda(lambda x: K.repeat_elements(x, 2, axis=-2))(lambda_val)
    # One embedding row per quantizer level, split into 6 nb_bits-wide fields.
    qembedding = Embedding(nb_quant, 6*nb_bits, name='quant_embed', embeddings_initializer='zeros')
    quant_embed_dec = qembedding(quant_id)
    quant_scale = Activation('softplus')(Lambda(lambda x: x[:,:,:nb_bits], name='quant_scale_embed')(quant_embed_dec))
    encoder = new_rdovae_encoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
    ze, gru_state_dec = encoder([feat])
    ze = Multiply()([ze, quant_scale])
    decoder = new_rdovae_decoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
    split_decoder = new_split_decoder(decoder)
    # Remaining embedding fields: dead-zone width and soft/hard rate models.
    dead_zone = Activation('softplus')(Lambda(lambda x: x[:,:,nb_bits:2*nb_bits], name='dead_zone_embed')(quant_embed_dec))
    soft_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,2*nb_bits:4*nb_bits], name='soft_distr_embed')(quant_embed_dec))
    hard_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,4*nb_bits:], name='hard_distr_embed')(quant_embed_dec))
    noisequant = UniformNoise()
    hardquant = Lambda(hard_quantize)
    dzone = Lambda(apply_dead_zone)
    dze = dzone([ze,dead_zone])
    # Noise-quantized (training proxy) and hard-quantized latents.
    ndze = noisequant(dze)
    dze_quant = hardquant(dze)
    div = Lambda(lambda x: x[0]/x[1])
    dze_quant = div([dze_quant,quant_scale])
    ndze_unquant = div([ndze,quant_scale])
    mod_select = Lambda(lambda x: x[0][:,x[1]::bunch//2,:])
    # PVQ-quantize the transmitted decoder state to 82 pulses.
    gru_state_dec = Lambda(lambda x: pvq_quantize(x, 82))(gru_state_dec)
    combined_output = []
    unquantized_output = []
    cat = Concatenate(name="out_cat")
    # Decode each phase (offset i) separately, appending lambda for the losses.
    for i in range(bunch//2):
        dze_select = mod_select([dze_quant, i])
        ndze_select = mod_select([ndze_unquant, i])
        state_select = mod_select([gru_state_dec, i])
        tmp = split_decoder([dze_select, state_select])
        tmp = cat([tmp, lambda_up])
        combined_output.append(tmp)
        tmp = split_decoder([ndze_select, state_select])
        tmp = cat([tmp, lambda_up])
        unquantized_output.append(tmp)
    concat = Lambda(tensor_concat, name="output")
    combined_output = concat(combined_output)
    unquantized_output = concat(unquantized_output)
    e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val])
    e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val])
    model = Model([feat, quant_id, lambda_val], [combined_output, unquantized_output, e, e2], name="end2end")
    model.nb_used_features = nb_used_features
    return model, encoder, decoder, qembedding

View File

@@ -0,0 +1,138 @@
"""
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import argparse
import os
import sys
# Force CPU execution: dumping weights needs no GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ""
# CLI: weight file in, exchange folder out; sizes must match the trained model.
parser = argparse.ArgumentParser()
parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
parser.add_argument('output', metavar="<output folder>", type=str, help='output exchange folder')
parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
args = parser.parse_args()
# now import the heavy stuff
from rdovae import new_rdovae_model
from wexchange.tf import dump_tf_weights, load_tf_weights
# Maps Keras layer names (as built by new_rdovae_model) to the file names
# used in the weight-exchange folder.
exchange_name = {
    'enc_dense1' : 'encoder_stack_layer1_dense',
    'enc_dense3' : 'encoder_stack_layer3_dense',
    'enc_dense5' : 'encoder_stack_layer5_dense',
    'enc_dense7' : 'encoder_stack_layer7_dense',
    'enc_dense8' : 'encoder_stack_layer8_dense',
    'gdense1' : 'encoder_state_layer1_dense',
    'gdense2' : 'encoder_state_layer2_dense',
    'enc_dense2' : 'encoder_stack_layer2_gru',
    'enc_dense4' : 'encoder_stack_layer4_gru',
    'enc_dense6' : 'encoder_stack_layer6_gru',
    'bits_dense' : 'encoder_stack_layer9_conv',
    'qembedding' : 'statistical_model_embedding',
    'state1' : 'decoder_state1_dense',
    'state2' : 'decoder_state2_dense',
    'state3' : 'decoder_state3_dense',
    'dec_dense1' : 'decoder_stack_layer1_dense',
    'dec_dense3' : 'decoder_stack_layer3_dense',
    'dec_dense5' : 'decoder_stack_layer5_dense',
    'dec_dense7' : 'decoder_stack_layer7_dense',
    'dec_dense8' : 'decoder_stack_layer8_dense',
    'dec_final' : 'decoder_stack_layer9_dense',
    'dec_dense2' : 'decoder_stack_layer2_gru',
    'dec_dense4' : 'decoder_stack_layer4_gru',
    'dec_dense6' : 'decoder_stack_layer6_gru'
}
if __name__ == "__main__":
    # Rebuild the model with the same geometry as training, then load weights.
    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
    model.load_weights(args.weights)
    os.makedirs(args.output, exist_ok=True)
    # encoder
    encoder_dense_names = [
        'enc_dense1',
        'enc_dense3',
        'enc_dense5',
        'enc_dense7',
        'enc_dense8',
        'gdense1',
        'gdense2'
    ]
    encoder_gru_names = [
        'enc_dense2',
        'enc_dense4',
        'enc_dense6'
    ]
    encoder_conv1d_names = [
        'bits_dense'
    ]
    # Dump each encoder layer under its exchange-folder name.
    for name in encoder_dense_names + encoder_gru_names + encoder_conv1d_names:
        print(f"writing layer {exchange_name[name]}...")
        dump_tf_weights(os.path.join(args.output, exchange_name[name]), encoder.get_layer(name))
    # qembedding
    print(f"writing layer {exchange_name['qembedding']}...")
    dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding)
    # decoder
    decoder_dense_names = [
        'state1',
        'state2',
        'state3',
        'dec_dense1',
        'dec_dense3',
        'dec_dense5',
        'dec_dense7',
        'dec_dense8',
        'dec_final'
    ]
    decoder_gru_names = [
        'dec_dense2',
        'dec_dense4',
        'dec_dense6'
    ]
    # Dump each decoder layer under its exchange-folder name.
    for name in decoder_dense_names + decoder_gru_names:
        print(f"writing layer {exchange_name[name]}...")
        dump_tf_weights(os.path.join(args.output, exchange_name[name]), decoder.get_layer(name))

View File

@@ -0,0 +1,123 @@
"""
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import argparse
import os
import sys
# Force CPU execution: loading/saving weights needs no GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ""
# CLI: exchange folder in, hdf5 weight file out; sizes must match the dump.
parser = argparse.ArgumentParser()
parser.add_argument('input', metavar="<input folder>", type=str, help='input exchange folder')
parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
args = parser.parse_args()
# now import the heavy stuff
from rdovae import new_rdovae_model
from wexchange.tf import load_tf_weights
# Maps Keras layer names (as built by new_rdovae_model) to the file names
# used in the weight-exchange folder; must mirror the dump script's table.
exchange_name = {
    'enc_dense1' : 'encoder_stack_layer1_dense',
    'enc_dense3' : 'encoder_stack_layer3_dense',
    'enc_dense5' : 'encoder_stack_layer5_dense',
    'enc_dense7' : 'encoder_stack_layer7_dense',
    'enc_dense8' : 'encoder_stack_layer8_dense',
    'gdense1' : 'encoder_state_layer1_dense',
    'gdense2' : 'encoder_state_layer2_dense',
    'enc_dense2' : 'encoder_stack_layer2_gru',
    'enc_dense4' : 'encoder_stack_layer4_gru',
    'enc_dense6' : 'encoder_stack_layer6_gru',
    'bits_dense' : 'encoder_stack_layer9_conv',
    'qembedding' : 'statistical_model_embedding',
    'state1' : 'decoder_state1_dense',
    'state2' : 'decoder_state2_dense',
    'state3' : 'decoder_state3_dense',
    'dec_dense1' : 'decoder_stack_layer1_dense',
    'dec_dense3' : 'decoder_stack_layer3_dense',
    'dec_dense5' : 'decoder_stack_layer5_dense',
    'dec_dense7' : 'decoder_stack_layer7_dense',
    'dec_dense8' : 'decoder_stack_layer8_dense',
    'dec_final' : 'decoder_stack_layer9_dense',
    'dec_dense2' : 'decoder_stack_layer2_gru',
    'dec_dense4' : 'decoder_stack_layer4_gru',
    'dec_dense6' : 'decoder_stack_layer6_gru'
}
if __name__ == "__main__":
    # Build an uninitialized model with matching geometry, then fill it in
    # layer by layer from the exchange folder and save as hdf5.
    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
    encoder_layers = [
        'enc_dense1',
        'enc_dense3',
        'enc_dense5',
        'enc_dense7',
        'enc_dense8',
        'gdense1',
        'gdense2',
        'enc_dense2',
        'enc_dense4',
        'enc_dense6',
        'bits_dense'
    ]
    decoder_layers = [
        'state1',
        'state2',
        'state3',
        'dec_dense1',
        'dec_dense3',
        'dec_dense5',
        'dec_dense7',
        'dec_dense8',
        'dec_final',
        'dec_dense2',
        'dec_dense4',
        'dec_dense6'
    ]
    for name in encoder_layers:
        print(f"loading weight for layer {name}...")
        load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name))
    # The quantizer embedding lives outside the encoder/decoder sub-models.
    print(f"loading weight for layer qembedding...")
    load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding)
    for name in decoder_layers:
        print(f"loading weight for layer {name}...")
        load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name))
    model.save(args.weights)

View File

@@ -0,0 +1,120 @@
#!/usr/bin/python3
'''Copyright (c) 2018 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import argparse
import sys
import h5py
import numpy as np
import lpcnet
from ulaw import ulaw2lin, lin2ulaw
parser = argparse.ArgumentParser()
# NOTE: argparse only converts '-' to '_' in the dest of *optional* arguments.
# A positional named 'model-file' is stored under the literal key 'model-file',
# so `args.model_file` below would raise AttributeError.  Declare the
# positional with an underscore (CLI invocation is unaffected) and keep a
# readable name in the help text via metavar.
parser.add_argument('model_file', metavar='<model file>', type=str, help='model weight h5 file')
parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. WARNING: giving an inconsistent value here will severely degrade performance', default=1)
args = parser.parse_args()
filename = args.model_file
with h5py.File(filename, "r") as f:
units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
e2e = 'rc2lpc' in f['model_weights']
model, enc, dec = lpcnet.new_lpcnet_model(training = False, rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size, batch_size=1)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()
feature_file = sys.argv[2]
out_file = sys.argv[3]
frame_size = model.frame_size
nb_features = 36
nb_used_features = model.nb_used_features
features = np.fromfile(feature_file, dtype='float32')
features = np.resize(features, (-1, nb_features))
nb_frames = 1
feature_chunk_size = features.shape[0]
pcm_chunk_size = frame_size*feature_chunk_size
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')
model.load_weights(filename);
order = 16                    # LPC order
pcm = np.zeros((nb_frames*pcm_chunk_size, ))          # synthesized signal buffer
# Network input: [previous sample, prediction, excitation], all in mu-law
# (128 is mu-law zero).
fexc = np.zeros((1, 1, 3), dtype='int16')+128
state1 = np.zeros((1, model.rnn_units1), dtype='float32')   # GRU A state
state2 = np.zeros((1, model.rnn_units2), dtype='float32')   # GRU B state
mem = 0
coef = 0.85                   # de-emphasis filter coefficient (first-order IIR)
# Per-tap gamma weighting applied to the LPCs (must match training lpc-gamma).
lpc_weights = np.array([args.lpc_gamma ** (i + 1) for i in range(16)])
fout = open(out_file, 'wb')   # closed implicitly at process exit
# Skip the first order+1 samples of the very first frame: the LPC predictor
# has no history yet.
skip = order + 1
for c in range(0, nb_frames):
    # Frame-rate (conditioning) network; in end-to-end mode it also outputs
    # the LPCs derived from the features.
    if not e2e:
        cfeat = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
    else:
        cfeat,lpcs = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
    for fr in range(0, feature_chunk_size):
        f = c*feature_chunk_size + fr
        # LPCs for this frame: last `order` feature columns (weighted), or the
        # end-to-end network's output.
        if not e2e:
            a = features[c, fr, nb_features-order:] * lpc_weights
        else:
            a = lpcs[c,fr]
        for i in range(skip, frame_size):
            # Linear prediction from the last `order` synthesized samples.
            pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1])
            fexc[0, 0, 1] = lin2ulaw(pred)
            # Sample-rate network: probability distribution over mu-law excitation.
            p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])
            #Lower the temperature for voiced frames to reduce noisiness
            # (feature 19 appears to be the voicing strength -- TODO confirm)
            p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 19] - .5))
            p = p/(1e-18 + np.sum(p))
            #Cut off the tail of the remaining distribution
            p = np.maximum(p-0.002, 0).astype('float64')
            p = p/(1e-8 + np.sum(p))
            # Sample the excitation and add it to the LP prediction.
            fexc[0, 0, 2] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))
            pcm[f*frame_size + i] = pred + ulaw2lin(fexc[0, 0, 2])
            fexc[0, 0, 0] = lin2ulaw(pcm[f*frame_size + i])
            # De-emphasis filter, then write one 16-bit sample.
            mem = coef*mem + pcm[f*frame_size + i]
            #print(mem)
            np.array([np.round(mem)], dtype='int16').tofile(fout)
            skip = 0

View File

@@ -0,0 +1,92 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2018-2019 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
# Train an LPCNet model
import argparse
from plc_loader import PLCLoader
parser = argparse.ArgumentParser(description='Test a PLC model')
parser.add_argument('weights', metavar='<weights file>', help='weights file (.h5)')
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='reconstructed file (float32)')
parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
args = parser.parse_args()
import importlib
lpcnet = importlib.import_module(args.model)
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py
import tensorflow as tf
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
# try:
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
# except RuntimeError as e:
# print(e)
# Build the PLC model for inference: batch of one, no quantization.
model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=1, training=False, quantize=False, cond_size=args.cond_size)
model.compile()

lpc_order = 16
feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features

# u for unquantised, load 16 bit PCM samples and convert to mu-law
# Text feature file: one frame per row; the last column is the loss flag.
features = np.loadtxt(feature_file)
print(features.shape)
sequence_size = features.shape[0]
# Mask per frame; appears to be 1 for received frames and 0 for lost ones
# (received features are kept verbatim below) -- TODO confirm naming.
lost = np.reshape(features[:,-1:], (1, sequence_size, 1))
features = features[:,:nb_used_features]
features = np.reshape(features, (1, sequence_size, nb_used_features))
model.load_weights(args.weights)
# Zero out the features of lost frames before running the model.
features = features*lost
out = model.predict([features, lost])
# Keep received frames as-is; use the model's prediction only where lost.
out = features + (1-lost)*out
np.savetxt(args.output, out[0,:,:])

View File

@@ -0,0 +1,70 @@
"""
Tensorflow/Keras helper functions to do the following:
1. \mu law <-> Linear domain conversion
2. Differentiable prediction from the input signal and LP coefficients
3. Differentiable transformations Reflection Coefficients (RCs) <-> LP Coefficients
"""
from tensorflow.keras.layers import Lambda, Multiply, Layer, Concatenate
from tensorflow.keras import backend as K
import tensorflow as tf
# \mu law <-> Linear conversion functions
scale = 255.0/32768.0      # 16-bit linear -> mu-law input scaling
scale_1 = 32768.0/255.0    # inverse scaling for the return trip

def tf_l2u(x):
    """Differentiable linear -> mu-law conversion (output in [0, 255], 128 = zero)."""
    s = K.sign(x)
    x = K.abs(x)
    u = (s*(128*K.log(1+scale*x)/K.log(256.0)))
    u = K.clip(128 + u, 0, 255)
    return u

def tf_u2l(u):
    """Differentiable mu-law -> linear conversion (inverse of tf_l2u, minus clipping)."""
    u = tf.cast(u,"float32")
    u = u - 128.0
    s = K.sign(u)
    u = K.abs(u)
    return s*scale_1*(K.exp(u/128.*K.log(256.0))-1)
# Differentiable Prediction Layer
# Computes the LP prediction from the input lag signal and the LP coefficients
# The inputs xt and lpc conform with the shapes in lpcnet.py (the '2400' is coded keeping this in mind)
class diff_pred(Layer):
    """Differentiable LP prediction: predict each sample from the previous
    lpcoeffs_N samples using per-frame LP coefficients."""

    def call(self, inputs, lpcoeffs_N = 16, frame_size = 160):
        # inputs[0]: lagged signal (batch, samples, 1); inputs[1]: per-frame LPCs.
        xt = inputs[0]
        lpc = inputs[1]
        # Repeat each frame's LPCs for every sample of that frame.
        rept = Lambda(lambda x: K.repeat_elements(x , frame_size, 1))
        # Zero-pad lpcoeffs_N samples at the front so every sample has a full history.
        zpX = Lambda(lambda x: K.concatenate([0*x[:,0:lpcoeffs_N,:], x],axis = 1))
        # Stack the lpcoeffs_N lagged copies of the signal along the channel axis.
        # The 2400 is hard-coded to the training sequence length
        # (15 frames * 160 samples, matching lpcnet.py).
        cX = Lambda(lambda x: K.concatenate([x[:,(lpcoeffs_N - i):(lpcoeffs_N - i + 2400),:] for i in range(lpcoeffs_N)],axis = 2))
        # Prediction is minus the dot product of LPCs with the lagged samples.
        pred = -Multiply()([rept(lpc),cX(zpX(xt))])
        return K.sum(pred,axis = 2,keepdims = True)
# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion
class diff_rc2lpc(Layer):
    """Differentiable reflection coefficients -> LP coefficients
    (forward Levinson-Durbin recursion)."""

    def call(self, inputs, lpcoeffs_N = 16):
        # One Levinson step: from order-i coefficients (input[0]) and the next
        # reflection coefficient (input[1]) build the order-(i+1) coefficients.
        def pred_lpc_recursive(input):
            temp = (input[0] + K.repeat_elements(input[1],input[0].shape[2],2)*K.reverse(input[0],axes = 2))
            temp = Concatenate(axis = 2)([temp,input[1]])
            return temp
        Llpc = Lambda(pred_lpc_recursive)
        inputs = inputs[:,:,:lpcoeffs_N]
        lpc_init = inputs
        # Raise the order one step at a time until all lpcoeffs_N taps are built.
        for i in range(1,lpcoeffs_N):
            lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
        return lpc_init
class diff_lpc2rc(Layer):
    """Differentiable LP coefficients -> reflection coefficients
    (backward Levinson-Durbin recursion; inverse of diff_rc2lpc)."""

    def call(self, inputs, lpcoeffs_N = 16):
        # One backward step: peel off the highest-order coefficient k_i
        # (input[1]) and recover the order-(i-1) coefficients from input[0].
        def pred_rc_recursive(input):
            ki = K.repeat_elements(K.expand_dims(input[1][:,:,0],axis = -1),input[0].shape[2],2)
            temp = (input[0] - ki*K.reverse(input[0],axes = 2))/(1 - ki*ki)
            temp = Concatenate(axis = 2)([temp,input[1]])
            return temp
        Lrc = Lambda(pred_rc_recursive)
        rc_init = inputs
        # Lower the order one step at a time, accumulating the RCs on the right.
        for i in range(1,lpcoeffs_N):
            j = (lpcoeffs_N - i + 1)
            rc_init = Lrc([rc_init[:,:,:(j - 1)],rc_init[:,:,(j - 1):]])
        return rc_init

View File

@@ -0,0 +1,214 @@
#!/usr/bin/python3
'''Copyright (c) 2018 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
# Train an LPCNet model
import argparse
import os
from dataloader import LPCNetLoader
parser = argparse.ArgumentParser(description='Train an LPCNet model')
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('data', metavar='<audio data file>', help='binary audio data file (uint8)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='lpcnet', help='LPCNet model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
parser.add_argument('--density', metavar='<global density>', type=float, help='average density of the recurrent weights (default 0.1)')
parser.add_argument('--density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each recurrent gate (default 0.05, 0.05, 0.2)')
parser.add_argument('--grub-density', metavar='<global GRU B density>', type=float, help='average density of the recurrent weights (default 1.0)')
parser.add_argument('--grub-density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each GRU B input gate (default 1.0, 1.0, 1.0)')
parser.add_argument('--grua-size', metavar='<units>', default=384, type=int, help='number of units in GRU A (default 384)')
parser.add_argument('--grub-size', metavar='<units>', default=16, type=int, help='number of units in GRU B (default 16)')
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network, aka frame rate network (default 128)')
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
parser.add_argument('--end2end', dest='flag_e2e', action='store_true', help='Enable end-to-end training (with differentiable LPC computation')
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
parser.add_argument('--gamma', metavar='<gamma>', type=float, help='adjust u-law compensation (default 2.0, should not be less than 1.0)')
parser.add_argument('--lookahead', metavar='<nb frames>', default=2, type=int, help='Number of look-ahead frames (default 2)')
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
parser.add_argument('--lpc-gamma', type=float, default=1, help='gamma for LPC weighting')
parser.add_argument('--cuda-devices', metavar='<cuda devices>', type=str, default=None, help='string with comma separated cuda device ids')
args = parser.parse_args()
# set visible cuda devices
if args.cuda_devices != None:
os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_devices
density = (0.05, 0.05, 0.2)
if args.density_split is not None:
density = args.density_split
elif args.density is not None:
density = [0.5*args.density, 0.5*args.density, 2.0*args.density];
grub_density = (1., 1., 1.)
if args.grub_density_split is not None:
grub_density = args.grub_density_split
elif args.grub_density is not None:
grub_density = [0.5*args.grub_density, 0.5*args.grub_density, 2.0*args.grub_density];
gamma = 2.0 if args.gamma is None else args.gamma
import importlib
lpcnet = importlib.import_module(args.model)
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from ulaw import ulaw2lin, lin2ulaw
import tensorflow.keras.backend as K
import h5py
import tensorflow as tf
from tf_funcs import *
from lossfuncs import *
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
# try:
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
# except RuntimeError as e:
# print(e)
nb_epochs = args.epochs
# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size
quantize = args.quantize is not None
retrain = args.retrain is not None
lpc_order = 16
if quantize:
lr = 0.00003
decay = 0
input_model = args.quantize
else:
lr = 0.001
decay = 5e-5
if args.lr is not None:
lr = args.lr
if args.decay is not None:
decay = args.decay
if retrain:
input_model = args.retrain
flag_e2e = args.flag_e2e
opt = Adam(lr, decay=decay, beta_1=0.5, beta_2=0.8)
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
with strategy.scope():
model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size,
rnn_units2=args.grub_size,
batch_size=batch_size, training=True,
quantize=quantize,
flag_e2e=flag_e2e,
cond_size=args.cond_size,
lpc_gamma=args.lpc_gamma,
lookahead=args.lookahead
)
if not flag_e2e:
model.compile(optimizer=opt, loss=metric_cel, metrics=metric_cel)
else:
model.compile(optimizer=opt, loss = [interp_mulaw(gamma=gamma), loss_matchlar()], loss_weights = [1.0, 2.0], metrics={'pdf':[metric_cel,metric_icel,metric_exc_sd,metric_oginterploss]})
model.summary()
feature_file = args.features
pcm_file = args.data # 16 bit unsigned short PCM samples
frame_size = model.frame_size
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
feature_chunk_size = 15
pcm_chunk_size = frame_size*feature_chunk_size
# u for unquantised, load 16 bit PCM samples and convert to mu-law
# Memory-map audio and features so large training sets need not fit in RAM.
data = np.memmap(pcm_file, dtype='int16', mode='r')
# Whole sequences available, rounded down to a multiple of the batch size;
# the "-1" leaves headroom for the look-ahead shift applied below.
nb_frames = (len(data)//(2*pcm_chunk_size)-1)//batch_size*batch_size
features = np.memmap(feature_file, dtype='float32', mode='r')
# limit to discrete number of frames
# Drop leading samples so audio lines up with features for this look-ahead.
data = data[(4-args.lookahead)*2*frame_size:]
data = data[:nb_frames*2*pcm_chunk_size]
# Two int16 values per sample position (network input / target pair, as
# consumed by LPCNetLoader's in_data/out_data split).
data = np.reshape(data, (nb_frames, pcm_chunk_size, 2))
#print("ulaw std = ", np.std(out_exc))
# Overlapping strided view: each chunk carries 4 extra context frames
# without copying the underlying memmap.
sizeof = features.strides[-1]
features = np.lib.stride_tricks.as_strided(features, shape=(nb_frames, feature_chunk_size+4, nb_features),
                                           strides=(feature_chunk_size*nb_features*sizeof, nb_features*sizeof, sizeof))
#features = features[:, :, :nb_used_features]
# Convert the pitch feature to an integer period index for the embedding.
periods = (.1 + 50*features[:,:,nb_used_features-2:nb_used_features-1]+100).astype('int16')
#periods = np.minimum(periods, 255)
#periods = np.minimum(periods, 255)
# dump models to disk as we go
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.grua_size, '{epoch:02d}'))
if args.retrain is not None:
model.load_weights(args.retrain)
if quantize or retrain:
#Adapting from an existing model
model.load_weights(input_model)
if quantize:
sparsify = lpcnet.Sparsify(10000, 30000, 100, density, quantize=True)
grub_sparsify = lpcnet.SparsifyGRUB(10000, 30000, 100, args.grua_size, grub_density, quantize=True)
else:
sparsify = lpcnet.Sparsify(0, 0, 1, density)
grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
else:
#Training from scratch
sparsify = lpcnet.Sparsify(2000, 20000, 400, density)
grub_sparsify = lpcnet.SparsifyGRUB(2000, 40000, 400, args.grua_size, grub_density)
model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))
loader = LPCNetLoader(data, features, periods, batch_size, e2e=flag_e2e, lookahead=args.lookahead)
callbacks = [checkpoint, sparsify, grub_sparsify]
if args.logdir is not None:
logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.grua_size)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
callbacks.append(tensorboard_callback)
model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)

View File

@@ -0,0 +1,197 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2018-2019 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
# Train an LPCNet model
import argparse
from plc_loader import PLCLoader
parser = argparse.ArgumentParser(description='Train a PLC model')
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('lost_file', metavar='<packet loss file>', help='packet loss traces (int8)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
parser.add_argument('--band-loss', metavar='<weight>', default=1.0, type=float, help='weight of band loss (default 1.0)')
parser.add_argument('--loss-bias', metavar='<bias>', default=0.0, type=float, help='loss bias towards low energy (default 0.0)')
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
args = parser.parse_args()
import importlib
lpcnet = importlib.import_module(args.model)
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py
import tensorflow as tf
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
# try:
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
# except RuntimeError as e:
# print(e)
nb_epochs = args.epochs
# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size
quantize = args.quantize is not None
retrain = args.retrain is not None
if quantize:
lr = 0.00003
decay = 0
input_model = args.quantize
else:
lr = 0.001
decay = 2.5e-5
if args.lr is not None:
lr = args.lr
if args.decay is not None:
decay = args.decay
if retrain:
input_model = args.retrain
def plc_loss(alpha=1.0, bias=0.):
    """Combined PLC training loss.

    The last channel of y_true is a per-frame validity mask; only masked-in
    frames contribute.  alpha weights the band-domain term, bias penalizes
    over-estimating band energy on (roughly) active frames.
    """
    def loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        # Error mapped to the band domain via inverse DCT of the cepstral
        # channels (all but the last two features).
        e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
        bias_mask = K.minimum(1., K.maximum(0., 4*y_true[:,:,-1:]))
        # L1 on features + one-sided energy term + band term + two clipped
        # pitch terms (channel 18 appears to be pitch -- TODO confirm).
        l1_loss = K.mean(K.abs(e)) + 0.1*K.mean(K.maximum(0., -e[:,:,-1:])) + alpha*K.mean(K.abs(e_bands) + bias*bias_mask*K.maximum(0., e_bands)) + K.mean(K.minimum(K.abs(e[:,:,18:19]),1.)) + 8*K.mean(K.minimum(K.abs(e[:,:,18:19]),.4))
        return l1_loss
    return loss

def plc_l1_loss():
    """Metric: masked L1 distance over all feature channels."""
    def L1_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        l1_loss = K.mean(K.abs(e))
        return l1_loss
    return L1_loss

def plc_ceps_loss():
    """Metric: masked L1 distance over the cepstral channels only."""
    def ceps_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        l1_loss = K.mean(K.abs(e[:,:,:-2]))
        return l1_loss
    return ceps_loss

def plc_band_loss():
    """Metric: masked L1 distance in the band domain (inverse DCT of cepstrum)."""
    def L1_band_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
        l1_loss = K.mean(K.abs(e_bands))
        return l1_loss
    return L1_band_loss

def plc_pitch_loss():
    """Metric: clipped L1 distance on the pitch channel (channel 18)."""
    def pitch_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        l1_loss = K.mean(K.minimum(K.abs(e[:,:,18:19]),.4))
        return l1_loss
    return pitch_loss
opt = Adam(lr, decay=decay, beta_2=0.99)
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
with strategy.scope():
model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=batch_size, training=True, quantize=quantize, cond_size=args.cond_size)
model.compile(optimizer=opt, loss=plc_loss(alpha=args.band_loss, bias=args.loss_bias), metrics=[plc_l1_loss(), plc_ceps_loss(), plc_band_loss(), plc_pitch_loss()])
model.summary()
lpc_order = 16
feature_file = args.features
nb_features = model.nb_used_features + lpc_order + model.nb_burg_features
nb_used_features = model.nb_used_features
nb_burg_features = model.nb_burg_features
sequence_size = args.seq_length
# u for unquantised, load 16 bit PCM samples and convert to mu-law
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]
features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
features = features[:, :, :nb_used_features+model.nb_burg_features]
lost = np.memmap(args.lost_file, dtype='int8', mode='r')
# dump models to disk as we go
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.gru_size, '{epoch:02d}'))
if args.retrain is not None:
model.load_weights(args.retrain)
if quantize or retrain:
#Adapting from an existing model
model.load_weights(input_model)
model.save_weights('{}_{}_initial.h5'.format(args.output, args.gru_size))
loader = PLCLoader(features, lost, nb_burg_features, batch_size)
callbacks = [checkpoint]
if args.logdir is not None:
logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.gru_size)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
callbacks.append(tensorboard_callback)
model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)

View File

@@ -0,0 +1,151 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
Copyright (c) 2018-2019 Mozilla
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
# Train an LPCNet model
import tensorflow as tf
strategy = tf.distribute.MultiWorkerMirroredStrategy()
import argparse
#from plc_loader import PLCLoader
parser = argparse.ArgumentParser(description='Train a quantization model')
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
args = parser.parse_args()
import importlib
rdovae = importlib.import_module(args.model)
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
# try:
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
# except RuntimeError as e:
# print(e)
nb_epochs = args.epochs
# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size
quantize = args.quantize is not None
retrain = args.retrain is not None
if quantize:
lr = 0.00003
decay = 0
input_model = args.quantize
else:
lr = 0.001
decay = 2.5e-5
if args.lr is not None:
lr = args.lr
if args.decay is not None:
decay = args.decay
if retrain:
input_model = args.retrain
opt = Adam(lr, decay=decay, beta_2=0.99)
with strategy.scope():
model, encoder, decoder, _ = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size, nb_quant=16)
model.compile(optimizer=opt, loss=[rdovae.feat_dist_loss, rdovae.feat_dist_loss, rdovae.sq1_rate_loss, rdovae.sq2_rate_loss], loss_weights=[.5, .5, 1., .1], metrics={'hard_bits':rdovae.sq_rate_metric})
model.summary()
lpc_order = 16
feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
sequence_size = args.seq_length
# u for unquantised, load 16 bit PCM samples and convert to mu-law
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]
features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
print(features.shape)
features = features[:, :, :nb_used_features]
#lambda_val = np.repeat(np.random.uniform(.0007, .002, (features.shape[0], 1, 1)), features.shape[1]//2, axis=1)
#quant_id = np.round(10*np.log(lambda_val/.0007)).astype('int16')
#quant_id = quant_id[:,:,0]
quant_id = np.repeat(np.random.randint(16, size=(features.shape[0], 1, 1), dtype='int16'), features.shape[1]//2, axis=1)
lambda_val = .0002*np.exp(quant_id/3.8)
quant_id = quant_id[:,:,0]
# dump models to disk as we go
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.cond_size, '{epoch:02d}'))
if args.retrain is not None:
model.load_weights(args.retrain)
if quantize or retrain:
#Adapting from an existing model
model.load_weights(input_model)
model.save_weights('{}_{}_initial.h5'.format(args.output, args.cond_size))
callbacks = [checkpoint]
#callbacks = []
if args.logdir is not None:
logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.cond_size)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
callbacks.append(tensorboard_callback)
model.fit([features, quant_id, lambda_val], [features, features, features, features], batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)

View File

@@ -0,0 +1,19 @@
import numpy as np
import math
# mu-law companding scale factors: 8-bit mu-law code range vs 16-bit linear range.
scale = 255.0/32768.0
scale_1 = 32768.0/255.0

def ulaw2lin(u):
    """Expand mu-law codes (0..255, biased at 128) to linear amplitudes."""
    centered = u - 128
    sign = np.sign(centered)
    magnitude = np.abs(centered)
    return sign * scale_1 * (np.exp(magnitude / 128. * math.log(256)) - 1)

def lin2ulaw(x):
    """Compress linear amplitudes to mu-law codes (int16 in 0..255, biased at 128)."""
    sign = np.sign(x)
    magnitude = np.abs(x)
    companded = sign * (128 * np.log(1 + scale * magnitude) / math.log(256))
    return np.clip(128 + np.round(companded), 0, 255).astype('int16')

View File

@@ -0,0 +1,78 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the UniformNoise layer."""
import tensorflow.compat.v2 as tf
from tensorflow.keras import backend
from tensorflow.keras.layers import Layer
class UniformNoise(Layer):
    """Apply additive zero-centered uniform noise.

    During training, noise drawn from U(-stddev, +stddev) is added to the
    inputs; at inference the layer is the identity. This is useful to
    mitigate overfitting (a form of random data augmentation) and to
    simulate quantization noise.

    Args:
      stddev: Float, half-width of the uniform noise interval. Note:
        despite the name (kept for config compatibility), this is the
        bound of the distribution, not its standard deviation, which is
        stddev/sqrt(3).
      seed: Integer, optional random seed to enable deterministic behavior.

    Call arguments:
      inputs: Input tensor (of any rank).
      training: Python boolean indicating whether the layer should behave in
        training mode (adding noise) or in inference mode (doing nothing).

    Input shape:
      Arbitrary. Use the keyword argument `input_shape`
      (tuple of integers, does not include the samples axis)
      when using this layer as the first layer in a model.

    Output shape:
      Same shape as input.
    """

    def __init__(self, stddev=0.5, seed=None, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.stddev = stddev
        # Bug fix: `seed` was previously accepted but silently discarded,
        # so the documented deterministic behavior never happened and the
        # value was lost on serialization. Store it and use it below.
        self.seed = seed

    def call(self, inputs, training=None):
        def noised():
            return inputs + backend.random_uniform(
                shape=tf.shape(inputs),
                minval=-self.stddev,
                maxval=self.stddev,
                dtype=inputs.dtype,
                seed=self.seed,
            )

        # Only add noise in the training phase; pass inputs through otherwise.
        return backend.in_train_phase(noised, inputs, training=training)

    def get_config(self):
        # Include both constructor args so the layer round-trips through
        # get_config/from_config (older configs without `seed` still load,
        # since `seed` has a default).
        config = {"stddev": self.stddev, "seed": self.seed}
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape