add some code
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
import numpy as np
|
||||
from tensorflow.keras.utils import Sequence
|
||||
from ulaw import lin2ulaw
|
||||
|
||||
def lpc2rc(lpc):
    """Convert LPC coefficients to reflection coefficients (RCs).

    Runs the backward Levinson-Durbin recursion over the last axis:
    at each step the highest-order LPC coefficient is the reflection
    coefficient for that order, and the remaining coefficients are
    stepped down one order.

    Args:
        lpc: array indexed as (batch, time, order) — the code uses
            3-D indexing (`[:,:,...]`), so a 3-D input is assumed.

    Returns:
        Array of the same shape as `lpc` holding the reflection
        coefficients (order i-1 stored at index i-1).
    """
    order = lpc.shape[-1]
    rc = 0*lpc  # same shape/dtype as input; filled back-to-front below
    for i in range(order, 0, -1):
        # Highest-order LPC coefficient *is* the i-th reflection coefficient.
        rc[:,:,i-1] = lpc[:,:,-1]
        # Broadcast k_i across the i-1 remaining coefficients.
        ki = rc[:,:,i-1:i].repeat(i-1, axis=2)
        # Step the recursion down one order (note the reversed slice).
        lpc = (lpc[:,:,:-1] - ki*lpc[:,:,-2::-1])/(1-ki*ki)
    return rc
|
||||
|
||||
class LPCNetLoader(Sequence):
    """Keras Sequence yielding shuffled LPCNet training batches.

    Trims `data`, `features` and `periods` to a whole number of batches
    and reshuffles the sequence order after every epoch.

    Args:
        data: per-sample signal array; channel 0 is the network input,
            the remaining channels are the training targets.
        features: frame-level features; the last 16 columns hold LPCs.
        periods: per-frame pitch-period array.
        batch_size: sequences per batch.
        e2e: when True, LPCs are converted to RCs and appended to the
            outputs (end-to-end training); otherwise LPCs are appended
            to the inputs.
        lookahead: feature lookahead in frames; shifts the LPC window.
    """
    def __init__(self, data, features, periods, batch_size, e2e=False, lookahead=2):
        self.batch_size = batch_size
        # Only keep as many sequences as fill whole batches (shortest array wins).
        self.nb_batches = np.minimum(np.minimum(data.shape[0], features.shape[0]), periods.shape[0])//self.batch_size
        self.data = data[:self.nb_batches*self.batch_size, :]
        self.features = features[:self.nb_batches*self.batch_size, :]
        self.periods = periods[:self.nb_batches*self.batch_size, :]
        self.e2e = e2e
        self.lookahead = lookahead
        self.on_epoch_end()

    def on_epoch_end(self):
        # Reshuffle the sequence order between epochs.
        self.indices = np.arange(self.nb_batches*self.batch_size)
        np.random.shuffle(self.indices)

    def __getitem__(self, index):
        # Hoist the shuffled index slice once; it was previously recomputed
        # for every array (data, features, periods, lpc).
        idx = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        data = self.data[idx, :, :]
        in_data = data[:, :, :1]
        out_data = data[:, :, 1:]
        features = self.features[idx, :, :-16]  # all but the 16 LPC columns
        periods = self.periods[idx, :, :]
        outputs = [out_data]
        inputs = [in_data, features, periods]
        # Slice the LPC columns, compensating for the feature lookahead.
        if self.lookahead > 0:
            lpc = self.features[idx, 4-self.lookahead:-self.lookahead, -16:]
        else:
            lpc = self.features[idx, 4:, -16:]
        if self.e2e:
            outputs.append(lpc2rc(lpc))
        else:
            inputs.append(lpc)
        return (inputs, outputs)

    def __len__(self):
        return self.nb_batches
|
||||
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2018-2019 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
# Train an LPCNet model
|
||||
|
||||
import argparse
#from plc_loader import PLCLoader

# Decode quantized RDOVAE symbols back into features and write them to a file.
parser = argparse.ArgumentParser(description='Train a PLC model')

parser.add_argument('bits', metavar='<bits file>', help='binary features file (int16)')
parser.add_argument('output', metavar='<output>', help='output features')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--weights', metavar='<input weights>', help='model weights')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
# Fixed help text: it claimed "default 128" while the actual default is 1.
parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')


args = parser.parse_args()

# Import the model definition module named on the command line.
import importlib
rdovae = importlib.import_module(args.model)

import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py

import tensorflow as tf
from rdovae import pvq_quantize
from rdovae import apply_dead_zone

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
model.load_weights(args.weights)

lpc_order = 16
nbits=80


bits_file = args.bits
sequence_size = args.seq_length

# u for unquantised, load 16 bit PCM samples and convert to mu-law


# Load the unquantized symbols; each frame carries 20*4 = 80 values
# and frames come in pairs (hence sequence_size//2 below).
bits = np.memmap(bits_file + "-syms.f32", dtype='float32', mode='r')
nb_sequences = len(bits)//(40*sequence_size)//batch_size*batch_size
bits = bits[:nb_sequences*sequence_size*40]

bits = np.reshape(bits, (nb_sequences, sequence_size//2, 20*4))
print(bits.shape)

# Fixed lambda -> quantizer id -> per-bit scale and dead zone.
lambda_val = 0.001 * np.ones((nb_sequences, sequence_size//2, 1))
quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
quant_id = quant_id[:,:,0]
quant_embed = qembedding(quant_id)
quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])

# Quantize: scale, apply dead zone, round, then undo the scaling.
bits = bits*quant_scale
bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
bits = bits/quant_scale


# Decoder initial state: last frame of each sequence, PVQ-quantized.
state = np.memmap(bits_file + "-state.f32", dtype='float32', mode='r')

state = np.reshape(state, (nb_sequences, sequence_size//2, 24))
state = state[:,-1,:]
state = pvq_quantize(state, 82)
#state = state/(1e-15+tf.norm(state, axis=-1,keepdims=True))

print("shapes are:")
print(bits.shape)
print(state.shape)

# Keep every other frame before decoding.
bits = bits[:,1::2,:]
features = decoder.predict([bits, state], batch_size=batch_size)

features.astype('float32').tofile(args.output)
|
||||
@@ -0,0 +1,49 @@
|
||||
"""
|
||||
Modification of Tensorflow's Embedding Layer:
|
||||
1. Not restricted to be the first layer of a model
|
||||
2. Differentiable (allows non-integer lookups)
|
||||
- For non integer lookup, this layer linearly interpolates between the adjacent embeddings in the following way to preserver gradient flow
|
||||
- E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x))
|
||||
"""
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.layers import Layer
|
||||
|
||||
class diff_Embed(Layer):
    """Differentiable embedding layer (non-integer lookups allowed).

    For a fractional index x the output linearly interpolates between
    the adjacent embeddings, preserving gradient flow:
        E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x))

    Parameters:
        - units: int
            Dimension of the Embedding
        - dict_size: int
            Number of Embeddings to lookup
        - pcm_init: boolean
            Use the supplied `initializer` for the embedding matrix
        - initializer: weight initializer used when pcm_init is True
    """
    def __init__(self, units=128, dict_size = 256, pcm_init = True, initializer = None, **kwargs):
        super(diff_Embed, self).__init__(**kwargs)
        self.units = units
        self.dict_size = dict_size
        self.pcm_init = pcm_init
        self.initializer = initializer

    def build(self, input_shape):
        # Default to random-normal weights unless a PCM-specific
        # initializer was supplied.
        w_init = tf.random_normal_initializer()
        if self.pcm_init:
            w_init = self.initializer
        self.w = tf.Variable(initial_value=w_init(shape=(self.dict_size, self.units),dtype='float32'),trainable=True)

    def call(self, inputs):
        # Fractional part drives the interpolation weight.
        alpha = inputs - tf.math.floor(inputs)
        alpha = tf.expand_dims(alpha,axis = -1)
        alpha = tf.tile(alpha,[1,1,1,self.units])
        inputs = tf.cast(inputs,'int32')
        # Bug fix: clamp the upper lookup to dict_size-1 instead of the
        # hard-coded 255, which was wrong whenever dict_size != 256
        # (identical behavior for the default dict_size of 256).
        M = (1 - alpha)*tf.gather(self.w,inputs) + alpha*tf.gather(self.w,tf.clip_by_value(inputs + 1, 0, self.dict_size - 1))
        return M

    def get_config(self):
        config = super(diff_Embed, self).get_config()
        config.update({"units": self.units})
        config.update({"dict_size" : self.dict_size})
        config.update({"pcm_init" : self.pcm_init})
        config.update({"initializer" : self.initializer})
        return config
|
||||
388
managed_components/78__esp-opus/dnn/training_tf2/dump_lpcnet.py
Normal file
388
managed_components/78__esp-opus/dnn/training_tf2/dump_lpcnet.py
Normal file
@@ -0,0 +1,388 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2017-2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import os
|
||||
import io
|
||||
import lpcnet
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
|
||||
from ulaw import ulaw2lin, lin2ulaw
|
||||
from mdense import MDense
|
||||
from diffembed import diff_Embed
|
||||
from parameters import get_parameter
|
||||
import h5py
|
||||
import re
|
||||
import argparse
|
||||
|
||||
|
||||
# no cuda devices needed
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = ""
|
||||
|
||||
# Flag for dumping e2e (differentiable lpc) network weights
|
||||
flag_e2e = False
|
||||
|
||||
|
||||
max_rnn_neurons = 1
|
||||
max_conv_inputs = 1
|
||||
max_mdense_tmp = 1
|
||||
|
||||
def printVector(f, vector, name, dtype='float', dotp=False):
    # Emit `vector` as a static C array named `name` into source file `f`,
    # guarded by USE_WEIGHTS_FILE, and record the name in the module-level
    # `array_list` so the WeightArray table can reference it later.
    # When `dotp` is set, the matrix is re-blocked into the 4x8 tile layout
    # used by the DOT_PROD kernels before being flattened.
    global array_list
    if dotp:
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    v = np.reshape(vector, (-1));
    #print('static const float ', name, '[', len(v), '] = \n', file=f)
    if name not in array_list:
        array_list.append(name)
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(v[i]))
        if (i!=len(v)-1):
            f.write(',')
        else:
            # Last element: no trailing comma, stop before the line-break logic.
            break;
        if (i%8==7):
            # Wrap the initializer every 8 values.
            f.write("\n ")
        else:
            f.write(" ")
    #print(v, file=f)
    f.write('\n};\n')
    f.write('#endif\n\n')
    return;
|
||||
|
||||
def printSparseVector(f, A, name, have_diag=True):
    # Emit a block-sparse weight matrix: scan 4x8 blocks, keep only the
    # non-zero ones, and write (a) the quantized blocks under DOT_PROD,
    # (b) the float blocks otherwise, and (c) an index array describing
    # which blocks were kept. Returns the int8-quantized matrix AQ so the
    # caller can build quantization-corrected biases.
    # NOTE(review): when have_diag is True this mutates the caller's `A`
    # in place (the diagonals of the three N-column sub-blocks are zeroed
    # and dumped separately as `<name>_diag`).
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # Quantize to int8 range with a fixed 1/128 step.
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        # Reserve a slot for this column-group's non-zero block count.
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                # DOT_PROD variant stores the block transposed.
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
|
||||
|
||||
def dump_layer_ignore(self, f, hf):
    # Default dump hook: layers with no C counterpart are skipped.
    # Returning False tells the caller not to add this layer to the
    # generated NNetState struct.
    print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
    return False
# Install as the fallback for every Keras layer; specific layer classes
# override this attribute below.
Layer.dump_layer = dump_layer_ignore
|
||||
|
||||
def dump_sparse_gru(self, f, hf):
    # Dump a GRU whose *recurrent* weights are stored block-sparse
    # (used for GRU A). Emits weights, biases, quantization-corrected
    # sub-biases, size macros, and the struct/init boilerplate.
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    subias = weights[-1].copy()
    # Correct the recurrent bias row for the int8 quantization of the
    # recurrent weights (subtract sum of quantized weights / 128).
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # Kernel holds the 3 gates concatenated, so neurons = cols/3.
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    model_struct.write(' SparseGRULayer {};\n'.format(name));
    model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
                     .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    return True
|
||||
|
||||
def dump_grub(self, f, hf, gru_a_size):
    # Dump GRU B: the input weights are written sparse (only the first
    # gru_a_size rows, those fed by GRU A's state), while the recurrent
    # weights get both an int8 DOT_PROD variant and a float fallback.
    global max_rnn_neurons
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)

    f.write('#ifdef DOT_PROD\n')
    # int8-quantize the recurrent weights for the dot-product kernels.
    qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
    printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, weights[1], name + '_recurrent_weights')
    f.write('#endif /*DOT_PROD*/\n')

    printVector(f, weights[-1], name + '_bias')
    subias = weights[-1].copy()
    # Quantization-corrected sub-biases for both weight matrices.
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # Kernel holds the 3 gates concatenated, so neurons = cols/3.
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    model_struct.write(' GRULayer {};\n'.format(name));
    model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
                     .format(name, name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
    return True
|
||||
|
||||
def dump_gru_layer_dummy(self, f, hf):
    # GRU layers are dumped specially (dump_sparse_gru / dump_grub are
    # called explicitly from the main script), so the generic hook only
    # emits the size macros. Returning True still registers the layer's
    # state in the generated NNetState struct.
    name = self.name
    weights = self.get_weights()
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    return True;

GRU.dump_layer = dump_gru_layer_dummy
|
||||
|
||||
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    # Emit a fully-connected layer under an explicit `name` (also used
    # directly by the main script for derived matrices like the fused
    # embedding products). Writes weights, bias, the OUT_SIZE macro, and
    # the struct/init boilerplate.
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    model_struct.write(' DenseLayer {};\n'.format(name));
    model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
                     .format(name, name, name, weights.shape[0], weights.shape[1], activation))
|
||||
|
||||
def dump_dense_layer(self, f, hf):
    # Dump hook for Keras Dense layers: delegates to the named impl.
    # Returns False: dense layers carry no recurrent state, so they are
    # not added to the generated NNetState struct.
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    activation = self.activation.__name__.upper()
    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
    return False

Dense.dump_layer = dump_dense_layer
|
||||
|
||||
def dump_mdense_layer(self, f, hf):
    # Dump hook for MDense (multi-dense) layers. Weights/bias/factor are
    # transposed into the layout the C code expects, and the scratch-size
    # maximum is tracked for the MAX_MDENSE_TMP macro.
    global max_mdense_tmp
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, np.transpose(weights[0], (0, 2, 1)), name + '_weights')
    printVector(f, np.transpose(weights[1], (1, 0)), name + '_bias')
    printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
    activation = self.activation.__name__.upper()
    max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
    model_struct.write(' MDenseLayer {};\n'.format(name));
    model_init.write(' if (mdense_init(&model->{}, arrays, "{}_bias", "{}_weights", "{}_factor", {}, {}, {}, ACTIVATION_{})) return 1;\n'
                     .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    return False
MDense.dump_layer = dump_mdense_layer
|
||||
|
||||
def dump_conv1d_layer(self, f, hf):
    # Dump hook for Conv1D layers. Emits weights/bias plus OUT_SIZE,
    # STATE_SIZE (history needed: in_channels * (kernel-1)) and DELAY
    # ((kernel-1)/2, i.e. assumes a centered/"same" convolution —
    # TODO confirm against the C side). Returns True so the layer's
    # state buffer is added to NNetState.
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    model_struct.write(' Conv1DLayer {};\n'.format(name));
    model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
                     .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    return True
Conv1D.dump_layer = dump_conv1d_layer
|
||||
|
||||
|
||||
def dump_embedding_layer_impl(name, weights, f, hf):
    # Emit an embedding table under an explicit `name` (also used by the
    # main script for the precomputed embed x GRU-kernel products).
    printVector(f, weights, name + '_weights')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    model_struct.write(' EmbeddingLayer {};\n'.format(name));
    model_init.write(' if (embedding_init(&model->{}, arrays, "{}_weights", {}, {})) return 1;\n'
                     .format(name, name, weights.shape[0], weights.shape[1]))
||||
def dump_embedding_layer(self, f, hf):
    # Dump hook for Embedding (and diff_Embed) layers: delegates to the
    # named impl. Returns False: embeddings are stateless lookups.
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()[0]
    dump_embedding_layer_impl(name, weights, f, hf)
    return False
Embedding.dump_layer = dump_embedding_layer
diff_Embed.dump_layer = dump_embedding_layer
|
||||
|
||||
if __name__ == "__main__":
    # Load a trained LPCNet model from an h5 file and dump its weights as
    # C source (nnet_data.c) plus a matching header (nnet_data.h).
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file', type=str, help='model weight h5 file')
    parser.add_argument('--nnet-header', type=str, help='name of c header file for dumped model', default='nnet_data.h')
    parser.add_argument('--nnet-source', type=str, help='name of c source file for dumped model', default='nnet_data.c')
    parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. If not specified I will attempt to read it from the model file with 1 as default', default=None)
    parser.add_argument('--lookahead', type=float, help='Features lookahead. If not specified I will attempt to read it from the model file with 2 as default', default=None)

    args = parser.parse_args()

    filename = args.model_file
    # Infer the network geometry from the stored weight shapes, and detect
    # end-to-end models by the presence of the 'rc2lpc' layer.
    with h5py.File(filename, "r") as f:
        units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
        units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
        cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
        e2e = 'rc2lpc' in f['model_weights']

    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
    #model.summary()

    model.load_weights(filename, by_name=True)

    cfile = args.nnet_source
    hfile = args.nnet_header

    f = open(cfile, 'w')
    hf = open(hfile, 'w')
    # Struct members and init calls are accumulated in StringIO buffers by
    # the dump hooks and written out at the end.
    model_struct = io.StringIO()
    model_init = io.StringIO()
    model_struct.write('typedef struct {\n')
    model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
    model_init.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays) {\n')
    # Names of every emitted C array (filled by printVector).
    array_list = []

    f.write('/*This file is automatically generated from a Keras model*/\n')
    f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
    f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))

    hf.write('/*This file is automatically generated from a Keras model*/\n\n')
    hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')

    if e2e:
        hf.write('/* This is an end-to-end model */\n')
        hf.write('#define END2END\n\n')
    else:
        hf.write('/* This is *not* an end-to-end model */\n')
        hf.write('/* #define END2END */\n\n')

    # LPC weighting factor
    if type(args.lpc_gamma) == type(None):
        lpc_gamma = get_parameter(model, 'lpc_gamma', 1)
    else:
        lpc_gamma = args.lpc_gamma

    hf.write('/* LPC weighting factor */\n')
    hf.write('#define LPC_GAMMA ' + str(lpc_gamma) +'f\n\n')

    # look-ahead
    if type(args.lookahead) == type(None):
        lookahead = get_parameter(model, 'lookahead', 2)
    else:
        lookahead = args.lookahead

    hf.write('/* Features look-ahead */\n')
    hf.write('#define FEATURES_DELAY ' + str(lookahead) +'\n\n')

    embed_size = lpcnet.embed_size

    # Pre-multiply the signal embedding into each third of GRU A's input
    # kernel so the C code can use direct table lookups.
    E = model.get_layer('embed_sig').get_weights()[0]
    W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
    dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
    W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
    dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
    W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
    dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
    W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
    #FIXME: dump only half the biases
    b = model.get_layer('gru_a').get_weights()[2]
    dump_dense_layer_impl('gru_a_dense_feature', W, b[:len(b)//2], 'LINEAR', f, hf)

    W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
    b = model.get_layer('gru_b').get_weights()[2]
    # Set biases to zero because they'll be included in the GRU input part
    # (we need regular and SU biases)
    dump_dense_layer_impl('gru_b_dense_feature', W, 0*b[:len(b)//2], 'LINEAR', f, hf)
    dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)

    # Run every layer's dump hook; layers returning True carry state and
    # get a slot in the generated NNetState struct.
    layer_list = []
    for i, layer in enumerate(model.layers):
        if layer.dump_layer(f, hf):
            layer_list.append(layer.name)

    dump_sparse_gru(model.get_layer('gru_a'), f, hf)

    # Emit the table of all weight arrays for the binary-weights loader.
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('const WeightArray lpcnet_arrays[] = {\n')
    for name in array_list:
        f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
        f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
        f.write('#endif\n')
    f.write(' {NULL, 0, 0, NULL}\n};\n')
    f.write('#endif\n')

    model_init.write(' return 0;\n}\n')
    model_init.write('#endif\n')
    f.write(model_init.getvalue())

    hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
    hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
    hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))


    hf.write('typedef struct {\n')
    for i, name in enumerate(layer_list):
        hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
    hf.write('} NNetState;\n\n')

    model_struct.write('} LPCNetModel;\n\n')
    hf.write(model_struct.getvalue())
    hf.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays);\n\n')
    hf.write('\n\n#endif\n')

    f.close()
    hf.close()
|
||||
296
managed_components/78__esp-opus/dnn/training_tf2/dump_plc.py
Normal file
296
managed_components/78__esp-opus/dnn/training_tf2/dump_plc.py
Normal file
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2017-2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import lpcnet_plc
|
||||
import io
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
|
||||
import h5py
|
||||
import re
|
||||
|
||||
# Flag for dumping e2e (differentiable lpc) network weights
|
||||
flag_e2e = False
|
||||
|
||||
max_rnn_neurons = 1
|
||||
max_conv_inputs = 1
|
||||
|
||||
def printVector(f, vector, name, dtype='float', dotp=False):
    # Emit `vector` as a static C array named `name` into source file `f`,
    # guarded by USE_WEIGHTS_FILE, and record the name in the module-level
    # `array_list`. With `dotp` set, the matrix is re-blocked into the 4x8
    # tile layout used by the DOT_PROD kernels before being flattened.
    global array_list
    if dotp:
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    v = np.reshape(vector, (-1));
    #print('static const float ', name, '[', len(v), '] = \n', file=f)
    if name not in array_list:
        array_list.append(name)
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(v[i]))
        if (i!=len(v)-1):
            f.write(',')
        else:
            # Last element: no trailing comma, stop before the line-break logic.
            break;
        if (i%8==7):
            # Wrap the initializer every 8 values.
            f.write("\n ")
        else:
            f.write(" ")
    #print(v, file=f)
    f.write('\n};\n')
    f.write('#endif\n\n')
    return;
|
||||
|
||||
def printSparseVector(f, A, name, have_diag=True):
    # Emit a block-sparse weight matrix: scan 4x8 blocks, keep only the
    # non-zero ones, and write the quantized blocks (DOT_PROD), the float
    # blocks (fallback), and an index array describing the kept blocks.
    # Returns the int8-quantized matrix AQ for bias correction.
    # NOTE(review): when have_diag is True this mutates the caller's `A`
    # in place (diagonals of the three N-column sub-blocks are zeroed and
    # dumped separately as `<name>_diag`).
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # Quantize to int8 range with a fixed 1/128 step.
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        # Reserve a slot for this column-group's non-zero block count.
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                # DOT_PROD variant stores the block transposed.
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
|
||||
|
||||
def dump_layer_ignore(self, f, hf):
    """Default dump handler: skip this layer type, emitting nothing.

    Returns False so the caller does not add a state entry for this layer
    (see the layer_list loop below).
    """
    print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
    return False

# Install as the default on every Keras Layer; specific types override below.
Layer.dump_layer = dump_layer_ignore
|
||||
|
||||
def dump_sparse_gru(self, f, hf):
    """Dump a GRU layer with block-sparse recurrent weights to C source/header.

    Writes the recurrent weights (via printSparseVector), the bias, and a
    "subias" (bias corrected for the rounding introduced by Q7 quantization),
    then emits the size #defines and the model struct/init entries.
    Not installed on a layer class here; meant to be called explicitly
    (see the commented-out dump_sparse_gru(...) call in the main script).
    """
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    # qweights is the quantized recurrent weight matrix returned by the printer.
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    subias = weights[-1].copy()
    # Only row 1 of the bias is corrected for quantization error here
    # (compare dump_gru_layer, which corrects rows 0 and 1).
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    # Keras GRUs default to reset_after=True; emit 0 only when explicitly disabled.
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # Kernel columns hold the 3 concatenated gates, so neurons = cols/3.
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    model_struct.write(' SparseGRULayer {};\n'.format(name));
    model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
                     .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    return True
|
||||
|
||||
def dump_gru_layer(self, f, hf):
    """Dump a GRU layer (sparse input weights, dense recurrent weights) to C.

    Input weights go through printSparseVector; recurrent weights are written
    twice, quantized for DOT_PROD builds and as floats otherwise.  A "subias"
    corrected for quantization rounding is emitted alongside the raw bias.
    Returns True so the caller records a state entry for this layer.
    """
    global max_rnn_neurons
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)

    f.write('#ifdef DOT_PROD\n')
    # Q7-quantize the recurrent weights for the dot-product kernels.
    qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
    printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, weights[1], name + '_recurrent_weights')
    f.write('#endif /*DOT_PROD*/\n')

    printVector(f, weights[-1], name + '_bias')
    subias = weights[-1].copy()
    # Correct the input-side (row 0) and recurrent-side (row 1) biases for the
    # systematic offset introduced by Q7 quantization of the weights.
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # Kernel columns hold the 3 concatenated gates, so neurons = cols/3.
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    model_struct.write(' GRULayer {};\n'.format(name));
    model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
                     .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
    return True
GRU.dump_layer = dump_gru_layer
|
||||
|
||||
def dump_gru_layer_dummy(self, f, hf):
    """Emit only the size #defines for a GRU layer, skipping its weights.

    Alternative to dump_gru_layer for builds that do not embed GRU weights;
    returns True so the layer still gets a state entry.
    """
    layer_name = self.name
    layer_weights = self.get_weights()
    # Kernel columns hold the 3 concatenated gates, so the state size is cols/3.
    state_size = layer_weights[0].shape[1]//3
    hf.write('#define {}_OUT_SIZE {}\n'.format(layer_name.upper(), state_size))
    hf.write('#define {}_STATE_SIZE {}\n'.format(layer_name.upper(), state_size))
    return True
|
||||
|
||||
#GRU.dump_layer = dump_gru_layer_dummy
|
||||
|
||||
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Write one dense layer (weights + bias) to the C source and header.

    Emits the weight/bias arrays, the OUT_SIZE #define, and the struct/init
    entries into the module-level model_struct / model_init buffers.
    """
    nb_inputs = weights.shape[0]
    nb_outputs = weights.shape[1]
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), nb_outputs))
    model_struct.write(' DenseLayer {};\n'.format(name))
    model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
                     .format(name, name, name, nb_inputs, nb_outputs, activation))
||||
def dump_dense_layer(self, f, hf):
    """Dump handler for Keras Dense layers; delegates to dump_dense_layer_impl.

    Returns False: dense layers contribute no entry to the state struct.
    """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    act = self.activation.__name__.upper()
    kernel = weights[0]
    bias = weights[1]
    dump_dense_layer_impl(name, kernel, bias, act, f, hf)
    return False

Dense.dump_layer = dump_dense_layer
|
||||
|
||||
def dump_conv1d_layer(self, f, hf):
    """Dump a Conv1D layer's weights and size/delay #defines to C.

    weights[0] has shape (kernel_size, in_channels, out_channels) as indexed
    below; the state size covers (kernel_size-1) past input frames and the
    delay is half that (centered kernel).  Returns True so the caller records
    a state entry for this layer.
    """
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    # Track the widest in_channels*kernel_size product for buffer sizing.
    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    model_struct.write(' Conv1DLayer {};\n'.format(name));
    model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
                     .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    return True
Conv1D.dump_layer = dump_conv1d_layer
|
||||
|
||||
|
||||
|
||||
# ---- Main script: rebuild the PLC model from an HDF5 file and dump it to C ----
filename = sys.argv[1]

# Read the layer sizes directly from the weight file so the model can be
# reconstructed with matching dimensions.
with h5py.File(filename, "r") as f:
    units = min(f['model_weights']['plc_gru1']['plc_gru1']['recurrent_kernel:0'].shape)
    # NOTE(review): units2 is read but never used below -- confirm whether
    # plc_gru2's size should also be passed to the model constructor.
    units2 = min(f['model_weights']['plc_gru2']['plc_gru2']['recurrent_kernel:0'].shape)
    cond_size = f['model_weights']['plc_dense1']['plc_dense1']['kernel:0'].shape[1]

model = lpcnet_plc.new_lpcnet_plc_model(rnn_units=units, cond_size=cond_size)
# Compile is required before load_weights; the optimizer/loss are never used.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()

model.load_weights(filename, by_name=True)

# Optional explicit output paths: <model.h5> [<out.c> <out.h>]
if len(sys.argv) > 2:
    cfile = sys.argv[2];
    hfile = sys.argv[3];
else:
    cfile = 'plc_data.c'
    hfile = 'plc_data.h'


f = open(cfile, 'w')
hf = open(hfile, 'w')
# The struct and init-function bodies are accumulated in memory and flushed
# after all layers have been dumped.
model_struct = io.StringIO()
model_init = io.StringIO()
model_struct.write('typedef struct {\n')
model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
model_init.write('int init_plc_model(PLCModel *model, const WeightArray *arrays) {\n')
array_list = []


f.write('/*This file is automatically generated from a Keras model*/\n')
f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))

hf.write('/*This file is automatically generated from a Keras model*/\n\n')
hf.write('#ifndef PLC_DATA_H\n#define PLC_DATA_H\n\n#include "nnet.h"\n\n')

# Dump every layer; a True return means the layer needs a state entry.
layer_list = []
for i, layer in enumerate(model.layers):
    if layer.dump_layer(f, hf):
        layer_list.append(layer.name)

#dump_sparse_gru(model.get_layer('gru_a'), f, hf)
# Emit the weight-array table used when weights are compiled in.
f.write('#ifndef USE_WEIGHTS_FILE\n')
f.write('const WeightArray lpcnet_plc_arrays[] = {\n')
for name in array_list:
    f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
    f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
    f.write('#endif\n')
f.write(' {NULL, 0, 0, NULL}\n};\n')
f.write('#endif\n')

# Close and flush the accumulated init function into the C file.
model_init.write(' return 0;\n}\n')
model_init.write('#endif\n')
f.write(model_init.getvalue())


hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
#hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))

# State struct: one state array per layer that reported state (see layer_list).
hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
    hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
hf.write('} PLCNetState;\n\n')

model_struct.write('} PLCModel;\n\n')
hf.write(model_struct.getvalue())
hf.write('int init_plc_model(PLCModel *model, const WeightArray *arrays);\n\n')

hf.write('\n\n#endif\n')

f.close()
hf.close()
|
||||
306
managed_components/78__esp-opus/dnn/training_tf2/dump_rdovae.py
Normal file
306
managed_components/78__esp-opus/dnn/training_tf2/dump_rdovae.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
|
||||
|
||||
import argparse
# NOTE(review): parse150 appears unused in this file -- likely a stray
# auto-import; confirm and remove.
from ftplib import parse150
import os

# Force CPU execution; weight dumping does not need a GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ""
|
||||
|
||||
# Command-line interface: the weight file plus the hyper-parameters needed to
# rebuild the RDO-VAE model with matching dimensions.
parser = argparse.ArgumentParser()

parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)

# Parse before the heavy TF imports so --help stays fast.
args = parser.parse_args()
|
||||
|
||||
# now import the heavy stuff
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer, printVector
|
||||
from rdovae import new_rdovae_model
|
||||
|
||||
def start_header(header_fid, header_name):
    """Open an include guard in a generated C header.

    The guard name is derived from the header's basename with its two-character
    extension (".h") stripped and upper-cased, e.g. "foo_data.h" -> FOO_DATA_H.
    """
    stem = os.path.basename(header_name)[:-2]
    guard = stem.upper() + "_H"
    header_fid.write(f"""
#ifndef {guard}
#define {guard}

""")
|
||||
|
||||
def finish_header(header_fid):
    """Close the include guard opened by start_header."""
    header_fid.write("""
#endif

""")
|
||||
|
||||
def start_source(source_fid, header_name, weight_file):
    """Write the boilerplate preamble of a generated C source file.

    Records the originating weight file in a comment and includes the
    matching generated header (plus config.h for autotools builds).
    """
    preamble = f"""
/* this source file was automatically generated from weight file {weight_file} */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "{header_name}"

"""
    source_fid.write(preamble)
|
||||
|
||||
def finish_source(source_fid):
    """No-op counterpart of finish_header; kept for call-site symmetry."""
    return None
|
||||
|
||||
|
||||
def dump_statistical_model(qembedding, f, fh):
    """Dump the quantizer-embedding statistics as fixed-point C arrays.

    The embedding weight matrix packs, per quantization level, six groups of N
    values; groups 0/1/4/5 are decoded here into quantization scales, dead
    zones, and the (r, p0) parameters, then written as Q8/Q10/Q15 uint16
    arrays into source file f with matching extern declarations in header fh.
    """
    w = qembedding.weights[0].numpy()
    levels, dim = w.shape
    N = dim // 6

    print("dumping statistical model")
    # softplus/sigmoid undo the unconstrained parameterization used in training.
    quant_scales = tf.math.softplus(w[:, : N]).numpy()
    dead_zone = 0.05 * tf.math.softplus(w[:, N : 2 * N]).numpy()
    r = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()
    p0 = tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()
    p0 = 1 - r ** (0.5 + 0.5 * p0)

    # Convert to the fixed-point formats expected by the C decoder.
    quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
    dead_zone_q10 = np.round(dead_zone * 2**10).astype(np.uint16)
    r_q15 = np.round(r * 2**15).astype(np.uint16)
    p0_q15 = np.round(p0 * 2**15).astype(np.uint16)

    printVector(f, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)
    printVector(f, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
    printVector(f, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
    printVector(f, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)

    fh.write(
f"""
extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
extern const opus_uint16 dred_r_q15[{levels * N}];
extern const opus_uint16 dred_p0_q15[{levels * N}];

"""
    )
|
||||
|
||||
if __name__ == "__main__":

    # Rebuild the model with the CLI hyper-parameters and restore its weights.
    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
    model.load_weights(args.weights)

    # ---- encoder: dump its layers into dred_rdovae_enc_data.{c,h} ----
    encoder_dense_names = [
        'enc_dense1',
        'enc_dense3',
        'enc_dense5',
        'enc_dense7',
        'enc_dense8',
        'gdense1',
        'gdense2'
    ]

    encoder_gru_names = [
        'enc_dense2',
        'enc_dense4',
        'enc_dense6'
    ]

    encoder_conv1d_names = [
        'bits_dense'
    ]

    source_fid = open("dred_rdovae_enc_data.c", 'w')
    header_fid = open("dred_rdovae_enc_data.h", 'w')

    start_header(header_fid, "dred_rdovae_enc_data.h")
    start_source(source_fid, "dred_rdovae_enc_data.h", os.path.basename(args.weights))

    header_fid.write(
f"""
#include "dred_rdovae_constants.h"

#include "nnet.h"
"""
    )

    # dump GRUs; each dump returns the layer's neuron count, track the maximum
    max_rnn_neurons_enc = max(
        [
            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
            for name in encoder_gru_names
        ]
    )

    # dump conv layers, tracking the widest input for buffer sizing
    max_conv_inputs = max(
        [
            dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
            for name in encoder_conv1d_names
        ]
    )

    # dump Dense layers
    for name in encoder_dense_names:
        layer = encoder.get_layer(name)
        dump_dense_layer(layer, source_fid, header_fid)

    # some global constants
    header_fid.write(
f"""

#define DRED_ENC_MAX_RNN_NEURONS {max_rnn_neurons_enc}

#define DRED_ENC_MAX_CONV_INPUTS {max_conv_inputs}

"""
    )

    finish_header(header_fid)
    finish_source(source_fid)

    header_fid.close()
    source_fid.close()

    # ---- statistical model: quantizer embedding tables ----
    source_fid = open("dred_rdovae_stats_data.c", 'w')
    header_fid = open("dred_rdovae_stats_data.h", 'w')

    start_header(header_fid, "dred_rdovae_stats_data.h")
    start_source(source_fid, "dred_rdovae_stats_data.h", os.path.basename(args.weights))

    header_fid.write(
"""

#include "opus_types.h"

"""
    )

    dump_statistical_model(qembedding, source_fid, header_fid)

    finish_header(header_fid)
    finish_source(source_fid)

    header_fid.close()
    source_fid.close()

    # ---- decoder: dump its layers into dred_rdovae_dec_data.{c,h} ----
    decoder_dense_names = [
        'state1',
        'state2',
        'state3',
        'dec_dense1',
        'dec_dense3',
        'dec_dense5',
        'dec_dense7',
        'dec_dense8',
        'dec_final'
    ]

    decoder_gru_names = [
        'dec_dense2',
        'dec_dense4',
        'dec_dense6'
    ]

    source_fid = open("dred_rdovae_dec_data.c", 'w')
    header_fid = open("dred_rdovae_dec_data.h", 'w')

    start_header(header_fid, "dred_rdovae_dec_data.h")
    start_source(source_fid, "dred_rdovae_dec_data.h", os.path.basename(args.weights))

    header_fid.write(
f"""
#include "dred_rdovae_constants.h"

#include "nnet.h"
"""
    )

    # dump GRUs
    max_rnn_neurons_dec = max(
        [
            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
            for name in decoder_gru_names
        ]
    )

    # dump Dense layers
    for name in decoder_dense_names:
        layer = decoder.get_layer(name)
        dump_dense_layer(layer, source_fid, header_fid)

    # some global constants
    header_fid.write(
f"""

#define DRED_DEC_MAX_RNN_NEURONS {max_rnn_neurons_dec}

"""
    )

    finish_header(header_fid)
    finish_source(source_fid)

    header_fid.close()
    source_fid.close()

    # ---- shared constants used by both encoder and decoder ----
    header_fid = open("dred_rdovae_constants.h", 'w')
    start_header(header_fid, "dred_rdovae_constants.h")

    header_fid.write(
f"""
#define DRED_NUM_FEATURES 20

#define DRED_LATENT_DIM {args.latent_dim}

#define DRED_STATE_DIM {24}

#define DRED_NUM_QUANTIZATION_LEVELS {qembedding.weights[0].shape[0]}

#define DRED_MAX_RNN_NEURONS {max(max_rnn_neurons_enc, max_rnn_neurons_dec)}

#define DRED_MAX_CONV_INPUTS {max_conv_inputs}
"""
    )

    finish_header(header_fid)
|
||||
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2018-2019 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
# Train an LPCNet model
|
||||
|
||||
import argparse
|
||||
#from plc_loader import PLCLoader
|
||||
|
||||
# Command-line interface (description inherited from the PLC training script).
parser = argparse.ArgumentParser(description='Train a PLC model')

parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--weights', metavar='<input weights>', help='model weights')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
# NOTE(review): default is 1 but the help text says 128 -- confirm which is
# intended and make them agree.
parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 128)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')


args = parser.parse_args()
|
||||
|
||||
import importlib
|
||||
rdovae = importlib.import_module(args.model)
|
||||
|
||||
from rdovae import apply_dead_zone
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
|
||||
import tensorflow.keras.backend as K
|
||||
import h5py
|
||||
|
||||
import tensorflow as tf
|
||||
from rdovae import pvq_quantize
|
||||
|
||||
# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

# Rebuild the RDO-VAE and restore trained weights before running inference.
model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
model.load_weights(args.weights)

lpc_order = 16

feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
sequence_size = args.seq_length

# u for unquantised, load 16 bit PCM samples and convert to mu-law


# Memory-map the feature file and truncate it to a whole number of
# batch-aligned sequences.
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]

features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
print(features.shape)
# Drop the trailing LPC coefficients; only the used features are encoded.
features = features[:, :, :nb_used_features]
#features = np.random.randn(73600, 1000, 17)


# Encode: per-frame latent symbols plus the decoder's initial GRU state.
bits, gru_state_dec = encoder.predict([features], batch_size=batch_size)
(gru_state_dec).astype('float32').tofile(args.output + "-state.f32")


#dist = rdovae.feat_dist_loss(features, quant_out)
#rate = rdovae.sq1_rate_loss(features, model_bits)
#rate2 = rdovae.sq_rate_metric(features, model_bits)
#print(dist, rate, rate2)

print("shapes are:")
print(bits.shape)
print(gru_state_dec.shape)

features.astype('float32').tofile(args.output + "-input.f32")
#quant_out.astype('float32').tofile(args.output + "-enc_dec.f32")
nbits=80
bits.astype('float32').tofile(args.output + "-syms.f32")

# Fixed rate-control lambda; quant_id maps it to a quantizer embedding index.
lambda_val = 0.0002 * np.ones((nb_sequences, sequence_size//2, 1))
quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
quant_id = quant_id[:,:,0]
quant_embed = qembedding(quant_id)
# softplus undoes the unconstrained parameterization of scale and dead zone.
quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])

# Quantize the symbols: scale, apply dead zone, round, then unscale.
bits = bits*quant_scale
bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
bits = bits/quant_scale

gru_state_dec = pvq_quantize(gru_state_dec, 82)
#gru_state_dec = gru_state_dec/(1e-15+tf.norm(gru_state_dec, axis=-1,keepdims=True))
# Decode from the last GRU state using every second frame's symbols.
gru_state_dec = gru_state_dec[:,-1,:]
dec_out = decoder([bits[:,1::2,:], gru_state_dec])

print(dec_out.shape)

dec_out.numpy().astype('float32').tofile(args.output + "-quant_out.f32")
|
||||
256
managed_components/78__esp-opus/dnn/training_tf2/fec_encoder.py
Normal file
256
managed_components/78__esp-opus/dnn/training_tf2/fec_encoder.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe and Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy.io import wavfile
|
||||
import tensorflow as tf
|
||||
|
||||
from rdovae import new_rdovae_model, pvq_quantize, apply_dead_zone, sq_rate_metric
|
||||
from fec_packets import write_fec_packets, read_fec_packets
|
||||
|
||||
|
||||
# When debug is set, CLI parsing is bypassed and a hard-coded args object is
# used instead (handy for running under a debugger).
debug = False

if debug:
    args = type('dummy', (object,),
    {
        'input' : 'item1.wav',
        'weights' : 'testout/rdovae_alignment_fix_1024_120.h5',
        'enc_lambda' : 0.0007,
        'output' : "test_0007.fec",
        'cond_size' : 1024,
        'num_redundancy_frames' : 64,
        'extra_delay' : 0,
        'dump_data' : './dump_data'
    })()
    os.environ['CUDA_VISIBLE_DEVICES']=""
else:
    parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with voip application and 20ms frames')

    parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
    parser.add_argument('weights', metavar='<weights>', help='trained model file (.h5)')
#    parser.add_argument('enc_lambda', metavar='<lambda>', type=float, help='lambda for controlling encoder rate')
    parser.add_argument('output', type=str, help='output file (will be extended with .fec)')

    parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
    parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
    parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 40)", default=40)
    parser.add_argument('--num-redundancy-frames', default=64, type=int, help='number of redundancy frames (20ms) per packet (default 64)')
    parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
    parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')

    parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')

    args = parser.parse_args()
|
||||
|
||||
# Rebuild the RDO-VAE (batch size 1 for streaming-style encoding) and restore
# the trained weights.
model, encoder, decoder, qembedding = new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=1, nb_quant=args.quant_levels, cond_size=args.cond_size)
model.load_weights(args.weights)

lpc_order = 16

## prepare input signal
# SILK frame size is 20ms and LPCNet subframes are 10ms
subframe_size = 160
frame_size = 2 * subframe_size

# 91 samples delay to align with SILK decoded frames
silk_delay = 91

# prepend zeros to have enough history to produce the first package
zero_history = (args.num_redundancy_frames - 1) * frame_size

# dump data has a (feature) delay of 10ms
dump_data_delay = 160

total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay

# load signal; raw formats are assumed to already be 16-bit mono PCM
if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'):
    signal = np.fromfile(args.input, dtype='int16')

elif args.input.endswith('.wav'):
    fs, signal = wavfile.read(args.input)
else:
    raise ValueError(f'unknown input signal format: {args.input}')

# fill up last frame with zeros
padded_signal_length = len(signal) + total_delay
tail = padded_signal_length % frame_size
right_padding = (frame_size - tail) % frame_size

signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))

padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
signal.tofile(padded_signal_file)

# write signal and call dump_data to create features

feature_file = os.path.splitext(args.input)[0] + '_features.f32'
command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"
# NOTE: shell=True with interpolated paths -- fine for a local tool, but paths
# containing shell metacharacters will break or be interpreted.
r = subprocess.run(command, shell=True)
if r.returncode != 0:
    raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")
|
||||
|
||||
# load features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features

# load features; keep an even number of subframes (2 subframes = 1 frame)
features = np.fromfile(feature_file, dtype='float32')
num_subframes = len(features) // nb_features
num_subframes = 2 * (num_subframes // 2)
num_frames = num_subframes // 2

features = np.reshape(features, (1, -1, nb_features))
features = features[:, :, :nb_used_features]
features = features[:, :num_subframes, :]

#variable quantizer depending on the delay
# Older redundancy frames get coarser quantization (q1) ramping down to q0.
q0 = 3
q1 = 15
quant_id = np.round(q1 + (q0-q1)*np.arange(args.num_redundancy_frames//2)/args.num_redundancy_frames).astype('int16')
#print(quant_id)

quant_embed = qembedding(quant_id)

# run encoder
print("running fec encoder...")
symbols, gru_state_dec = encoder.predict(features)

# apply quantization
nsymbols = 80
quant_scale = tf.math.softplus(quant_embed[:, :nsymbols]).numpy()
dead_zone = tf.math.softplus(quant_embed[:, nsymbols : 2 * nsymbols]).numpy()
#symbols = apply_dead_zone([symbols, dead_zone]).numpy()
#qsymbols = np.round(symbols)
quant_gru_state_dec = pvq_quantize(gru_state_dec, 82)

# rate estimate
hard_distr_embed = tf.math.sigmoid(quant_embed[:, 4 * nsymbols : ]).numpy()
#rate_input = np.concatenate((qsymbols, hard_distr_embed, enc_lambda), axis=-1)
#rates = sq_rate_metric(None, rate_input, reduce=False).numpy()

# run decoder
input_length = args.num_redundancy_frames // 2
offset = args.num_redundancy_frames - 1

packets = []
packet_sizes = []

# One batch row per packet; each row holds that packet's redundancy symbols.
sym_batch = np.zeros((num_frames-offset, args.num_redundancy_frames//2, nsymbols), dtype='float32')
quant_state = quant_gru_state_dec[0, offset:num_frames, :]
#pack symbols for batch processing: every second frame of the trailing window
for i in range(offset, num_frames):
    sym_batch[i-offset, :, :] = symbols[0, i - 2 * input_length + 2 : i + 1 : 2, :]

#quantize symbols: scale, dead zone, round (unscaled again further down)
sym_batch = sym_batch * quant_scale
sym_batch = apply_dead_zone([sym_batch, dead_zone]).numpy()
sym_batch = np.round(sym_batch)

# Estimate the coding rate of the quantized symbols.
hard_distr_embed = np.broadcast_to(hard_distr_embed, (sym_batch.shape[0], sym_batch.shape[1], 2*sym_batch.shape[2]))
fake_lambda = np.ones((sym_batch.shape[0], sym_batch.shape[1], 1), dtype='float32')
rate_input = np.concatenate((sym_batch, hard_distr_embed, fake_lambda), axis=-1)
rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
#print(rates.shape)
print("average rate = ", np.mean(rates[args.num_redundancy_frames:,:]))

#sym_batch.tofile('qsyms.f32')

sym_batch = sym_batch / quant_scale
#print(sym_batch.shape, quant_state.shape)
#features = decoder.predict([sym_batch, quant_state])
features = decoder([sym_batch, quant_state])

#for i in range(offset, num_frames):
# print(f"processing frame {i - offset}...")
# features = decoder.predict([qsymbols[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_embed_dec[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_gru_state_dec[:, i, :]])
# packets.append(features)
# packet_size = 8 * int((np.sum(rates[:, i - 2 * input_length + 2 : i + 1 : 2]) + 7) / 8) + 64
|
||||
# packet_sizes.append(packet_size)
|
||||
|
||||
|
||||
# write packets
|
||||
packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
|
||||
#write_fec_packets(packet_file, packets, packet_sizes)
|
||||
|
||||
|
||||
#print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")
|
||||
|
||||
if args.lossfile != None:
|
||||
loss = np.loadtxt(args.lossfile, dtype='int16')
|
||||
fec_out = np.zeros((features.shape[0]*2, features.shape[-1]), dtype='float32')
|
||||
foffset = -2
|
||||
ptr = 0;
|
||||
count = 2;
|
||||
for i in range(features.shape[0]):
|
||||
if (loss[i] == 0) or (i == features.shape[0]-1):
|
||||
fec_out[ptr:ptr+count,:] = features[i, foffset:, :]
|
||||
#print("filled ", count)
|
||||
foffset = -2
|
||||
ptr = ptr+count
|
||||
count = 2
|
||||
else:
|
||||
count = count + 2
|
||||
foffset = foffset - 2
|
||||
|
||||
fec_out_full = np.zeros((fec_out.shape[0], nb_features), dtype=np.float32)
|
||||
fec_out_full[:, :nb_used_features] = fec_out
|
||||
|
||||
fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
|
||||
|
||||
|
||||
#create packets array like in the original version for debugging purposes
|
||||
for i in range(offset, num_frames):
|
||||
packets.append(features[i-offset:i-offset+1, :, :])
|
||||
|
||||
if args.debug_output:
|
||||
import itertools
|
||||
|
||||
#batches = [2, 4]
|
||||
batches = [4]
|
||||
#offsets = [0, 4, 20]
|
||||
offsets = [0, (args.num_redundancy_frames - 2)*2]
|
||||
# sanity checks
|
||||
# 1. concatenate features at offset 0
|
||||
for batch, offset in itertools.product(batches, offsets):
|
||||
|
||||
stop = packets[0].shape[1] - offset
|
||||
print(batch, offset, stop)
|
||||
test_features = np.concatenate([packet[:,stop - batch: stop, :] for packet in packets[::batch//2]], axis=1)
|
||||
|
||||
test_features_full = np.zeros((test_features.shape[1], nb_features), dtype=np.float32)
|
||||
test_features_full[:, :nb_used_features] = test_features[0, :, :]
|
||||
|
||||
print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}")
|
||||
test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32')
|
||||
142
managed_components/78__esp-opus/dnn/training_tf2/fec_packets.c
Normal file
142
managed_components/78__esp-opus/dnn/training_tf2/fec_packets.c
Normal file
@@ -0,0 +1,142 @@
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "fec_packets.h"
|
||||
|
||||
/* Read one subframe of features from a FEC packet file.
 *
 * filename       - path to a file produced by write_fec_packets (fec_packets.py)
 * features       - output buffer; receives num_features floats (count taken
 *                  from the file header)
 * packet_index   - which packet to read
 * subframe_index - which subframe inside that packet
 *
 * Returns 0 on success, 1 on failure (unopenable file, short read, or
 * index out of bounds).
 */
int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
{

    int16_t version;
    int16_t header_size;
    int16_t num_packets;
    int16_t packet_size;
    int16_t subframe_size;
    int16_t subframes_per_packet;
    int16_t num_features;
    long offset;

    FILE *fid = fopen(filename, "rb");
    /* bug fix: the original never checked fopen's result, so a missing file
       led to fread(NULL, ...) and fclose(NULL) — undefined behavior. */
    if (fid == NULL)
    {
        fprintf(stderr, "get_fec_frame: could not open %s\n", filename);
        return 1;
    }

    /* read header (seven int16 fields, as written by fec_packets.py) */
    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;

    /* check if indices are valid */
    if (packet_index >= num_packets || subframe_index >= subframes_per_packet)
    {
        fprintf(stderr, "get_fec_frame: index out of bounds\n");
        goto error;
    }

    /* calculate offset in file (+ 2 skips the per-packet rate field) */
    offset = header_size + packet_index * packet_size + 2 + subframe_index * subframe_size;
    fseek(fid, offset, SEEK_SET);

    /* read features */
    if (fread(features, sizeof(*features), num_features, fid) != num_features) goto error;

    fclose(fid);
    return 0;

error:
    fclose(fid);
    return 1;
}
|
||||
|
||||
/* Read the rate field of one packet from a FEC packet file.
 *
 * filename     - path to a file produced by write_fec_packets (fec_packets.py)
 * packet_index - which packet's rate to read
 *
 * Returns the rate (non-negative) on success, -1 on failure (unopenable
 * file, short read, or index out of bounds).
 */
int get_fec_rate(const char * const filename, int packet_index)
{
    int16_t version;
    int16_t header_size;
    int16_t num_packets;
    int16_t packet_size;
    int16_t subframe_size;
    int16_t subframes_per_packet;
    int16_t num_features;
    long offset;
    int16_t rate;

    FILE *fid = fopen(filename, "rb");
    /* bug fix: the original never checked fopen's result, so a missing file
       led to fread(NULL, ...) and fclose(NULL) — undefined behavior. */
    if (fid == NULL)
    {
        fprintf(stderr, "get_fec_rate: could not open %s\n", filename);
        return -1;
    }

    /* read header (seven int16 fields, as written by fec_packets.py) */
    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;

    /* check if indices are valid */
    if (packet_index >= num_packets)
    {
        fprintf(stderr, "get_fec_rate: index out of bounds\n");
        goto error;
    }

    /* the rate is the first (int16) field of the packet */
    offset = header_size + packet_index * packet_size;
    fseek(fid, offset, SEEK_SET);

    /* read rate */
    if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;

    fclose(fid);
    return (int) rate;

error:
    fclose(fid);
    return -1;
}
|
||||
|
||||
#if 0
|
||||
int main()
|
||||
{
|
||||
float features[20];
|
||||
int i;
|
||||
|
||||
if (get_fec_frame("../test.fec", &features[0], 0, 127))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < 20; i ++)
|
||||
{
|
||||
printf("%d %f\n", i, features[i]);
|
||||
}
|
||||
|
||||
printf("rate: %d\n", get_fec_rate("../test.fec", 0));
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,34 @@
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FEC_PACKETS_H
#define FEC_PACKETS_H

/* Read one subframe of features from a FEC packet file.
   Returns 0 on success, 1 on failure. */
int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);

/* Read the rate field of one packet.
   Returns the rate on success, -1 on failure. */
int get_fec_rate(const char * const filename, int packet_index);

#endif
|
||||
108
managed_components/78__esp-opus/dnn/training_tf2/fec_packets.py
Normal file
108
managed_components/78__esp-opus/dnn/training_tf2/fec_packets.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
|
||||
def write_fec_packets(filename, packets, rates=None):
    """Write FEC packets to *filename* in the binary format read by fec_packets.c.

    Args:
        filename: output file path.
        packets: list of arrays of shape (..., subframes_per_packet, num_features).
        rates: optional per-packet rates (written as int16); 0 when omitted.
    """

    assert np.dtype(np.float32).itemsize == 4
    assert np.dtype(np.int16).itemsize == 2

    # derive some sizes
    num_packets = len(packets)
    subframes_per_packet = packets[0].shape[-2]
    num_features = packets[0].shape[-1]

    # size of float is 4
    subframe_size = num_features * 4
    packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate

    version = 1
    # header entries: version, header_size, num_packets, packet_size,
    # subframe_size, subframes_per_packet, num_features -> 7 * int16 = 14 bytes
    header_size = 14

    with open(filename, 'wb') as f:

        # header
        for value in (version, header_size, num_packets, packet_size,
                      subframe_size, subframes_per_packet, num_features):
            f.write(np.int16(value).tobytes())

        # packets
        for i, packet in enumerate(packets):
            # fix: use "rates is None" instead of the type(...) == type(None) comparison
            rate = 0 if rates is None else rates[i]
            f.write(np.int16(rate).tobytes())

            # subframes are stored in reversed time order
            features = np.flip(packet, axis=-2)
            f.write(features.astype(np.float32).tobytes())
|
||||
|
||||
|
||||
def read_fec_packets(filename):
    """Read FEC packets from the binary format written by write_fec_packets.

    Returns a list of packet arrays, each of shape
    (1, subframes_per_packet, num_features). Per-packet rates are parsed
    (the reads advance the file position) but not returned, preserving the
    original interface.
    """

    assert np.dtype(np.float32).itemsize == 4
    assert np.dtype(np.int16).itemsize == 2

    with open(filename, 'rb') as f:

        # header: seven int16 fields
        version = np.frombuffer(f.read(2), dtype=np.int16).item()
        header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
        num_packets = np.frombuffer(f.read(2), dtype=np.int16).item()
        packet_size = np.frombuffer(f.read(2), dtype=np.int16).item()
        subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
        subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
        num_features = np.frombuffer(f.read(2), dtype=np.int16).item()

        dummy_features = np.zeros((1, subframes_per_packet, num_features), dtype=np.float32)

        # packets
        rates = []
        packets = []
        for i in range(num_packets):

            # bug fix: the original wrote ".item" without the call parentheses,
            # appending the bound method (not the rate value) to 'rates'
            rate = np.frombuffer(f.read(2), dtype=np.int16).item()
            rates.append(rate)

            features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
            # subframes are stored in reversed time order; undo the flip
            packet = np.flip(features, axis=-2)
            packets.append(packet)

    return packets
|
||||
@@ -0,0 +1,189 @@
|
||||
'''Copyright (c) 2017-2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
""" helper functions for dumping some Keras layers to C files """
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def printVector(f, vector, name, dtype='float', dotp=False, static=True):
    """Emit *vector* as a one-dimensional C array named *name* into f.

    When dotp is True, the matrix is first permuted into the 4x8 block
    layout used by the DOT_PROD kernels. Returns the (possibly permuted)
    vector so callers can reuse it.
    """
    if dotp:
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    flat = np.reshape(vector, (-1))
    qualifier = 'static const' if static else 'const'
    f.write('{} {} {}[{}] = {{\n '.format(qualifier, dtype, name, len(flat)))
    last = len(flat) - 1
    for i, value in enumerate(flat):
        f.write('{}'.format(value))
        if i == last:
            # no separator after the final element
            break
        f.write(',')
        # wrap to a new line every 8 values
        f.write("\n " if i % 8 == 7 else " ")
    f.write('\n};\n\n')
    return vector
|
||||
|
||||
def printSparseVector(f, A, name, have_diag=True):
    """Emit a block-sparse weight matrix as C arrays.

    Writes (via printVector): an optional diagonal vector, the quantized
    non-zero 4x8 blocks under #ifdef DOT_PROD (float blocks under #else),
    and a per-column-group index array. Returns the quantized matrix AQ.

    NOTE(review): when have_diag is True, A is modified IN PLACE (the three
    diagonals are subtracted out) — callers should not rely on A afterwards.
    Assumes A has shape (N, 3*N) — the three concatenated GRU gate matrices;
    verify against callers.
    """
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        # extract the diagonal of each of the three N-column gate sub-matrices,
        # then remove it from A so only off-diagonal weights are stored sparsely
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # quantize to signed 8-bit with scale 128
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    # scan the matrix in 4-row x 8-column blocks; for each group of 8 columns,
    # idx stores [non-zero-block count, row offsets of those blocks...]
    for i in range(M//8):
        pos = idx.shape[0]
        idx = np.append(idx, -1)  # placeholder, patched with the count below
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                # DOT_PROD layout stores the block column-major
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    # quantized weights for DOT_PROD builds, float weights otherwise
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
|
||||
|
||||
def dump_sparse_gru(self, f, hf):
    """Dump a sparse GRU layer's weights to C source (f) and header (hf).

    Writes the sparse recurrent weights, bias, and a pre-computed 'subias'
    correction, then the SparseGRULayer struct definition and size macros.
    Returns the layer's neuron count (used to track the max RNN size).
    """
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    # weights[1] is the recurrent kernel; printSparseVector quantizes it
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    subias = weights[-1].copy()
    # correct the recurrent bias row for the bias introduced by 8-bit
    # quantization (sum of quantized weights scaled back by 1/128)
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    # Keras GRU defaults to reset_after=True; only emit 0 when explicitly disabled
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # kernel has 3 gate blocks side by side, so neurons = cols // 3
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = neurons
    f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
    return max_rnn_neurons
|
||||
|
||||
def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
    """Dump a (dense or sparse) GRU layer's weights to C source (f) and header (hf).

    dotp   - also emit an 8-bit quantized recurrent kernel under #ifdef DOT_PROD
    sparse - store the input kernel in the block-sparse format

    Returns the layer's neuron count (used to track the max RNN size).
    """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    # weights[0]: input kernel, weights[1]: recurrent kernel, weights[-1]: bias
    if sparse:
        qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
    else:
        qweight = printVector(f, weights[0], name + '_weights')

    if dotp:
        # quantized recurrent weights for DOT_PROD builds...
        f.write('#ifdef DOT_PROD\n')
        qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
        printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
        f.write('#else /*DOT_PROD*/\n')
    else:
        qweight2 = weights[1]

    # ...and the float recurrent weights otherwise (or unconditionally when not dotp)
    printVector(f, weights[1], name + '_recurrent_weights')
    if dotp:
        f.write('#endif /*DOT_PROD*/\n')

    printVector(f, weights[-1], name + '_bias')
    subias = weights[-1].copy()
    # bias corrections compensating the mean shift introduced by 8-bit
    # quantization of the input (row 0) and recurrent (row 1) kernels.
    # NOTE(review): in the non-dotp branch qweight2 is the *unquantized*
    # recurrent kernel, so the row-1 correction differs between builds — verify.
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    # Keras GRU defaults to reset_after=True; only emit 0 when explicitly disabled
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    # kernel has 3 gate blocks side by side, so neurons = cols // 3
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = neurons
    f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {},\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('extern const GRULayer {};\n\n'.format(name));
    return max_rnn_neurons
|
||||
|
||||
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Write a DenseLayer (weight/bias arrays, struct, and size macro) to C."""
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    in_size, out_size = weights.shape[0], weights.shape[1]
    f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, in_size, out_size, activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), out_size))
    hf.write('extern const DenseLayer {};\n\n'.format(name))
|
||||
|
||||
def dump_dense_layer(self, f, hf):
    """Dump a Keras Dense layer to C; returns False (no RNN size to report)."""
    layer_name = self.name
    print("printing layer " + layer_name + " of type " + self.__class__.__name__)
    layer_weights = self.get_weights()
    act_name = self.activation.__name__.upper()
    dump_dense_layer_impl(layer_name, layer_weights[0], layer_weights[1], act_name, f, hf)
    return False
|
||||
|
||||
def dump_conv1d_layer(self, f, hf):
    """Dump a Keras Conv1D layer to C source (f) and header (hf).

    Returns the flattened input size (channels * kernel width), used to
    track the largest convolution input buffer needed.
    """
    layer_name = self.name
    print("printing layer " + layer_name + " of type " + self.__class__.__name__)
    layer_weights = self.get_weights()
    printVector(f, layer_weights[0], layer_name + '_weights')
    printVector(f, layer_weights[-1], layer_name + '_bias')
    act_name = self.activation.__name__.upper()
    # kernel shape is (width, in_channels, out_channels)
    kernel_width, in_channels, out_channels = layer_weights[0].shape
    max_conv_inputs = in_channels * kernel_width
    f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(layer_name, layer_name, layer_name, in_channels, kernel_width, out_channels, act_name))
    hf.write('#define {}_OUT_SIZE {}\n'.format(layer_name.upper(), out_channels))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(layer_name.upper(), in_channels, (kernel_width-1)))
    hf.write('#define {}_DELAY {}\n'.format(layer_name.upper(), (kernel_width-1)//2))
    hf.write('extern const Conv1DLayer {};\n\n'.format(layer_name))
    return max_conv_inputs
|
||||
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Custom Loss functions and metrics for training/analysis
|
||||
"""
|
||||
|
||||
from tf_funcs import *
|
||||
import tensorflow as tf
|
||||
|
||||
# The following loss functions all expect the lpcnet model to output the lpc prediction
|
||||
|
||||
# Computing the excitation by subtracting the lpc prediction from the target, followed by minimizing the cross entropy
|
||||
def res_from_sigloss():
    """Sparse cross-entropy on the rounded mu-law excitation.

    Expects y_pred to pack [lpc prediction, real prediction, 256-bin pdf]
    along the last axis; the excitation is the mu-law of (target - prediction).
    """
    def loss(y_true, y_pred):
        prediction = y_pred[:, :, 0:1]
        distribution = y_pred[:, :, 2:]
        excitation = tf_l2u(y_true - prediction)
        excitation = tf.cast(tf.round(excitation), 'int32')
        return tf.keras.losses.SparseCategoricalCrossentropy(
            reduction=tf.keras.losses.Reduction.NONE)(excitation, distribution)
    return loss
|
||||
|
||||
# Interpolated and Compensated Loss (In case of end to end lpcnet)
|
||||
# Interpolates between adjacent embeddings based on the fractional value of the excitation computed (similar to the embedding interpolation)
|
||||
# Also adds a probability compensation (to account for matching cross entropy in the linear domain), weighted by gamma
|
||||
def interp_mulaw(gamma = 1):
    """Interpolated + compensated mu-law cross-entropy for end-to-end LPCNet.

    Interpolates between adjacent pdf bins using the fractional part of the
    mu-law excitation, adds a linear-domain probability compensation, and a
    regularization term on the 'real' excitation weighted by gamma.
    """
    def loss(y_true,y_pred):
        y_true = tf.cast(y_true, 'float32')
        # y_pred layout on the last axis: [lpc prediction, real prediction, 256-bin pdf]
        p = y_pred[:,:,0:1]
        real_p = y_pred[:,:,1:2]
        model_out = y_pred[:,:,2:]
        # mu-law excitations w.r.t. each prediction
        e_gt = tf_l2u(y_true - p)
        exc_gt = tf_l2u(y_true - real_p)
        # |e - 128|/128 * log(256): compensation for matching cross entropy
        # in the linear domain
        prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
        regularization = tf.squeeze((K.abs(exc_gt - 128)/128.0)*K.log(256.0))
        # fractional part is the interpolation weight between bins i and i+1
        alpha = e_gt - tf.math.floor(e_gt)
        alpha = tf.tile(alpha,[1,1,256])
        e_gt = tf.cast(e_gt,'int32')
        # clip to 254 so the rolled (i+1) bin stays in range
        e_gt = tf.clip_by_value(e_gt,0,254)
        interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
        sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
        loss_mod = sparse_cel + prob_compensation + gamma*regularization
        return loss_mod
    return loss
|
||||
|
||||
# Same as above, except a metric
|
||||
def metric_oginterploss(y_true,y_pred):
    """Interpolated mu-law cross-entropy plus probability compensation (metric form)."""
    # y_pred layout on the last axis: [lpc prediction, real prediction, 256-bin pdf]
    p = y_pred[:,:,0:1]
    model_out = y_pred[:,:,2:]
    # mu-law excitation of the target w.r.t. the lpc prediction
    e_gt = tf_l2u(y_true - p)
    # |e - 128|/128 * log(256): linear-domain probability compensation
    prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
    # fractional part is the interpolation weight between bins i and i+1
    alpha = e_gt - tf.math.floor(e_gt)
    alpha = tf.tile(alpha,[1,1,256])
    e_gt = tf.cast(e_gt,'int32')
    # clip to 254 so the rolled (i+1) bin stays in range
    e_gt = tf.clip_by_value(e_gt,0,254)
    interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
    sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
    loss_mod = sparse_cel + prob_compensation
    return loss_mod
|
||||
|
||||
# Interpolated cross entropy loss metric
|
||||
def metric_icel(y_true, y_pred):
    """Interpolated cross-entropy metric on the mu-law excitation."""
    prediction = y_pred[:, :, 0:1]
    distribution = y_pred[:, :, 2:]
    excitation = tf_l2u(y_true - prediction)
    # fractional part interpolates between adjacent pdf bins
    frac = tf.tile(excitation - tf.math.floor(excitation), [1, 1, 256])
    bins = tf.cast(excitation, 'int32')
    bins = tf.clip_by_value(bins, 0, 254)  # Check direction
    interp = (1 - frac)*distribution + frac*tf.roll(distribution, shift=-1, axis=-1)
    return tf.keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE)(bins, interp)
|
||||
|
||||
# Non-interpolated (rounded) cross entropy loss metric
|
||||
def metric_cel(y_true, y_pred):
    """Rounded (non-interpolated) cross-entropy metric on the mu-law excitation."""
    y_true = tf.cast(y_true, 'float32')
    prediction = y_pred[:, :, 0:1]
    distribution = y_pred[:, :, 2:]
    excitation = tf_l2u(y_true - prediction)
    bins = tf.cast(tf.round(excitation), 'int32')
    bins = tf.clip_by_value(bins, 0, 255)
    return tf.keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE)(bins, distribution)
|
||||
|
||||
# Variance metric of the output excitation
|
||||
def metric_exc_sd(y_true, y_pred):
    """Mean squared deviation of the mu-law excitation from its midpoint (128)."""
    prediction = y_pred[:, :, 0:1]
    excitation = tf_l2u(y_true - prediction)
    return tf.keras.losses.MeanSquaredError(
        reduction=tf.keras.losses.Reduction.NONE)(excitation, 128)
|
||||
|
||||
def loss_matchlar():
    """Squared log-area-ratio distance between predicted and target reflection coefficients."""
    def loss(y_true, y_pred):
        rc_pred = y_pred[:, :, :16]
        #y_true = lpc2rc(y_true)
        # log-area ratio with 1.01 guard to keep the log arguments positive
        lar_pred = K.log((1.01 + rc_pred)/(1.01 - rc_pred))
        lar_true = K.log((1.01 + y_true)/(1.01 - y_true))
        return tf.reduce_mean(tf.square(lar_pred - lar_true), axis=-1)
    return loss
|
||||
339
managed_components/78__esp-opus/dnn/training_tf2/lpcnet.py
Normal file
339
managed_components/78__esp-opus/dnn/training_tf2/lpcnet.py
Normal file
@@ -0,0 +1,339 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import math
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
|
||||
from tensorflow.compat.v1.keras.layers import CuDNNGRU
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras.constraints import Constraint
|
||||
from tensorflow.keras.initializers import Initializer
|
||||
from tensorflow.keras.callbacks import Callback
|
||||
from mdense import MDense
|
||||
import numpy as np
|
||||
import h5py
|
||||
import sys
|
||||
from tf_funcs import *
|
||||
from diffembed import diff_Embed
|
||||
from parameters import set_parameter
|
||||
|
||||
frame_size = 160
|
||||
pcm_bits = 8
|
||||
embed_size = 128
|
||||
pcm_levels = 2**pcm_bits
|
||||
|
||||
def interleave(p, samples):
    # Expand one level of the binary probability tree: each of the C input
    # probabilities contributes a (1-p, p) pair, and each pair is repeated
    # so the last axis spans all pcm_levels output bins.
    p2=tf.expand_dims(p, 3)
    nb_repeats = pcm_levels//(2*p.shape[2])
    p3 = tf.reshape(tf.repeat(tf.concat([1-p2, p2], 3), nb_repeats), (-1, samples, pcm_levels))
    return p3
|
||||
|
||||
def tree_to_pdf(p, samples):
    # Convert the 256 hierarchical split probabilities (8 tree levels for
    # pcm_bits=8) into a flat PDF over the 256 u-law levels: the probability
    # of a bin is the product of the branch probabilities along its path.
    # p[:,:,0] is unused; level k occupies p[:,:,2**k : 2**(k+1)].
    return interleave(p[:,:,1:2], samples) * interleave(p[:,:,2:4], samples) * interleave(p[:,:,4:8], samples) * interleave(p[:,:,8:16], samples) \
        * interleave(p[:,:,16:32], samples) * interleave(p[:,:,32:64], samples) * interleave(p[:,:,64:128], samples) * interleave(p[:,:,128:256], samples)
|
||||
|
||||
def tree_to_pdf_train(p):
    """Training-time PDF conversion for fixed-length sequences.

    The training loader always feeds 15 frames, so the sample count is
    15*frame_size (previously hard-coded as 2400); deriving it from the
    module constant keeps frame_size the single source of truth.
    """
    return tree_to_pdf(p, 15*frame_size)
|
||||
|
||||
def tree_to_pdf_infer(p):
    # Inference runs one sample at a time, hence sequence length 1.
    return tree_to_pdf(p, 1)
|
||||
|
||||
def quant_regularizer(x):
    """Regularizer that pulls weights toward multiples of 1/128.

    Q*x - round(Q*x) is the distance to the nearest 1/Q grid point; the
    cosine term is minimal on the grid, and the double sqrt sharpens the
    penalty near grid points (1.0001 keeps the sqrt argument positive).
    Removed the unused local Q_1 from the original implementation.
    """
    Q = 128
    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
|
||||
|
||||
class Sparsify(Callback):
    """Progressively sparsifies (and optionally quantizes) the recurrent
    weights of the 'gru_a' layer during training.

    Sparsification keeps the 4x8 weight blocks with the largest energy,
    ramping from fully dense at batch t_start to the per-gate target
    densities at t_end, re-applied every `interval` batches.
    """
    def __init__(self, t_start, t_end, interval, density, quantize=False):
        # t_start/t_end: batch indices over which density is annealed.
        # density: per-gate target densities, indexed by gate (GRU has 3).
        # quantize: also snap weights onto a 1/128 grid while sparsifying.
        super(Sparsify, self).__init__()
        self.batch = 0
        self.t_start = t_start
        self.t_end = t_end
        self.interval = interval
        self.final_density = density
        self.quantize = quantize

    def on_batch_end(self, batch, logs=None):
        self.batch += 1
        if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
            layer = self.model.get_layer('gru_a')
            w = layer.get_weights()
            p = w[1]  # recurrent kernel: shape (N, nb*N), one NxN matrix per gate
            nb = p.shape[1]//p.shape[0]
            N = p.shape[0]
            for k in range(nb):
                density = self.final_density[k]
                if self.batch < self.t_end and not self.quantize:
                    # Cubic ramp from density 1 down to the final density.
                    r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
                    density = 1 - (1-self.final_density[k])*(1 - r*r*r)
                A = p[:, k*N:(k+1)*N]
                # Exclude the diagonal from block scoring; it is re-added below.
                A = A - np.diag(np.diag(A))
                #This is needed because of the CuDNNGRU strange weight ordering
                A = np.transpose(A, (1, 0))
                # Score each 4x8 block by its L2 energy and keep the top blocks.
                L=np.reshape(A, (N//4, 4, N//8, 8))
                S=np.sum(L*L, axis=-1)
                S=np.sum(S, axis=1)
                SS=np.sort(np.reshape(S, (-1,)))
                thresh = SS[round(N*N//32*(1-density))]
                mask = (S>=thresh).astype('float32')
                mask = np.repeat(mask, 4, axis=0)
                mask = np.repeat(mask, 8, axis=1)
                # Always keep the diagonal, regardless of block energy.
                mask = np.minimum(1, mask + np.diag(np.ones((N,))))
                #This is needed because of the CuDNNGRU strange weight ordering
                mask = np.transpose(mask, (1, 0))
                p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
            if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
                if self.batch < self.t_end:
                    # Widen the snap threshold from 0 to 0.5 over the schedule.
                    threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
                else:
                    threshold = .5
                # Snap weights within `threshold` of a 1/128 grid point onto it.
                quant = np.round(p*128.)
                res = p*128.-quant
                mask = (np.abs(res) <= threshold).astype('float32')
                p = mask/128.*quant + (1-mask)*p

            w[1] = p
            layer.set_weights(w)
|
||||
|
||||
class SparsifyGRUB(Callback):
    """Like Sparsify, but for the input kernel of 'gru_b'.

    Only the first grua_units input rows (the part fed by gru_a's output)
    are sparsified; the remaining conditioning rows stay dense.
    """
    def __init__(self, t_start, t_end, interval, grua_units, density, quantize=False):
        # grua_units: number of gru_a outputs feeding gru_b; only those rows
        # of the input kernel are subject to sparsification.
        super(SparsifyGRUB, self).__init__()
        self.batch = 0
        self.t_start = t_start
        self.t_end = t_end
        self.interval = interval
        self.final_density = density
        self.grua_units = grua_units
        self.quantize = quantize

    def on_batch_end(self, batch, logs=None):
        self.batch += 1
        if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
            layer = self.model.get_layer('gru_b')
            w = layer.get_weights()
            p = w[0]  # input kernel: shape (N, 3*M), one NxM matrix per gate
            N = p.shape[0]
            M = p.shape[1]//3
            for k in range(3):
                density = self.final_density[k]
                if self.batch < self.t_end and not self.quantize:
                    # Cubic ramp from density 1 down to the final density.
                    r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
                    density = 1 - (1-self.final_density[k])*(1 - r*r*r)
                A = p[:, k*M:(k+1)*M]
                #This is needed because of the CuDNNGRU strange weight ordering
                A = np.reshape(A, (M, N))
                A = np.transpose(A, (1, 0))
                # Only the rows driven by gru_a's output are sparsified.
                N2 = self.grua_units
                A2 = A[:N2, :]
                # Score each 4x8 block by its L2 energy and keep the top blocks.
                L=np.reshape(A2, (N2//4, 4, M//8, 8))
                S=np.sum(L*L, axis=-1)
                S=np.sum(S, axis=1)
                SS=np.sort(np.reshape(S, (-1,)))
                thresh = SS[round(M*N2//32*(1-density))]
                mask = (S>=thresh).astype('float32')
                mask = np.repeat(mask, 4, axis=0)
                mask = np.repeat(mask, 8, axis=1)
                # Recombine the masked block with the untouched dense rows.
                A = np.concatenate([A2*mask, A[N2:,:]], axis=0)
                #This is needed because of the CuDNNGRU strange weight ordering
                A = np.transpose(A, (1, 0))
                A = np.reshape(A, (N, M))
                p[:, k*M:(k+1)*M] = A
            if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
                if self.batch < self.t_end:
                    # Widen the snap threshold from 0 to 0.5 over the schedule.
                    threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
                else:
                    threshold = .5
                # Snap weights within `threshold` of a 1/128 grid point onto it.
                quant = np.round(p*128.)
                res = p*128.-quant
                mask = (np.abs(res) <= threshold).astype('float32')
                p = mask/128.*quant + (1-mask)*p

            w[0] = p
            layer.set_weights(w)
|
||||
|
||||
|
||||
class PCMInit(Initializer):
    """Initializer for the signal embedding matrix.

    Produces uniform noise plus a per-row linear ramp, so that embedding
    rows are roughly ordered by the PCM level they represent.
    """
    def __init__(self, gain=.1, seed=None):
        self.gain = gain
        self.seed = seed

    def __call__(self, shape, dtype=None):
        # Collapse all leading dimensions into a row count.
        rows = 1
        for dim in shape[:-1]:
            rows *= dim
        cols = shape[-1]
        if self.seed is not None:
            # NOTE: seeds NumPy's global RNG (matches original behavior).
            np.random.seed(self.seed)
        # Uniform(-sqrt(3), sqrt(3)) has unit variance.
        weights = np.random.uniform(-1.7321, 1.7321, (rows, cols))
        # Linear ramp over rows, scaled to unit variance via sqrt(12).
        ramp = math.sqrt(12)*np.arange(-.5*rows+.5, .5*rows-.4)/rows
        weights = weights + np.reshape(ramp, (rows, 1))
        return self.gain * weights.astype("float32")

    def get_config(self):
        return {
            'gain': self.gain,
            'seed': self.seed
        }
|
||||
|
||||
class WeightClip(Constraint):
    '''Clips the weights incident to each hidden unit to be inside a range
    '''
    def __init__(self, c=2):
        # c: maximum allowed |w_even| + |w_odd| for each adjacent column pair.
        self.c = c

    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        # Pairs are rescaled proportionally rather than hard-clipped.
        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
        #return K.clip(p, -self.c, self.c)

    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}

# 0.992 keeps quantized pair sums just under 127/128.
constraint = WeightClip(0.992)
|
||||
|
||||
def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128, lpc_order=16, lpc_gamma=1., lookahead=2):
    """Build the LPCNet model and its encoder/decoder sub-models.

    Returns (model, encoder, decoder). In e2e mode the LPC coefficients are
    predicted from the features (via rc2lpc); otherwise they are an input.

    Fixes vs. the previous version:
    - `embed.Trainable = False` (capital T) silently set a dead attribute
      instead of freezing the embedding during adaptation; now uses
      `trainable`.
    - The gamma-weighting vector honors `lpc_order` instead of a
      hard-coded order of 16 (identical result for the default).
    - The duplicated, identical decoder construction branches were merged.
    """
    pcm = Input(shape=(None, 1), batch_size=batch_size)
    dpcm = Input(shape=(None, 3), batch_size=batch_size)
    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
    pitch = Input(shape=(None, 1), batch_size=batch_size)
    dec_feat = Input(shape=(None, cond_size))
    dec_state1 = Input(shape=(rnn_units1,))
    dec_state2 = Input(shape=(rnn_units2,))

    # NOTE(review): 'valid' at training time presumably matches pre-padded
    # loader sequences; 'same' keeps frame counts aligned at inference.
    padding = 'valid' if training else 'same'
    fconv1 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv1')
    fconv2 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv2')
    pembed = Embedding(256, 64, name='embed_pitch')
    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])

    cfeat = fconv2(fconv1(cat_feat))

    fdense1 = Dense(cond_size, activation='tanh', name='feature_dense1')
    fdense2 = Dense(cond_size, activation='tanh', name='feature_dense2')

    if flag_e2e and quantize:
        # When quantizing an e2e model, freeze the feature network.
        fconv1.trainable = False
        fconv2.trainable = False
        fdense1.trainable = False
        fdense2.trainable = False

    cfeat = fdense2(fdense1(cfeat))

    # Prediction error of the previous sample, in u-law domain.
    error_calc = Lambda(lambda x: tf_l2u(x[0] - tf.roll(x[1],1,axis = 1)))
    if flag_e2e:
        lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
    else:
        lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size)

    real_preds = diff_pred(name = "real_lpc2preds")([pcm,lpcoeffs])
    # Bandwidth expansion: weight coefficient i by gamma**i (i = 1..order).
    weighting = lpc_gamma ** np.arange(1, lpc_order+1).astype('float32')
    weighted_lpcoeffs = Lambda(lambda x: x[0]*x[1])([lpcoeffs, weighting])
    tensor_preds = diff_pred(name = "lpc2preds")([pcm,weighted_lpcoeffs])
    past_errors = error_calc([pcm,tensor_preds])

    embed = diff_Embed(name='embed_sig',initializer = PCMInit())
    cpcm = Concatenate()([tf_l2u(pcm),tf_l2u(tensor_preds),past_errors])
    cpcm = GaussianNoise(.3)(cpcm)
    cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
    cpcm_decoder = Reshape((-1, embed_size*3))(embed(dpcm))

    # Up-sample frame-rate conditioning to sample rate.
    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

    quant = quant_regularizer if quantize else None

    if training:
        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a', stateful=True,
                       recurrent_constraint = constraint, recurrent_regularizer=quant)
        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b', stateful=True,
                        kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    else:
        rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a', stateful=True,
                  recurrent_constraint = constraint, recurrent_regularizer=quant)
        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b', stateful=True,
                   kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)

    rnn_in = Concatenate()([cpcm, rep(cfeat)])
    md = MDense(pcm_levels, activation='sigmoid', name='dual_fc')
    gru_out1, _ = rnn(rnn_in)
    gru_out1 = GaussianNoise(.005)(gru_out1)
    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
    ulaw_prob = Lambda(tree_to_pdf_train)(md(gru_out2))

    if adaptation:
        # Freeze everything but the feature network for speaker adaptation.
        rnn.trainable=False
        rnn2.trainable=False
        md.trainable=False
        embed.trainable=False  # was `embed.Trainable` (typo, no effect)

    m_out = Concatenate(name='pdf')([tensor_preds,real_preds,ulaw_prob])
    if not flag_e2e:
        model = Model([pcm, feat, pitch, lpcoeffs], m_out)
    else:
        model = Model([pcm, feat, pitch], [m_out, cfeat])
    model.rnn_units1 = rnn_units1
    model.rnn_units2 = rnn_units2
    model.nb_used_features = nb_used_features
    model.frame_size = frame_size

    if not flag_e2e:
        encoder = Model([feat, pitch], cfeat)
    else:
        encoder = Model([feat, pitch], [cfeat,lpcoeffs])

    dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat])
    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
    dec_ulaw_prob = Lambda(tree_to_pdf_infer)(md(dec_gru_out2))

    # The decoder has the same structure with or without e2e.
    decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])

    # add parameters to model
    set_parameter(model, 'lpc_gamma', lpc_gamma, dtype='float64')
    set_parameter(model, 'flag_e2e', flag_e2e, dtype='bool')
    set_parameter(model, 'lookahead', lookahead, dtype='int32')

    return model, encoder, decoder
|
||||
101
managed_components/78__esp-opus/dnn/training_tf2/lpcnet_plc.py
Normal file
101
managed_components/78__esp-opus/dnn/training_tf2/lpcnet_plc.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2018-2019 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import math
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
|
||||
from tensorflow.compat.v1.keras.layers import CuDNNGRU
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras.constraints import Constraint
|
||||
from tensorflow.keras.initializers import Initializer
|
||||
from tensorflow.keras.callbacks import Callback
|
||||
import numpy as np
|
||||
|
||||
def quant_regularizer(x):
    """Regularizer that pulls weights toward multiples of 1/128.

    Q*x - round(Q*x) is the distance to the nearest 1/Q grid point; the
    cosine term is minimal on the grid, and the double sqrt sharpens the
    penalty near grid points (1.0001 keeps the sqrt argument positive).
    Removed the unused local Q_1 from the original implementation.
    """
    Q = 128
    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
|
||||
|
||||
|
||||
class WeightClip(Constraint):
    '''Clips the weights incident to each hidden unit to be inside a range
    '''
    def __init__(self, c=2):
        # c: maximum allowed |w_even| + |w_odd| for each adjacent column pair.
        self.c = c

    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        # Pairs are rescaled proportionally rather than hard-clipped.
        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
        #return K.clip(p, -self.c, self.c)

    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}

# 0.992 keeps quantized pair sums just under 127/128.
constraint = WeightClip(0.992)
|
||||
|
||||
def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, nb_burg_features=36, batch_size=128, training=False, adaptation=False, quantize=False, cond_size=128):
    """Build the packet-loss-concealment model: a dense conditioning layer
    followed by two stateful GRUs predicting the next frame's features.

    Inputs are the concatenated (features + Burg features) and a per-frame
    loss flag; the output is the predicted acoustic feature vector.
    NOTE(review): `adaptation` is accepted but unused here — confirm intent.
    """
    feat = Input(shape=(None, nb_used_features+nb_burg_features), batch_size=batch_size)
    lost = Input(shape=(None, 1), batch_size=batch_size)

    fdense1 = Dense(cond_size, activation='tanh', name='plc_dense1')

    cfeat = Concatenate()([feat, lost])
    cfeat = fdense1(cfeat)
    #cfeat = Conv1D(cond_size, 3, padding='causal', activation='tanh', name='plc_conv1')(cfeat)

    quant = quant_regularizer if quantize else None

    # CuDNNGRU for training speed on GPU; plain GRU (CuDNN-compatible
    # configuration) otherwise.
    if training:
        rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru1', stateful=True,
                       kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
        rnn2 = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru2', stateful=True,
                        kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    else:
        rnn = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru1', stateful=True,
                  kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
        rnn2 = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru2', stateful=True,
                   kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)

    gru_out1, _ = rnn(cfeat)
    gru_out1 = GaussianNoise(.005)(gru_out1)
    gru_out2, _ = rnn2(gru_out1)

    out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
    plc_out = out_dense(gru_out2)

    model = Model([feat, lost], plc_out)
    # Stash hyper-parameters on the model for downstream tooling.
    model.rnn_units = rnn_units
    model.cond_size = cond_size
    model.nb_used_features = nb_used_features
    model.nb_burg_features = nb_burg_features

    return model
|
||||
95
managed_components/78__esp-opus/dnn/training_tf2/mdense.py
Normal file
95
managed_components/78__esp-opus/dnn/training_tf2/mdense.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras.layers import Layer, InputSpec
|
||||
from tensorflow.keras import activations
|
||||
from tensorflow.keras import initializers, regularizers, constraints
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
class MDense(Layer):
    """Dual fully-connected ("multi-dense") layer.

    Computes `channels` parallel dense projections of the input, applies
    tanh and a learned per-output scaling factor to each, sums over the
    channels, then applies the optional output activation:

        out = activation(sum_c factor[:, c] * tanh(W[:, :, c] @ x + b[:, c]))

    Fix vs. the previous version: get_config() emitted the key 'units'
    (which __init__ does not accept, so from_config raised TypeError) and
    omitted 'channels' entirely; it now round-trips correctly.
    """

    def __init__(self, outputs,
                 channels=2,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(MDense, self).__init__(**kwargs)
        self.units = outputs            # number of output features
        self.channels = channels        # number of parallel dense channels
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units, self.channels),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # Per-channel, per-output mixing factor applied after tanh.
        self.factor = self.add_weight(shape=(self.units, self.channels),
                                      initializer='ones',
                                      name='factor',
                                      regularizer=self.bias_regularizer,
                                      constraint=self.bias_constraint)
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True

    def call(self, inputs):
        output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = output + self.bias
        output = K.tanh(output) * self.factor
        output = K.sum(output, axis=-1)
        if self.activation is not None:
            output = self.activation(output)
        return output

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def get_config(self):
        config = {
            # Keys mirror the __init__ arguments so from_config works.
            'outputs': self.units,
            'channels': self.channels,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(MDense, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
70
managed_components/78__esp-opus/dnn/training_tf2/pade.py
Normal file
70
managed_components/78__esp-opus/dnn/training_tf2/pade.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# Optimizing a rational function to optimize a tanh() approximation
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
|
||||
import tensorflow.keras.backend as K
|
||||
from tensorflow.keras.optimizers import Adam, SGD
|
||||
|
||||
def my_loss1(y_true, y_pred):
    # Equal-weight mix of mean squared error and the per-batch worst-case
    # squared error along the sample axis.
    return 1*K.mean(K.square(y_true-y_pred)) + 1*K.max(K.square(y_true-y_pred), axis=1)
|
||||
|
||||
def my_loss2(y_true, y_pred):
    # Like my_loss1 but the MSE term is down-weighted 10x, emphasizing the
    # worst-case error as the fit is refined.
    return .1*K.mean(K.square(y_true-y_pred)) + 1*K.max(K.square(y_true-y_pred), axis=1)
|
||||
|
||||
def my_loss3(y_true, y_pred):
    # Final refinement stage: MSE down-weighted 100x, essentially
    # minimizing only the maximum deviation.
    return .01*K.mean(K.square(y_true-y_pred)) + 1*K.max(K.square(y_true-y_pred), axis=1)
|
||||
|
||||
# Using these initializers to seed the approximation
|
||||
# with a reasonable starting point
|
||||
def num_init(shape, dtype=None):
    # Seed the numerator with the order-(4,4) Pade coefficients of tanh:
    # numerator polynomial 945 + 105*x^2 + x^4 (times x, applied outside).
    rr = tf.constant([[945], [105], [1]], dtype=dtype)
    #rr = tf.constant([[946.56757], [98.01368], [0.66841]], dtype=dtype)
    print(rr)
    return rr
|
||||
|
||||
def den_init(shape, dtype=None):
    # Seed the denominator with the matching Pade coefficients:
    # 945 + 420*x^2 + 15*x^4.
    rr = tf.constant([[945], [420], [15]], dtype=dtype)
    #rr = tf.constant([[946.604], [413.342], [12.465]], dtype=dtype)
    print(rr)
    return rr
|
||||
|
||||
|
||||
# Fit the rational tanh() approximation on a dense grid over [-10, 10).
x = np.arange(-10, 10, .01)
N = len(x)
x = np.reshape(x, (1, -1, 1))
x2 = x*x

# Feature columns [1, x^2, x^4]: numerator and denominator are even
# polynomials, and the leading x factor is applied in ratio().
x2in = np.concatenate([x2*0 + 1, x2, x2*x2], axis=2)
yout = np.tanh(x)


model_x = Input(shape=(None, 1,))
model_x2 = Input(shape=(None, 3,))

num = Dense(1, name='num', use_bias=False, kernel_initializer=num_init)
den = Dense(1, name='den', use_bias=False, kernel_initializer=den_init)

def ratio(x):
    # x * P(x^2) / Q(x^2), clamped to tanh's range [-1, 1].
    return tf.minimum(1., tf.maximum(-1., x[0]*x[1]/x[2]))

out_layer = Lambda(ratio)
output = out_layer([model_x, num(model_x2), den(model_x2)])

model = Model([model_x, model_x2], output)
model.summary()

# Stage 1: coarse fit on plain MSE with a large learning rate.
model.compile(Adam(0.05, beta_1=0.9, beta_2=0.9, decay=2e-5), loss='mean_squared_error')
model.fit([x, x2in], yout, batch_size=1, epochs=500000, validation_split=0.0)

# Stages 2-4: progressively smaller learning rates and losses that weight
# the worst-case error more and more heavily.
model.compile(Adam(0.001, beta_2=0.9, decay=1e-4), loss=my_loss1)
model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)

model.compile(Adam(0.0001, beta_2=0.9, decay=1e-4), loss=my_loss2)
model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)

model.compile(Adam(0.00001, beta_2=0.9, decay=1e-4), loss=my_loss3)
model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)

model.save_weights('tanh.h5')
|
||||
@@ -0,0 +1,29 @@
|
||||
""" module for handling extra model parameters for tf.keras models """
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def set_parameter(model, parameter_name, parameter_value, dtype='float32'):
    """ stores parameter_value as non-trainable weight with name parameter_name:0 """

    # Exact name match (TF appends ':0' to variable names).
    weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]

    if len(weights) == 0:
        # First call: create the parameter as a frozen scalar weight.
        model.add_weight(parameter_name, trainable=False, initializer=tf.keras.initializers.Constant(parameter_value), dtype=dtype)
    elif len(weights) == 1:
        # Parameter already exists: overwrite its value in place.
        weights[0].assign(parameter_value)
    else:
        raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
|
||||
|
||||
|
||||
def get_parameter(model, parameter_name, default=None):
    """ returns parameter value if parameter is present in model and otherwise default """

    # Exact name match (TF appends ':0' to variable names).
    weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]

    if len(weights) == 0:
        return default
    elif len(weights) > 1:
        raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
    else:
        # Scalar stored via set_parameter(); unwrap to a Python value.
        return weights[0].numpy().item()
|
||||
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from tensorflow.keras.utils import Sequence
|
||||
|
||||
class PLCLoader(Sequence):
    """Batch loader for PLC training: pairs feature sequences with a
    simulated packet-loss trace and builds masked inputs / targets.
    """
    def __init__(self, features, lost, nb_burg_features, batch_size):
        # features: (sequences, frames, feature_dim); first nb_burg_features
        # columns are the Burg estimates. lost: flat 0/1 loss trace.
        self.batch_size = batch_size
        self.nb_batches = features.shape[0]//self.batch_size
        self.features = features[:self.nb_batches*self.batch_size, :, :]
        self.lost = lost.astype('float')
        # Trim so the trace reshapes evenly into sequence-length rows
        # (one extra row of slack is kept for the random offset below).
        self.lost = self.lost[:(len(self.lost)//features.shape[1]-1)*features.shape[1]]
        self.nb_burg_features = nb_burg_features
        self.on_epoch_end()

    def on_epoch_end(self):
        # Reshuffle sequences and re-sample the loss-trace alignment.
        self.indices = np.arange(self.nb_batches*self.batch_size)
        np.random.shuffle(self.indices)
        offset = np.random.randint(0, high=self.features.shape[1])
        self.lost_offset = np.reshape(self.lost[offset:-self.features.shape[1]+offset], (-1, self.features.shape[1]))
        self.lost_indices = np.random.randint(0, high=self.lost_offset.shape[0], size=self.nb_batches*self.batch_size)

    def __getitem__(self, index):
        features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
        # Independently drop 10% of Burg estimates to make the model robust
        # to missing Burg info even on received frames.
        burg_lost = (np.random.rand(features.shape[0], features.shape[1]) > .1).astype('float')
        burg_lost = np.reshape(burg_lost, (features.shape[0], features.shape[1], 1))
        burg_mask = np.tile(burg_lost, (1,1,self.nb_burg_features))

        # Per-frame packet loss sampled from the recorded trace.
        lost = self.lost_offset[self.lost_indices[index*self.batch_size:(index+1)*self.batch_size], :]
        lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
        lost_mask = np.tile(lost, (1,1,features.shape[2]))
        in_features = features*lost_mask
        in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask

        #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
        #in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
        out_lost = np.copy(lost)
        #out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]

        # Target: non-Burg features plus an inverted loss flag column.
        out_features = np.concatenate([features[:,:,self.nb_burg_features:], 1.-out_lost], axis=-1)
        burg_sign = 2*burg_lost - 1
        # last dim is 1 for received packet, 0 for lost packet, and -1 when just the Burg info is missing
        inputs = [in_features*lost_mask, lost*burg_sign]
        outputs = [out_features]
        return (inputs, outputs)

    def __len__(self):
        # Number of batches per epoch.
        return self.nb_batches
|
||||
372
managed_components/78__esp-opus/dnn/training_tf2/rdovae.py
Normal file
372
managed_components/78__esp-opus/dnn/training_tf2/rdovae.py
Normal file
@@ -0,0 +1,372 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2022 Amazon
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import math
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise, AveragePooling1D, RepeatVector
|
||||
from tensorflow.compat.v1.keras.layers import CuDNNGRU
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras.constraints import Constraint
|
||||
from tensorflow.keras.initializers import Initializer
|
||||
from tensorflow.keras.callbacks import Callback
|
||||
from tensorflow.keras.regularizers import l1
|
||||
import numpy as np
|
||||
import h5py
|
||||
from uniform_noise import UniformNoise
|
||||
|
||||
class WeightClip(Constraint):
    """Keras weight constraint that rescales weights so pairs of adjacent
    columns stay inside a fixed range.

    Ensures that the absolute values of adjacent weights don't sum to more
    than ``c`` scaled to 127; otherwise there's a risk of saturation when
    implementing dot products with SSSE3 or AVX2.
    """

    def __init__(self, c=2):
        self.c = c

    def __call__(self, w):
        # |w_odd| + |w_even| for each adjacent column pair, broadcast back to
        # the full width so each column is divided by its pair's sum.
        pair_sum = tf.abs(w[:, 1::2]) + tf.abs(w[:, 0::2])
        limit = tf.maximum(self.c, tf.repeat(pair_sum, 2, axis=1))
        return self.c * w / limit

    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}
|
||||
|
||||
# Shared constraint instance applied to every Dense/GRU kernel in the models
# below; 0.496 keeps each adjacent-weight pair's magnitude sum below ~0.5
# (see WeightClip.__call__ for the fixed-point saturation rationale).
constraint = WeightClip(0.496)
|
||||
|
||||
def soft_quantize(x):
    """Differentiable "soft" quantizer placeholder — currently the identity.

    Earlier experiments sharpened values toward integers with a sine-based
    correction (kept for reference):
        x = 4*x
        x = x - (.25/pi)*sin(2*pi*x)   # applied three times
    """
    return x
|
||||
|
||||
def noise_quantize(x):
    """Simulate quantization during training by adding uniform noise in
    [-0.5, 0.5) before the soft quantizer.

    NOTE(review): the noise shape (128, 16, 80) is hard-coded — presumably
    (batch, time, latent_dim); confirm it matches the tensor this is applied
    to, since any other shape would fail or broadcast incorrectly.
    """
    return soft_quantize(x + (K.random_uniform((128, 16, 80))-.5) )
|
||||
|
||||
def hard_quantize(x):
    """Round to the nearest integer with a straight-through gradient.

    Forward pass returns the rounded values; the stop_gradient trick makes
    the backward pass see the identity, so gradients flow through rounding.
    """
    x = soft_quantize(x)
    rounded = tf.round(x)
    return x + tf.stop_gradient(rounded - x)
|
||||
|
||||
def apply_dead_zone(x):
    """Shrink values near zero ("dead zone") before quantization.

    x is a pair [values, zone]; the learned zone embedding is scaled by .05
    to give the dead-zone half-width. A tanh-saturated correction pulls
    small values toward zero while leaving large values nearly unchanged.
    """
    width = x[1] * .05
    values = x[0]
    return values - width * tf.math.tanh(values / (.1 + width))
|
||||
|
||||
def rate_loss(y_true, y_pred):
    """Differentiable estimate of the coding rate (in bits) of the latents.

    Based on the L1 norm of the latent vector: C is a constant derived from
    log2(Gamma(n)), and the active formula below grows with k = sum(|z|).
    y_true is unused (Keras loss signature).
    """
    log2_e = 1.4427  # ~1/ln(2): converts natural log to log2
    n = y_pred.shape[-1]
    # math.lgamma == log(Gamma(n)) computed directly: numerically stable for
    # large n, and avoids the np.math alias that was removed in NumPy 2.0.
    C = n - log2_e*math.lgamma(n)
    k = K.sum(K.abs(y_pred), axis=-1)
    # Earlier experiment (p-norm smoothing with p = 1.5), kept for reference:
    #rate = C + (n-1)*log2_e*tf.math.log((k**1.5 + (n/5)**1.5)**(1/1.5))
    rate = C + (n-1)*log2_e*tf.math.log(k + .112*n**2/(n/1.8+k) )
    return K.mean(rate)
|
||||
|
||||
eps = 1e-6


def safelog2(x):
    """Base-2 logarithm with a small additive floor to avoid log(0)."""
    # log2(x) = log(x) / ln(2); 1.4427 ~= 1/ln(2)
    log2_e = 1.4427
    return log2_e * tf.math.log(x + eps)
|
||||
|
||||
def feat_dist_loss(y_true,y_pred):
    """Weighted distortion loss between predicted and target features.

    y_pred is rank 4 and y_true rank 3, so the comparison broadcasts over
    y_pred's extra leading axis. The last channel of y_pred carries a
    per-element lambda; features split as [0:18] cepstrum, [18:19] pitch,
    [19:] correlation (grounded by the slices below).
    """
    # Inverse-sqrt rate/distortion weight taken from the appended channel.
    lambda_1 = 1./K.sqrt(y_pred[:,:,:,-1])
    y_pred = y_pred[:,:,:,:-1]
    ceps = y_pred[:,:,:,:18] - y_true[:,:,:18]
    # Relative pitch error, normalized by the target pitch (+2 avoids /0).
    pitch = 2*(y_pred[:,:,:,18:19] - y_true[:,:,18:19])/(y_true[:,:,18:19] + 2)
    corr = y_pred[:,:,:,19:] - y_true[:,:,19:]
    # Pitch errors matter more when the target correlation (voicing) is high.
    pitch_weight = K.square(K.maximum(0., y_true[:,:,19:]+.5))
    return K.mean(lambda_1*K.mean(K.square(ceps) + 10*(1/18.)*K.abs(pitch)*pitch_weight + (1/18.)*K.square(corr), axis=-1))
|
||||
|
||||
def sq1_rate_loss(y_true,y_pred):
    """Soft (differentiable) rate estimate for the scalar-quantized latents.

    y_pred packs, per frame: n latent values, n p0 parameters, n decay
    parameters r, plus a trailing lambda channel. y_true is unused.
    """
    lambda_val = K.sqrt(y_pred[:,:,-1])
    y_pred = y_pred[:,:,:-1]
    log2_e = 1.4427
    n = y_pred.shape[-1]//3
    r = (y_pred[:,:,2*n:])
    p0 = (y_pred[:,:,n:2*n])
    # Probability of the zero symbol, reparameterized through r.
    p0 = 1-r**(.5+.5*p0)
    y_pred = y_pred[:,:,:n]
    y_pred = soft_quantize(y_pred)

    # Soft indicator of "value is (near) zero".
    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
    # NOTE: the next two assignments are superseded by the final `rate`
    # formula below — earlier experiments kept in place, their results are
    # discarded.
    rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
    rate = -safelog2(-.5*tf.math.log(r)*r**K.abs(y_pred))
    # Active formula: bits under a two-sided geometric (Laplacian-like) model.
    rate = -safelog2((1-r)/(1+r)*r**K.abs(y_pred))
    #rate = -safelog2(- tf.math.sinh(.5*tf.math.log(r))* r**K.abs(y_pred) - tf.math.cosh(K.maximum(0., .5 - K.abs(y_pred))*tf.math.log(r)) + 1)
    rate = lambda_val*K.sum(rate, axis=-1)
    return K.mean(rate)
|
||||
|
||||
def sq2_rate_loss(y_true, y_pred):
    """Rate estimate (bits) for the hard-quantized latents.

    Same packing as sq1_rate_loss: y_pred carries n latent values, n p0
    parameters, n decay parameters, plus a trailing lambda channel.
    y_true is unused (Keras loss signature).
    """
    weight = K.sqrt(y_pred[:, :, -1])
    params = y_pred[:, :, :-1]
    log2_e = 1.4427  # kept from the original implementation (unused here)
    n = params.shape[-1] // 3
    decay = params[:, :, 2*n:]
    p0 = params[:, :, n:2*n]
    # Probability of the zero symbol, reparameterized through the decay.
    p0 = 1 - decay**(.5 + .5*p0)
    # Alternative parameterization kept for reference:
    #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(decay))
    #p0 = 1-decay**theta
    q = tf.round(params[:, :, :n])
    abs_q = K.abs(q)
    # Indicator of the zero symbol (exactly 1 when q == 0 after rounding).
    is_zero = K.maximum(0., 1. - abs_q)**2
    bits = -is_zero*safelog2(p0*decay**abs_q) \
        - (1 - is_zero)*safelog2(.5*(1 - p0)*(1 - decay)*decay**(abs_q - 1))
    bits = weight*K.sum(bits, axis=-1)
    return K.mean(bits)
|
||||
|
||||
def sq_rate_metric(y_true,y_pred, reduce=True):
    """Rate (bits) of the hard-quantized latents, without the lambda weight.

    Same computation as sq2_rate_loss minus the lambda factor; used as a
    monitoring metric. If reduce is False the per-sequence rates are
    returned instead of their mean.
    """
    y_pred = y_pred[:,:,:-1]
    log2_e = 1.4427
    n = y_pred.shape[-1]//3
    r = y_pred[:,:,2*n:]
    p0 = y_pred[:,:,n:2*n]
    # Probability of the zero symbol, reparameterized through r.
    p0 = 1-r**(.5+.5*p0)
    #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
    #p0 = 1-r**theta
    y_pred = tf.round(y_pred[:,:,:n])
    # Indicator of the zero symbol (exactly 1 when the rounded value is 0).
    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
    rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
    rate = K.sum(rate, axis=-1)
    if reduce:
        rate = K.mean(rate)
    return rate
|
||||
|
||||
def pvq_quant_search(x, k):
    """Find the PVQ codevector: integer vector y with sum(|y|) == k that
    approximates the direction of x.

    Iteratively rescales x so that rounding lands on exactly k unit pulses
    (10 fixed refinement iterations).
    """
    # Project onto the L1 unit sphere so k*x sums to k pulses in magnitude.
    x = x/tf.reduce_sum(tf.abs(x), axis=-1, keepdims=True)
    kx = k*x
    y = tf.round(kx)
    newk = k

    for j in range(10):
        abs_y = tf.abs(y)
        abs_kx = tf.abs(kx)
        # Current total pulse count per vector.
        kk=tf.reduce_sum(abs_y, axis=-1)
        # Smallest upscale that adds at least one pulse / largest downscale
        # that removes at least one (the 1.000001/.999999 factors nudge past
        # the exact rounding boundary).
        plus = 1.000001*tf.reduce_min((abs_y+.5)/(abs_kx+1e-15), axis=-1)
        minus = .999999*tf.reduce_max((abs_y-.5)/(abs_kx+1e-15), axis=-1)
        # Too many pulses -> shrink; too few -> grow; exactly k -> keep.
        factor = tf.where(kk>k, minus, plus)
        factor = tf.where(kk==k, tf.ones_like(factor), factor)
        factor = tf.expand_dims(factor, axis=-1)
        #newk = newk * (k/kk)**.2
        newk = newk*factor
        kx = newk*x
        y = tf.round(kx)

    return y
|
||||
|
||||
def pvq_quantize(x, k):
    """PVQ-quantize x on the unit sphere with a straight-through gradient.

    Normalizes x to unit L2 norm, finds the k-pulse PVQ codevector, renormalizes
    it to the sphere, and passes gradients straight through the quantization.
    """
    unit = x / (1e-15 + tf.norm(x, axis=-1, keepdims=True))
    code = pvq_quant_search(unit, k)
    code = code / (1e-15 + tf.norm(code, axis=-1, keepdims=True))
    # Forward: quantized point on the sphere. Backward: identity gradient.
    return unit + tf.stop_gradient(code - unit)
|
||||
|
||||
|
||||
def var_repeat(x):
    """Tile x[0] along a new time axis to match the (dynamic) time length of x[1]."""
    source, reference = x[0], x[1]
    return tf.repeat(tf.expand_dims(source, 1), K.shape(reference)[1], axis=1)
|
||||
|
||||
# Width of the encoder's global/state output (gdense2) and of the decoder's
# initial-state input.
nb_state_dim = 24
|
||||
|
||||
def new_rdovae_encoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
    """Build the RDO-VAE encoder.

    Returns a Keras Model mapping a feature sequence to
    (enc_out, global_bits): per-step latent codes plus a global state vector
    used to initialize the decoder GRUs.
    """
    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)

    # CuDNNGRU trains faster on GPU; the plain GRU is used for inference/export.
    gru = CuDNNGRU if training else GRU
    enc_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense1')
    enc_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense2')
    enc_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense3')
    enc_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense4')
    enc_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense5')
    enc_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense6')
    enc_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense7')
    enc_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense8')

    #bits_dense = Dense(nb_bits, activation='linear', name='bits_dense')
    bits_dense = Conv1D(nb_bits, 4, padding='causal', activation='linear', name='bits_dense')

    # Pair up consecutive frames: (batch, T, F) -> (batch, T/2, 2F).
    inputs = Reshape((-1, 2*nb_used_features))(feat)
    d1 = enc_dense1(inputs)
    d2 = enc_dense2(d1)
    d3 = enc_dense3(d2)
    d4 = enc_dense4(d3)
    d5 = enc_dense5(d4)
    d6 = enc_dense6(d5)
    d7 = enc_dense7(d6)
    d8 = enc_dense8(d7)
    # Dense skip connections: the code is produced from all stack outputs.
    pre_out = Concatenate()([d1, d2, d3, d4, d5, d6, d7, d8])
    enc_out = bits_dense(pre_out)
    global_dense1 = Dense(128, activation='tanh', name='gdense1')
    global_dense2 = Dense(nb_state_dim, activation='tanh', name='gdense2')
    global_bits = global_dense2(global_dense1(pre_out))

    encoder = Model([feat], [enc_out, global_bits], name='encoder')
    return encoder
|
||||
|
||||
def new_rdovae_decoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
    """Build the RDO-VAE decoder.

    Maps (quantized code sequence, initial-state vector) back to the feature
    sequence. The code sequence is processed in reverse time order; the
    output is reversed back at the end.
    """
    bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits")
    gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state")


    # CuDNNGRU trains faster on GPU; the plain GRU is used for inference/export.
    gru = CuDNNGRU if training else GRU
    dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1')
    dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2')
    dec_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense3')
    dec_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense4')
    dec_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense5')
    dec_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense6')
    dec_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense7')
    dec_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense8')

    dec_final = Dense(bunch*nb_used_features, activation='linear', name='dec_final')

    time_reverse = Lambda(lambda x: K.reverse(x, 1))
    #time_reverse = Lambda(lambda x: x)
    #gru_state_rep = RepeatVector(64//bunch)(gru_state_input)

    #gru_state_rep = Lambda(var_repeat, output_shape=(None, nb_state_dim)) ([gru_state_input, bits_input])
    # Three projections of the global state, one per GRU's initial state.
    gru_state1 = Dense(cond_size, name="state1", activation='tanh')(gru_state_input)
    gru_state2 = Dense(cond_size, name="state2", activation='tanh')(gru_state_input)
    gru_state3 = Dense(cond_size, name="state3", activation='tanh')(gru_state_input)

    dec1 = dec_dense1(time_reverse(bits_input))
    dec2 = dec_dense2(dec1, initial_state=gru_state1)
    dec3 = dec_dense3(dec2)
    dec4 = dec_dense4(dec3, initial_state=gru_state2)
    dec5 = dec_dense5(dec4)
    dec6 = dec_dense6(dec5, initial_state=gru_state3)
    dec7 = dec_dense7(dec6)
    dec8 = dec_dense8(dec7)
    # Dense skip connections into the final projection, then unbunch frames.
    output = Reshape((-1, nb_used_features))(dec_final(Concatenate()([dec1, dec2, dec3, dec4, dec5, dec6, dec7, dec8])))
    decoder = Model([bits_input, gru_state_input], time_reverse(output), name='decoder')
    # Stash the code dimensions on the model for new_split_decoder().
    decoder.nb_bits = nb_bits
    decoder.bunch = bunch
    return decoder
|
||||
|
||||
def new_split_decoder(decoder):
    """Wrap a decoder so it decodes a sequence in four independent segments.

    Each 100-frame segment (in points below) is decoded from the encoder
    state at the segment's end, then the reconstructions are concatenated in
    time. This models decoding from intermediate states.
    """
    nb_bits = decoder.nb_bits
    bunch = decoder.bunch
    bits_input = Input(shape=(None, nb_bits), name="split_bits")
    gru_state_input = Input(shape=(None,nb_state_dim), name="split_state")

    # x = [tensor, begin, end] -> time slice; x = [tensor, idx] -> one step.
    range_select = Lambda(lambda x: x[0][:,x[1]:x[2],:])
    elem_select = Lambda(lambda x: x[0][:,x[1],:])
    # Segment boundaries in (unbunched) frames; divided by bunch below.
    points = [0, 100, 200, 300, 400]
    outputs = []
    for i in range(len(points)-1):
        begin = points[i]//bunch
        end = points[i+1]//bunch
        # Decode this segment from the state at its last code step.
        state = elem_select([gru_state_input, end-1])
        bits = range_select([bits_input, begin, end])
        outputs.append(decoder([bits, state]))
    output = Concatenate(axis=1)(outputs)
    split = Model([bits_input, gru_state_input], output, name="split")
    return split
|
||||
|
||||
def tensor_concat(x):
    """Stack the n decoded versions of a sequence along a new leading axis.

    x is a list of tensors shaped (batch, time, feat). Earlier entries are
    shifted by a 2-frame offset and padded with the tail of the last entry so
    all stacked copies align in time. n is currently fixed at 2 (one shifted
    copy plus the last entry).
    """
    #n = x[1]//2
    #x = x[0]
    n=2
    y = []
    for i in range(n-1):
        offset = 2 * (n-1-i)
        # Drop the first `offset` frames of x[i] and borrow the last `offset`
        # frames from x[-1] so the result keeps the full time length.
        tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2)
        y.append(tf.expand_dims(tmp, axis=0))
    y.append(tf.expand_dims(x[-1], axis=0))
    return Concatenate(axis=0)(y)
|
||||
|
||||
|
||||
def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
    """Build the full end-to-end RDO-VAE training model.

    Returns (model, encoder, decoder, qembedding). The model maps
    (features, quant_id, lambda) to four outputs: the hard-quantized
    reconstruction, the noise-quantized reconstruction, and two tensors used
    by the soft/hard rate losses.
    """

    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
    # Index selecting one of nb_quant learned quantizer configurations.
    quant_id = Input(shape=(None,), batch_size=batch_size)
    # Per-frame rate/distortion trade-off weight.
    lambda_val = Input(shape=(None, 1), batch_size=batch_size)
    lambda_bunched = AveragePooling1D(pool_size=bunch//2, strides=bunch//2, padding="valid")(lambda_val)
    # Upsample lambda to match the unbunched output time resolution.
    lambda_up = Lambda(lambda x: K.repeat_elements(x, 2, axis=-2))(lambda_val)

    # One embedding row per quantizer: 6*nb_bits values split below into
    # scale (nb_bits), dead zone (nb_bits), soft distr (2*nb_bits) and
    # hard distr (2*nb_bits) parameters.
    qembedding = Embedding(nb_quant, 6*nb_bits, name='quant_embed', embeddings_initializer='zeros')
    quant_embed_dec = qembedding(quant_id)
    quant_scale = Activation('softplus')(Lambda(lambda x: x[:,:,:nb_bits], name='quant_scale_embed')(quant_embed_dec))

    encoder = new_rdovae_encoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
    ze, gru_state_dec = encoder([feat])
    # Scale the latents before quantization with the learned per-bit scale.
    ze = Multiply()([ze, quant_scale])

    decoder = new_rdovae_decoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
    split_decoder = new_split_decoder(decoder)

    dead_zone = Activation('softplus')(Lambda(lambda x: x[:,:,nb_bits:2*nb_bits], name='dead_zone_embed')(quant_embed_dec))
    soft_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,2*nb_bits:4*nb_bits], name='soft_distr_embed')(quant_embed_dec))
    hard_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,4*nb_bits:], name='hard_distr_embed')(quant_embed_dec))

    # Two quantization paths: additive-noise (training proxy) and hard
    # rounding (straight-through), both after the dead zone.
    noisequant = UniformNoise()
    hardquant = Lambda(hard_quantize)
    dzone = Lambda(apply_dead_zone)
    dze = dzone([ze,dead_zone])
    ndze = noisequant(dze)
    dze_quant = hardquant(dze)

    # Undo the quantizer scale before decoding.
    div = Lambda(lambda x: x[0]/x[1])
    dze_quant = div([dze_quant,quant_scale])
    ndze_unquant = div([ndze,quant_scale])

    # x = [tensor, i]: take every (bunch//2)-th step starting at offset i.
    mod_select = Lambda(lambda x: x[0][:,x[1]::bunch//2,:])
    # PVQ-quantize the global decoder state (82 pulses).
    gru_state_dec = Lambda(lambda x: pvq_quantize(x, 82))(gru_state_dec)
    combined_output = []
    unquantized_output = []
    cat = Concatenate(name="out_cat")
    # Decode each phase offset separately, appending lambda so the losses
    # can read the per-frame weight from the last channel.
    for i in range(bunch//2):
        dze_select = mod_select([dze_quant, i])
        ndze_select = mod_select([ndze_unquant, i])
        state_select = mod_select([gru_state_dec, i])

        tmp = split_decoder([dze_select, state_select])
        tmp = cat([tmp, lambda_up])
        combined_output.append(tmp)

        tmp = split_decoder([ndze_select, state_select])
        tmp = cat([tmp, lambda_up])
        unquantized_output.append(tmp)

    # Stack the phase-offset reconstructions (see tensor_concat).
    concat = Lambda(tensor_concat, name="output")
    combined_output = concat(combined_output)
    unquantized_output = concat(unquantized_output)

    # Inputs to the hard (sq2) and soft (sq1) rate losses.
    e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val])
    e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val])


    model = Model([feat, quant_id, lambda_val], [combined_output, unquantized_output, e, e2], name="end2end")
    model.nb_used_features = nb_used_features

    return model, encoder, decoder, qembedding
|
||||
@@ -0,0 +1,138 @@
|
||||
"""
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
|
||||
|
||||
import argparse
import os
import sys

# Force CPU execution: dumping weights needs no GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ""

parser = argparse.ArgumentParser()

parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
parser.add_argument('output', metavar="<output folder>", type=str, help='output exchange folder')
parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)

args = parser.parse_args()

# now import the heavy stuff (after arg parsing so --help stays fast)
from rdovae import new_rdovae_model
from wexchange.tf import dump_tf_weights, load_tf_weights


# Maps Keras layer names to the file names used in the exchange folder.
exchange_name = {
    'enc_dense1' : 'encoder_stack_layer1_dense',
    'enc_dense3' : 'encoder_stack_layer3_dense',
    'enc_dense5' : 'encoder_stack_layer5_dense',
    'enc_dense7' : 'encoder_stack_layer7_dense',
    'enc_dense8' : 'encoder_stack_layer8_dense',
    'gdense1' : 'encoder_state_layer1_dense',
    'gdense2' : 'encoder_state_layer2_dense',
    'enc_dense2' : 'encoder_stack_layer2_gru',
    'enc_dense4' : 'encoder_stack_layer4_gru',
    'enc_dense6' : 'encoder_stack_layer6_gru',
    'bits_dense' : 'encoder_stack_layer9_conv',
    'qembedding' : 'statistical_model_embedding',
    'state1' : 'decoder_state1_dense',
    'state2' : 'decoder_state2_dense',
    'state3' : 'decoder_state3_dense',
    'dec_dense1' : 'decoder_stack_layer1_dense',
    'dec_dense3' : 'decoder_stack_layer3_dense',
    'dec_dense5' : 'decoder_stack_layer5_dense',
    'dec_dense7' : 'decoder_stack_layer7_dense',
    'dec_dense8' : 'decoder_stack_layer8_dense',
    'dec_final' : 'decoder_stack_layer9_dense',
    'dec_dense2' : 'decoder_stack_layer2_gru',
    'dec_dense4' : 'decoder_stack_layer4_gru',
    'dec_dense6' : 'decoder_stack_layer6_gru'
}
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Rebuild the model with the same hyperparameters it was trained with,
    # then restore the weights before dumping.
    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
    model.load_weights(args.weights)

    os.makedirs(args.output, exist_ok=True)

    # encoder
    encoder_dense_names = [
        'enc_dense1',
        'enc_dense3',
        'enc_dense5',
        'enc_dense7',
        'enc_dense8',
        'gdense1',
        'gdense2'
    ]

    encoder_gru_names = [
        'enc_dense2',
        'enc_dense4',
        'enc_dense6'
    ]

    encoder_conv1d_names = [
        'bits_dense'
    ]

    # Dump every encoder layer under its exchange-folder name.
    for name in encoder_dense_names + encoder_gru_names + encoder_conv1d_names:
        print(f"writing layer {exchange_name[name]}...")
        dump_tf_weights(os.path.join(args.output, exchange_name[name]), encoder.get_layer(name))

    # qembedding (lives on the end-to-end model, not on encoder/decoder)
    print(f"writing layer {exchange_name['qembedding']}...")
    dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding)

    # decoder
    decoder_dense_names = [
        'state1',
        'state2',
        'state3',
        'dec_dense1',
        'dec_dense3',
        'dec_dense5',
        'dec_dense7',
        'dec_dense8',
        'dec_final'
    ]

    decoder_gru_names = [
        'dec_dense2',
        'dec_dense4',
        'dec_dense6'
    ]

    for name in decoder_dense_names + decoder_gru_names:
        print(f"writing layer {exchange_name[name]}...")
        dump_tf_weights(os.path.join(args.output, exchange_name[name]), decoder.get_layer(name))
|
||||
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
|
||||
|
||||
import argparse
import os
import sys

# Force CPU execution: loading weights needs no GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ""

parser = argparse.ArgumentParser()

parser.add_argument('input', metavar="<input folder>", type=str, help='input exchange folder')
parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)

args = parser.parse_args()

# now import the heavy stuff (after arg parsing so --help stays fast)
from rdovae import new_rdovae_model
from wexchange.tf import load_tf_weights


# Maps Keras layer names to the file names used in the exchange folder
# (must match the table in the companion dump script).
exchange_name = {
    'enc_dense1' : 'encoder_stack_layer1_dense',
    'enc_dense3' : 'encoder_stack_layer3_dense',
    'enc_dense5' : 'encoder_stack_layer5_dense',
    'enc_dense7' : 'encoder_stack_layer7_dense',
    'enc_dense8' : 'encoder_stack_layer8_dense',
    'gdense1' : 'encoder_state_layer1_dense',
    'gdense2' : 'encoder_state_layer2_dense',
    'enc_dense2' : 'encoder_stack_layer2_gru',
    'enc_dense4' : 'encoder_stack_layer4_gru',
    'enc_dense6' : 'encoder_stack_layer6_gru',
    'bits_dense' : 'encoder_stack_layer9_conv',
    'qembedding' : 'statistical_model_embedding',
    'state1' : 'decoder_state1_dense',
    'state2' : 'decoder_state2_dense',
    'state3' : 'decoder_state3_dense',
    'dec_dense1' : 'decoder_stack_layer1_dense',
    'dec_dense3' : 'decoder_stack_layer3_dense',
    'dec_dense5' : 'decoder_stack_layer5_dense',
    'dec_dense7' : 'decoder_stack_layer7_dense',
    'dec_dense8' : 'decoder_stack_layer8_dense',
    'dec_final' : 'decoder_stack_layer9_dense',
    'dec_dense2' : 'decoder_stack_layer2_gru',
    'dec_dense4' : 'decoder_stack_layer4_gru',
    'dec_dense6' : 'decoder_stack_layer6_gru'
}
|
||||
|
||||
if __name__ == "__main__":

    # Rebuild a fresh model, then fill each layer from the exchange folder.
    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)

    encoder_layers = [
        'enc_dense1',
        'enc_dense3',
        'enc_dense5',
        'enc_dense7',
        'enc_dense8',
        'gdense1',
        'gdense2',
        'enc_dense2',
        'enc_dense4',
        'enc_dense6',
        'bits_dense'
    ]

    decoder_layers = [
        'state1',
        'state2',
        'state3',
        'dec_dense1',
        'dec_dense3',
        'dec_dense5',
        'dec_dense7',
        'dec_dense8',
        'dec_final',
        'dec_dense2',
        'dec_dense4',
        'dec_dense6'
    ]

    for name in encoder_layers:
        print(f"loading weight for layer {name}...")
        load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name))

    # qembedding lives on the end-to-end model, not on encoder/decoder.
    print(f"loading weight for layer qembedding...")
    load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding)

    for name in decoder_layers:
        print(f"loading weight for layer {name}...")
        load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name))

    # Persist the assembled model as a regular Keras weight file.
    model.save(args.weights)
|
||||
120
managed_components/78__esp-opus/dnn/training_tf2/test_lpcnet.py
Normal file
120
managed_components/78__esp-opus/dnn/training_tf2/test_lpcnet.py
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
import argparse
import sys

import h5py
import numpy as np

import lpcnet
from ulaw import ulaw2lin, lin2ulaw


# Synthesize speech from a feature file with a trained LPCNet model.
parser = argparse.ArgumentParser()
# FIX: the positional was declared as 'model-file'; argparse keeps dashes in
# positional dests, so args.model_file raised AttributeError.
parser.add_argument('model_file', metavar='<model file>', type=str, help='model weight h5 file')
# FIX: features/output were read from raw sys.argv without being declared,
# so strict parse_args() rejected the intended 3-argument invocation.
# Declared as positionals, matching the CLI style of the sibling scripts.
parser.add_argument('features', metavar='<features file>', type=str, help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', type=str, help='synthesized output file (int16 PCM)')
parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. WARNING: giving an inconsistent value here will severely degrade performance', default=1)

args = parser.parse_args()

filename = args.model_file
# Infer the network configuration from the weight file itself.
with h5py.File(filename, "r") as f:
    units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
    units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
    cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
    # End-to-end models carry an rc2lpc layer that computes LPCs internally.
    e2e = 'rc2lpc' in f['model_weights']


model, enc, dec = lpcnet.new_lpcnet_model(training = False, rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size, batch_size=1)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()


feature_file = args.features
out_file = args.output
frame_size = model.frame_size
nb_features = 36
nb_used_features = model.nb_used_features

features = np.fromfile(feature_file, dtype='float32')
# FIX: was np.resize(features, (-1, nb_features)), which is invalid —
# np.resize rejects negative dimensions. Truncate to a whole number of
# frames and reshape instead.
features = np.reshape(features[:features.size // nb_features * nb_features], (-1, nb_features))
nb_frames = 1
feature_chunk_size = features.shape[0]
pcm_chunk_size = frame_size*feature_chunk_size

features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
# Map the pitch feature (channel 18) to an integer period in samples.
periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')


model.load_weights(filename)

order = 16  # LPC order

pcm = np.zeros((nb_frames*pcm_chunk_size, ))
# [previous sample, prediction, excitation] in mu-law, 128 = mu-law zero.
fexc = np.zeros((1, 1, 3), dtype='int16')+128
state1 = np.zeros((1, model.rnn_units1), dtype='float32')
state2 = np.zeros((1, model.rnn_units2), dtype='float32')

# First-order de-emphasis filter state/coefficient.
mem = 0
coef = 0.85

# Bandwidth-expansion weights gamma^(i+1) applied to the LPCs.
lpc_weights = np.array([args.lpc_gamma ** (i + 1) for i in range(16)])

fout = open(out_file, 'wb')

# Skip the first frame's warm-up samples where no history exists yet.
skip = order + 1
for c in range(0, nb_frames):
    if not e2e:
        cfeat = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
    else:
        # The e2e encoder also emits the LPCs it derived from the features.
        cfeat, lpcs = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
    for fr in range(0, feature_chunk_size):
        f = c*feature_chunk_size + fr
        if not e2e:
            a = features[c, fr, nb_features-order:] * lpc_weights
        else:
            a = lpcs[c, fr]
        for i in range(skip, frame_size):
            # Linear prediction from the last `order` synthesized samples.
            pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1])
            fexc[0, 0, 1] = lin2ulaw(pred)

            p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])
            # Lower the temperature for voiced frames to reduce noisiness
            p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 19] - .5))
            p = p/(1e-18 + np.sum(p))
            # Cut off the tail of the remaining distribution
            p = np.maximum(p-0.002, 0).astype('float64')
            p = p/(1e-8 + np.sum(p))

            # Sample the excitation and reconstruct the sample.
            fexc[0, 0, 2] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))
            pcm[f*frame_size + i] = pred + ulaw2lin(fexc[0, 0, 2])
            fexc[0, 0, 0] = lin2ulaw(pcm[f*frame_size + i])
            # De-emphasis, then write one int16 sample.
            mem = coef*mem + pcm[f*frame_size + i]
            np.array([np.round(mem)], dtype='int16').tofile(fout)
        skip = 0

fout.close()
|
||||
92
managed_components/78__esp-opus/dnn/training_tf2/test_plc.py
Normal file
92
managed_components/78__esp-opus/dnn/training_tf2/test_plc.py
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2018-2019 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
# Train an LPCNet model
|
||||
|
||||
import argparse
|
||||
from plc_loader import PLCLoader
|
||||
|
||||
parser = argparse.ArgumentParser(description='Test a PLC model')
|
||||
|
||||
parser.add_argument('weights', metavar='<weights file>', help='weights file (.h5)')
|
||||
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
|
||||
parser.add_argument('output', metavar='<output>', help='reconstructed file (float32)')
|
||||
parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
|
||||
group1 = parser.add_mutually_exclusive_group()
|
||||
|
||||
parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
|
||||
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
import importlib
|
||||
lpcnet = importlib.import_module(args.model)
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
|
||||
import tensorflow.keras.backend as K
|
||||
import h5py
|
||||
|
||||
import tensorflow as tf
|
||||
#gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
#if gpus:
|
||||
# try:
|
||||
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
|
||||
# except RuntimeError as e:
|
||||
# print(e)
|
||||
|
||||
model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=1, training=False, quantize=False, cond_size=args.cond_size)
|
||||
model.compile()
|
||||
|
||||
lpc_order = 16
|
||||
|
||||
feature_file = args.features
|
||||
nb_features = model.nb_used_features + lpc_order
|
||||
nb_used_features = model.nb_used_features
|
||||
|
||||
# u for unquantised, load 16 bit PCM samples and convert to mu-law
|
||||
|
||||
features = np.loadtxt(feature_file)
|
||||
print(features.shape)
|
||||
sequence_size = features.shape[0]
|
||||
lost = np.reshape(features[:,-1:], (1, sequence_size, 1))
|
||||
features = features[:,:nb_used_features]
|
||||
features = np.reshape(features, (1, sequence_size, nb_used_features))
|
||||
|
||||
|
||||
model.load_weights(args.weights)
|
||||
|
||||
features = features*lost
|
||||
out = model.predict([features, lost])
|
||||
|
||||
out = features + (1-lost)*out
|
||||
|
||||
np.savetxt(args.output, out[0,:,:])
|
||||
70
managed_components/78__esp-opus/dnn/training_tf2/tf_funcs.py
Normal file
70
managed_components/78__esp-opus/dnn/training_tf2/tf_funcs.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Tensorflow/Keras helper functions to do the following:
|
||||
1. \mu law <-> Linear domain conversion
|
||||
2. Differentiable prediction from the input signal and LP coefficients
|
||||
3. Differentiable transformations Reflection Coefficients (RCs) <-> LP Coefficients
|
||||
"""
|
||||
from tensorflow.keras.layers import Lambda, Multiply, Layer, Concatenate
|
||||
from tensorflow.keras import backend as K
|
||||
import tensorflow as tf
|
||||
|
||||
# \mu law <-> Linear conversion functions
|
||||
# mu-law companding constants: 8-bit mu-law codes over a 16-bit linear range.
scale = 255.0/32768.0
scale_1 = 32768.0/255.0

def tf_l2u(x):
    """Differentiable linear -> mu-law conversion; output clamped to [0, 255]."""
    sgn = K.sign(x)
    mag = K.abs(x)
    # Logarithmic companding of the magnitude, sign restored afterwards.
    companded = sgn * (128 * K.log(1 + scale * mag) / K.log(256.0))
    # Bias to the unsigned 0..255 code range and clamp.
    return K.clip(128 + companded, 0, 255)

def tf_u2l(u):
    """Differentiable mu-law -> linear conversion (inverse of tf_l2u)."""
    centered = tf.cast(u, "float32") - 128.0
    sgn = K.sign(centered)
    mag = K.abs(centered)
    # Invert the log companding curve.
    return sgn * scale_1 * (K.exp(mag / 128. * K.log(256.0)) - 1)
|
||||
|
||||
# Differentiable Prediction Layer
|
||||
# Computes the LP prediction from the input lag signal and the LP coefficients
|
||||
# The inputs xt and lpc conform with the shapes in lpcnet.py (the '2400' is coded keeping this in mind)
|
||||
class diff_pred(Layer):
    """Differentiable LP prediction from the input lag signal and per-frame LP coefficients."""
    def call(self, inputs, lpcoeffs_N = 16, frame_size = 160):
        # inputs[0]: lag signal (batch, samples, 1); inputs[1]: LP coefficients
        # (batch, frames, lpcoeffs_N).
        # NOTE(review): the hard-coded 2400 assumes 15 frames x 160 samples per
        # sequence (see lpcnet.py shapes) — confirm before reusing elsewhere.
        xt = inputs[0]
        lpc = inputs[1]

        # Repeat each frame's coefficient vector across its frame_size samples.
        rept = Lambda(lambda x: K.repeat_elements(x , frame_size, 1))
        # Zero-pad lpcoeffs_N samples at the start so every lagged slice below is valid.
        zpX = Lambda(lambda x: K.concatenate([0*x[:,0:lpcoeffs_N,:], x],axis = 1))
        # Stack the lpcoeffs_N successively lagged copies of the signal on the channel axis.
        cX = Lambda(lambda x: K.concatenate([x[:,(lpcoeffs_N - i):(lpcoeffs_N - i + 2400),:] for i in range(lpcoeffs_N)],axis = 2))

        # Elementwise product of each coefficient with its lagged sample, negated.
        pred = -Multiply()([rept(lpc),cX(zpX(xt))])

        # Sum over the coefficient axis: the LP prediction for each sample.
        return K.sum(pred,axis = 2,keepdims = True)
|
||||
|
||||
# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion
|
||||
class diff_rc2lpc(Layer):
    """Differentiable reflection-coefficient -> LPC conversion (Levinson-Durbin recursion)."""
    def call(self, inputs, lpcoeffs_N = 16):
        # One upward step of the recursion: extend the order-i coefficient set
        # input[0] using the next reflection coefficient input[1].
        def pred_lpc_recursive(input):
            temp = (input[0] + K.repeat_elements(input[1],input[0].shape[2],2)*K.reverse(input[0],axes = 2))
            temp = Concatenate(axis = 2)([temp,input[1]])
            return temp
        Llpc = Lambda(pred_lpc_recursive)
        # Only the first lpcoeffs_N channels carry the reflection coefficients.
        inputs = inputs[:,:,:lpcoeffs_N]
        lpc_init = inputs
        for i in range(1,lpcoeffs_N):
            # Grow the LPC set one order at a time up to order lpcoeffs_N.
            lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
        return lpc_init
|
||||
|
||||
class diff_lpc2rc(Layer):
    """Differentiable LPC -> reflection-coefficient conversion (inverse Levinson-Durbin recursion)."""
    def call(self, inputs, lpcoeffs_N = 16):
        # One downward step: strip the highest-order coefficient input[1]
        # (which equals the reflection coefficient at that order) from the
        # lower-order set input[0].
        def pred_rc_recursive(input):
            ki = K.repeat_elements(K.expand_dims(input[1][:,:,0],axis = -1),input[0].shape[2],2)
            temp = (input[0] - ki*K.reverse(input[0],axes = 2))/(1 - ki*ki)
            temp = Concatenate(axis = 2)([temp,input[1]])
            return temp
        Lrc = Lambda(pred_rc_recursive)
        rc_init = inputs
        for i in range(1,lpcoeffs_N):
            # Peel orders from the top down; j-1 splits the remaining
            # lower-order coefficients from the already-converted tail.
            j = (lpcoeffs_N - i + 1)
            rc_init = Lrc([rc_init[:,:,:(j - 1)],rc_init[:,:,(j - 1):]])
        return rc_init
|
||||
214
managed_components/78__esp-opus/dnn/training_tf2/train_lpcnet.py
Normal file
214
managed_components/78__esp-opus/dnn/training_tf2/train_lpcnet.py
Normal file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
# Train an LPCNet model
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dataloader import LPCNetLoader
|
||||
|
||||
parser = argparse.ArgumentParser(description='Train an LPCNet model')
|
||||
|
||||
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
|
||||
parser.add_argument('data', metavar='<audio data file>', help='binary audio data file (uint8)')
|
||||
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
|
||||
parser.add_argument('--model', metavar='<model>', default='lpcnet', help='LPCNet model python definition (without .py)')
|
||||
group1 = parser.add_mutually_exclusive_group()
|
||||
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
|
||||
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
|
||||
parser.add_argument('--density', metavar='<global density>', type=float, help='average density of the recurrent weights (default 0.1)')
|
||||
parser.add_argument('--density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each recurrent gate (default 0.05, 0.05, 0.2)')
|
||||
parser.add_argument('--grub-density', metavar='<global GRU B density>', type=float, help='average density of the recurrent weights (default 1.0)')
|
||||
parser.add_argument('--grub-density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each GRU B input gate (default 1.0, 1.0, 1.0)')
|
||||
parser.add_argument('--grua-size', metavar='<units>', default=384, type=int, help='number of units in GRU A (default 384)')
|
||||
parser.add_argument('--grub-size', metavar='<units>', default=16, type=int, help='number of units in GRU B (default 16)')
|
||||
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network, aka frame rate network (default 128)')
|
||||
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
|
||||
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
|
||||
parser.add_argument('--end2end', dest='flag_e2e', action='store_true', help='Enable end-to-end training (with differentiable LPC computation')
|
||||
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
|
||||
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
|
||||
parser.add_argument('--gamma', metavar='<gamma>', type=float, help='adjust u-law compensation (default 2.0, should not be less than 1.0)')
|
||||
parser.add_argument('--lookahead', metavar='<nb frames>', default=2, type=int, help='Number of look-ahead frames (default 2)')
|
||||
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
|
||||
parser.add_argument('--lpc-gamma', type=float, default=1, help='gamma for LPC weighting')
|
||||
parser.add_argument('--cuda-devices', metavar='<cuda devices>', type=str, default=None, help='string with comma separated cuda device ids')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# set visible cuda devices
|
||||
if args.cuda_devices != None:
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_devices
|
||||
|
||||
density = (0.05, 0.05, 0.2)
|
||||
if args.density_split is not None:
|
||||
density = args.density_split
|
||||
elif args.density is not None:
|
||||
density = [0.5*args.density, 0.5*args.density, 2.0*args.density];
|
||||
|
||||
grub_density = (1., 1., 1.)
|
||||
if args.grub_density_split is not None:
|
||||
grub_density = args.grub_density_split
|
||||
elif args.grub_density is not None:
|
||||
grub_density = [0.5*args.grub_density, 0.5*args.grub_density, 2.0*args.grub_density];
|
||||
|
||||
gamma = 2.0 if args.gamma is None else args.gamma
|
||||
|
||||
import importlib
|
||||
lpcnet = importlib.import_module(args.model)
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
|
||||
from ulaw import ulaw2lin, lin2ulaw
|
||||
import tensorflow.keras.backend as K
|
||||
import h5py
|
||||
|
||||
import tensorflow as tf
|
||||
from tf_funcs import *
|
||||
from lossfuncs import *
|
||||
#gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
#if gpus:
|
||||
# try:
|
||||
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
|
||||
# except RuntimeError as e:
|
||||
# print(e)
|
||||
|
||||
nb_epochs = args.epochs
|
||||
|
||||
# Try reducing batch_size if you run out of memory on your GPU
|
||||
batch_size = args.batch_size
|
||||
|
||||
quantize = args.quantize is not None
|
||||
retrain = args.retrain is not None
|
||||
|
||||
lpc_order = 16
|
||||
|
||||
if quantize:
|
||||
lr = 0.00003
|
||||
decay = 0
|
||||
input_model = args.quantize
|
||||
else:
|
||||
lr = 0.001
|
||||
decay = 5e-5
|
||||
|
||||
if args.lr is not None:
|
||||
lr = args.lr
|
||||
|
||||
if args.decay is not None:
|
||||
decay = args.decay
|
||||
|
||||
if retrain:
|
||||
input_model = args.retrain
|
||||
|
||||
flag_e2e = args.flag_e2e
|
||||
|
||||
opt = Adam(lr, decay=decay, beta_1=0.5, beta_2=0.8)
|
||||
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
|
||||
|
||||
with strategy.scope():
|
||||
model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size,
|
||||
rnn_units2=args.grub_size,
|
||||
batch_size=batch_size, training=True,
|
||||
quantize=quantize,
|
||||
flag_e2e=flag_e2e,
|
||||
cond_size=args.cond_size,
|
||||
lpc_gamma=args.lpc_gamma,
|
||||
lookahead=args.lookahead
|
||||
)
|
||||
if not flag_e2e:
|
||||
model.compile(optimizer=opt, loss=metric_cel, metrics=metric_cel)
|
||||
else:
|
||||
model.compile(optimizer=opt, loss = [interp_mulaw(gamma=gamma), loss_matchlar()], loss_weights = [1.0, 2.0], metrics={'pdf':[metric_cel,metric_icel,metric_exc_sd,metric_oginterploss]})
|
||||
model.summary()
|
||||
|
||||
feature_file = args.features
|
||||
pcm_file = args.data # 16 bit unsigned short PCM samples
|
||||
frame_size = model.frame_size
|
||||
nb_features = model.nb_used_features + lpc_order
|
||||
nb_used_features = model.nb_used_features
|
||||
feature_chunk_size = 15
|
||||
pcm_chunk_size = frame_size*feature_chunk_size
|
||||
|
||||
# u for unquantised, load 16 bit PCM samples and convert to mu-law
|
||||
|
||||
data = np.memmap(pcm_file, dtype='int16', mode='r')
|
||||
nb_frames = (len(data)//(2*pcm_chunk_size)-1)//batch_size*batch_size
|
||||
|
||||
features = np.memmap(feature_file, dtype='float32', mode='r')
|
||||
|
||||
# limit to discrete number of frames
|
||||
data = data[(4-args.lookahead)*2*frame_size:]
|
||||
data = data[:nb_frames*2*pcm_chunk_size]
|
||||
|
||||
|
||||
data = np.reshape(data, (nb_frames, pcm_chunk_size, 2))
|
||||
|
||||
#print("ulaw std = ", np.std(out_exc))
|
||||
|
||||
sizeof = features.strides[-1]
|
||||
features = np.lib.stride_tricks.as_strided(features, shape=(nb_frames, feature_chunk_size+4, nb_features),
|
||||
strides=(feature_chunk_size*nb_features*sizeof, nb_features*sizeof, sizeof))
|
||||
#features = features[:, :, :nb_used_features]
|
||||
|
||||
|
||||
periods = (.1 + 50*features[:,:,nb_used_features-2:nb_used_features-1]+100).astype('int16')
|
||||
#periods = np.minimum(periods, 255)
|
||||
|
||||
# dump models to disk as we go
|
||||
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.grua_size, '{epoch:02d}'))
|
||||
|
||||
if args.retrain is not None:
|
||||
model.load_weights(args.retrain)
|
||||
|
||||
if quantize or retrain:
|
||||
#Adapting from an existing model
|
||||
model.load_weights(input_model)
|
||||
if quantize:
|
||||
sparsify = lpcnet.Sparsify(10000, 30000, 100, density, quantize=True)
|
||||
grub_sparsify = lpcnet.SparsifyGRUB(10000, 30000, 100, args.grua_size, grub_density, quantize=True)
|
||||
else:
|
||||
sparsify = lpcnet.Sparsify(0, 0, 1, density)
|
||||
grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
|
||||
else:
|
||||
#Training from scratch
|
||||
sparsify = lpcnet.Sparsify(2000, 20000, 400, density)
|
||||
grub_sparsify = lpcnet.SparsifyGRUB(2000, 40000, 400, args.grua_size, grub_density)
|
||||
|
||||
model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))
|
||||
|
||||
loader = LPCNetLoader(data, features, periods, batch_size, e2e=flag_e2e, lookahead=args.lookahead)
|
||||
|
||||
callbacks = [checkpoint, sparsify, grub_sparsify]
|
||||
if args.logdir is not None:
|
||||
logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.grua_size)
|
||||
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
|
||||
callbacks.append(tensorboard_callback)
|
||||
|
||||
model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
|
||||
197
managed_components/78__esp-opus/dnn/training_tf2/train_plc.py
Normal file
197
managed_components/78__esp-opus/dnn/training_tf2/train_plc.py
Normal file
@@ -0,0 +1,197 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2018-2019 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
# Train an LPCNet model
|
||||
|
||||
import argparse
|
||||
from plc_loader import PLCLoader
|
||||
|
||||
parser = argparse.ArgumentParser(description='Train a PLC model')
|
||||
|
||||
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
|
||||
parser.add_argument('lost_file', metavar='<packet loss file>', help='packet loss traces (int8)')
|
||||
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
|
||||
parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
|
||||
group1 = parser.add_mutually_exclusive_group()
|
||||
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
|
||||
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
|
||||
parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
|
||||
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
|
||||
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
|
||||
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
|
||||
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
|
||||
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
|
||||
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
|
||||
parser.add_argument('--band-loss', metavar='<weight>', default=1.0, type=float, help='weight of band loss (default 1.0)')
|
||||
parser.add_argument('--loss-bias', metavar='<bias>', default=0.0, type=float, help='loss bias towards low energy (default 0.0)')
|
||||
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
import importlib
|
||||
lpcnet = importlib.import_module(args.model)
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
|
||||
import tensorflow.keras.backend as K
|
||||
import h5py
|
||||
|
||||
import tensorflow as tf
|
||||
#gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
#if gpus:
|
||||
# try:
|
||||
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
|
||||
# except RuntimeError as e:
|
||||
# print(e)
|
||||
|
||||
nb_epochs = args.epochs
|
||||
|
||||
# Try reducing batch_size if you run out of memory on your GPU
|
||||
batch_size = args.batch_size
|
||||
|
||||
quantize = args.quantize is not None
|
||||
retrain = args.retrain is not None
|
||||
|
||||
if quantize:
|
||||
lr = 0.00003
|
||||
decay = 0
|
||||
input_model = args.quantize
|
||||
else:
|
||||
lr = 0.001
|
||||
decay = 2.5e-5
|
||||
|
||||
if args.lr is not None:
|
||||
lr = args.lr
|
||||
|
||||
if args.decay is not None:
|
||||
decay = args.decay
|
||||
|
||||
if retrain:
|
||||
input_model = args.retrain
|
||||
|
||||
def plc_loss(alpha=1.0, bias=0.):
    """Build the combined PLC training loss.

    alpha weights the band-domain (IDCT of cepstral error) term; bias adds an
    extra penalty on positive band errors in low-energy regions. The last
    channel of y_true is a validity mask; channel 18 is presumably the pitch
    feature (see plc_pitch_loss) — confirm against the feature layout.
    """
    def loss(y_true,y_pred):
        valid = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        err = (y_pred - target)*valid
        # Cepstral error mapped back to the band-energy domain.
        band_err = tf.signal.idct(err[:,:,:-2], norm='ortho')
        # Soft 0..1 indicator built from the last real feature channel.
        bias_mask = K.minimum(1., K.maximum(0., 4*target[:,:,-1:]))
        l1_term = K.mean(K.abs(err))
        energy_term = 0.1*K.mean(K.maximum(0., -err[:,:,-1:]))
        band_term = alpha*K.mean(K.abs(band_err) + bias*bias_mask*K.maximum(0., band_err))
        pitch_term_wide = K.mean(K.minimum(K.abs(err[:,:,18:19]),1.))
        pitch_term_tight = 8*K.mean(K.minimum(K.abs(err[:,:,18:19]),.4))
        # Same summation order as the original single-expression form.
        return l1_term + energy_term + band_term + pitch_term_wide + pitch_term_tight
    return loss
|
||||
|
||||
def plc_l1_loss():
    """Build an L1 metric over masked (valid) features only."""
    # Inner function name is kept: Keras reports metrics under __name__.
    def L1_loss(y_true,y_pred):
        valid = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        return K.mean(K.abs((y_pred - target)*valid))
    return L1_loss
|
||||
|
||||
def plc_ceps_loss():
    """Build an L1 metric over the cepstral features (last two channels dropped)."""
    # Inner function name is kept: Keras reports metrics under __name__.
    def ceps_loss(y_true,y_pred):
        valid = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*valid
        return K.mean(K.abs(masked_err[:,:,:-2]))
    return ceps_loss
|
||||
|
||||
def plc_band_loss():
    """Build an L1 metric in the band-energy domain (IDCT of the cepstral error)."""
    # Inner function name is kept: Keras reports metrics under __name__.
    def L1_band_loss(y_true,y_pred):
        valid = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*valid
        band_err = tf.signal.idct(masked_err[:,:,:-2], norm='ortho')
        return K.mean(K.abs(band_err))
    return L1_band_loss
|
||||
|
||||
def plc_pitch_loss():
    """Build a clipped L1 metric on feature channel 18 (presumably pitch — confirm)."""
    # Inner function name is kept: Keras reports metrics under __name__.
    def pitch_loss(y_true,y_pred):
        valid = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*valid
        # Per-element error is clipped at .4 so outliers don't dominate.
        return K.mean(K.minimum(K.abs(masked_err[:,:,18:19]),.4))
    return pitch_loss
|
||||
|
||||
opt = Adam(lr, decay=decay, beta_2=0.99)
|
||||
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
|
||||
|
||||
with strategy.scope():
|
||||
model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=batch_size, training=True, quantize=quantize, cond_size=args.cond_size)
|
||||
model.compile(optimizer=opt, loss=plc_loss(alpha=args.band_loss, bias=args.loss_bias), metrics=[plc_l1_loss(), plc_ceps_loss(), plc_band_loss(), plc_pitch_loss()])
|
||||
model.summary()
|
||||
|
||||
lpc_order = 16
|
||||
|
||||
feature_file = args.features
|
||||
nb_features = model.nb_used_features + lpc_order + model.nb_burg_features
|
||||
nb_used_features = model.nb_used_features
|
||||
nb_burg_features = model.nb_burg_features
|
||||
sequence_size = args.seq_length
|
||||
|
||||
# u for unquantised, load 16 bit PCM samples and convert to mu-law
|
||||
|
||||
|
||||
features = np.memmap(feature_file, dtype='float32', mode='r')
|
||||
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
|
||||
features = features[:nb_sequences*sequence_size*nb_features]
|
||||
|
||||
features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
|
||||
|
||||
features = features[:, :, :nb_used_features+model.nb_burg_features]
|
||||
|
||||
lost = np.memmap(args.lost_file, dtype='int8', mode='r')
|
||||
|
||||
# dump models to disk as we go
|
||||
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.gru_size, '{epoch:02d}'))
|
||||
|
||||
if args.retrain is not None:
|
||||
model.load_weights(args.retrain)
|
||||
|
||||
if quantize or retrain:
|
||||
#Adapting from an existing model
|
||||
model.load_weights(input_model)
|
||||
|
||||
model.save_weights('{}_{}_initial.h5'.format(args.output, args.gru_size))
|
||||
|
||||
loader = PLCLoader(features, lost, nb_burg_features, batch_size)
|
||||
|
||||
callbacks = [checkpoint]
|
||||
if args.logdir is not None:
|
||||
logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.gru_size)
|
||||
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
|
||||
callbacks.append(tensorboard_callback)
|
||||
|
||||
model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
|
||||
151
managed_components/78__esp-opus/dnn/training_tf2/train_rdovae.py
Normal file
151
managed_components/78__esp-opus/dnn/training_tf2/train_rdovae.py
Normal file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2018-2019 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
# Train an LPCNet model
|
||||
import tensorflow as tf
|
||||
strategy = tf.distribute.MultiWorkerMirroredStrategy()
|
||||
|
||||
|
||||
import argparse
|
||||
#from plc_loader import PLCLoader
|
||||
|
||||
parser = argparse.ArgumentParser(description='Train a quantization model')
|
||||
|
||||
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
|
||||
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
|
||||
parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
|
||||
group1 = parser.add_mutually_exclusive_group()
|
||||
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
|
||||
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
|
||||
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
|
||||
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
|
||||
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
|
||||
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
|
||||
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
|
||||
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
|
||||
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
import importlib
|
||||
rdovae = importlib.import_module(args.model)
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
|
||||
import tensorflow.keras.backend as K
|
||||
import h5py
|
||||
|
||||
#gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
#if gpus:
|
||||
# try:
|
||||
# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
|
||||
# except RuntimeError as e:
|
||||
# print(e)
|
||||
|
||||
# --- Training configuration from command-line args ---
nb_epochs = args.epochs

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

# Modes: "quantize" fine-tunes an existing model with quantization enabled,
# "retrain" continues training from an existing checkpoint.
quantize = args.quantize is not None
retrain = args.retrain is not None

if quantize:
    # Low learning rate, no decay for quantization-aware fine-tuning.
    lr = 0.00003
    decay = 0
    input_model = args.quantize
else:
    lr = 0.001
    decay = 2.5e-5

# Explicit --lr / --decay override the mode defaults chosen above.
if args.lr is not None:
    lr = args.lr

if args.decay is not None:
    decay = args.decay

if retrain:
    input_model = args.retrain

opt = Adam(lr, decay=decay, beta_2=0.99)

with strategy.scope():
    # Four outputs; the two feat_dist losses cover the reconstruction paths and
    # the sq1/sq2 rate losses penalize bitrate (weighted 1. and .1 respectively).
    model, encoder, decoder, _ = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size, nb_quant=16)
    model.compile(optimizer=opt, loss=[rdovae.feat_dist_loss, rdovae.feat_dist_loss, rdovae.sq1_rate_loss, rdovae.sq2_rate_loss], loss_weights=[.5, .5, 1., .1], metrics={'hard_bits':rdovae.sq_rate_metric})
    model.summary()

lpc_order = 16

feature_file = args.features
# Each frame on disk carries the used features plus lpc_order LPC coefficients.
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
sequence_size = args.seq_length

# u for unquantised, load 16 bit PCM samples and convert to mu-law


# Memory-map the raw float32 feature file and trim it to a whole number of
# (batch_size x sequence_size) sequences.
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]

features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
print(features.shape)
# Drop the trailing LPC coefficients; only the used features feed the model.
features = features[:, :, :nb_used_features]

#lambda_val = np.repeat(np.random.uniform(.0007, .002, (features.shape[0], 1, 1)), features.shape[1]//2, axis=1)
#quant_id = np.round(10*np.log(lambda_val/.0007)).astype('int16')
#quant_id = quant_id[:,:,0]
# One random quantizer id in [0, 16) per sequence, repeated over half the time
# axis (presumably one id per pair of frames — confirm against the model).
quant_id = np.repeat(np.random.randint(16, size=(features.shape[0], 1, 1), dtype='int16'), features.shape[1]//2, axis=1)
# Rate-distortion lambda derived deterministically from the quantizer id.
lambda_val = .0002*np.exp(quant_id/3.8)
quant_id = quant_id[:,:,0]

# dump models to disk as we go
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.cond_size, '{epoch:02d}'))

if args.retrain is not None:
    model.load_weights(args.retrain)

if quantize or retrain:
    #Adapting from an existing model
    # NOTE(review): when retraining, input_model == args.retrain, so the same
    # weights are loaded twice (harmless but redundant) — confirm intended.
    model.load_weights(input_model)

# Snapshot the starting weights so the initial state can be recovered.
model.save_weights('{}_{}_initial.h5'.format(args.output, args.cond_size))

callbacks = [checkpoint]
#callbacks = []

if args.logdir is not None:
    logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.cond_size)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
    callbacks.append(tensorboard_callback)

# The features tensor serves as both input and all four training targets
# (two reconstruction outputs and two rate outputs).
model.fit([features, quant_id, lambda_val], [features, features, features, features], batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
|
||||
19
managed_components/78__esp-opus/dnn/training_tf2/ulaw.py
Normal file
19
managed_components/78__esp-opus/dnn/training_tf2/ulaw.py
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
# u-law (mu-law, mu = 255) companding helpers.
# 16-bit linear PCM maps to 8-bit codes in [0, 255], with 128 meaning silence.
scale = 255.0/32768.0
scale_1 = 32768.0/255.0

def ulaw2lin(u):
    """Expand 8-bit u-law codes (0..255) back to 16-bit linear PCM."""
    code = u - 128                  # recenter so that 0 represents silence
    sign = np.sign(code)
    magnitude = np.abs(code)
    # Inverse companding: 256**(|code|/128) - 1, rescaled to the 16-bit range.
    return sign * scale_1 * (np.exp(magnitude/128.*math.log(256)) - 1)


def lin2ulaw(x):
    """Compress 16-bit linear PCM to 8-bit u-law codes (0..255)."""
    sign = np.sign(x)
    magnitude = np.abs(x)
    # Logarithmic companding, signed, spanning 128 steps per polarity.
    compressed = sign * (128*np.log(1 + scale*magnitude)/math.log(256))
    # Shift to the unsigned 0..255 code range and quantize.
    return np.clip(128 + np.round(compressed), 0, 255).astype('int16')
|
||||
@@ -0,0 +1,78 @@
|
||||
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the UniformNoise layer."""
|
||||
|
||||
|
||||
import tensorflow.compat.v2 as tf
|
||||
|
||||
from tensorflow.keras import backend
|
||||
|
||||
from tensorflow.keras.layers import Layer
|
||||
|
||||
class UniformNoise(Layer):
    """Apply additive zero-centered uniform noise.

    Noise is drawn uniformly from ``[-stddev, stddev]`` and added to the
    input. This is useful to mitigate overfitting (you could see it as a
    form of random data augmentation).

    As it is a regularization layer, it is only active at training time.

    Args:
      stddev: Float, half-width of the uniform noise interval; samples are
        drawn from ``[-stddev, stddev]``. (The name is kept for drop-in
        compatibility with ``GaussianNoise``; it is NOT the statistical
        standard deviation of the uniform distribution.)
      seed: Integer, optional random seed to enable deterministic behavior.

    Call arguments:
      inputs: Input tensor (of any rank).
      training: Python boolean indicating whether the layer should behave in
        training mode (adding noise) or in inference mode (doing nothing).

    Input shape:
      Arbitrary. Use the keyword argument `input_shape`
      (tuple of integers, does not include the samples axis)
      when using this layer as the first layer in a model.

    Output shape:
      Same shape as input.
    """

    def __init__(self, stddev=0.5, seed=None, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.stddev = stddev
        # Fix: `seed` was previously accepted but silently discarded, so the
        # documented deterministic behavior was impossible to obtain.
        self.seed = seed

    def call(self, inputs, training=None):
        def noised():
            # Noise matches the input's shape and dtype exactly.
            return inputs + backend.random_uniform(
                shape=tf.shape(inputs),
                minval=-self.stddev,
                maxval=self.stddev,
                dtype=inputs.dtype,
                seed=self.seed,
            )

        # Only inject noise in the training phase; identity at inference.
        return backend.in_train_phase(noised, inputs, training=training)

    def get_config(self):
        # Include `seed` so the layer round-trips through serialization;
        # previously it was dropped from the saved config.
        config = {"stddev": self.stddev, "seed": self.seed}
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape
|
||||
Reference in New Issue
Block a user