add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,29 @@
#pragma once
#include <float.h>
#include <math.h>
/* #undef ENABLE_DOUBLE */
/*
 * Precision selection for the csf_* aliases.
 *
 * Single precision is the default. When ENABLE_DOUBLE is defined before
 * this point, every alias maps to the double-precision <math.h> routine
 * and <float.h> limit instead.
 */
#if !defined(ENABLE_DOUBLE)

/* Single precision: float storage, f-suffixed math routines. */
#define csf_float     float
#define csf_float_min FLT_MIN

#define csf_abs       fabsf
#define csf_ceil      ceilf
#define csf_floor     floorf
#define csf_sqrt      sqrtf
#define csf_pow       powf
#define csf_log       logf
#define csf_log10     log10f
#define csf_sin       sinf

#else /* ENABLE_DOUBLE */

/* Double precision: double storage, unsuffixed math routines. */
#define csf_float     double
#define csf_float_min DBL_MIN

#define csf_abs       fabs
#define csf_ceil      ceil
#define csf_floor     floor
#define csf_sqrt      sqrt
#define csf_pow       pow
#define csf_log       log
#define csf_log10     log10
#define csf_sin       sin

#endif /* !ENABLE_DOUBLE */

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_customized_word_wn5;

View File

@@ -0,0 +1,418 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_H
#define DL_LIB_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#include "esp_heap_caps.h"
#include "sdkconfig.h"
#define DL_SPIRAM_SUPPORT 1
#endif
#ifdef CONFIG_IDF_TARGET_ESP32S3
#include "esp32s3/rom/cache.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef int padding_state;
// /**
// * @brief Allocate a chunk of memory which has the given capabilities.
// * Equivalent semantics to libc malloc(), for capability-aware memory.
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
// *
// * @param size In bytes, of the amount of memory to allocate
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
// * @return Pointer to currently allocated heap memory
// **/
// void *heap_caps_malloc(size_t size, uint32_t caps);
/**
* @brief Allocate aligned memory from internal memory or external memory.
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
* else, allocate memory from PSRAM
*
* NOTE(review): ESP-IDF describes CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL as the maximum
* allocation size that is always placed in internal memory, which is the opposite
* direction of the comparison stated above - confirm against the implementation.
*
* @param cnt Number of continuing chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits
* @return Pointer to currently allocated heap memory
*/
void *dl_lib_calloc(int cnt, int size, int align);
/**
* @brief Always allocate aligned memory from external memory.
*
* @param cnt Number of continuing chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits
* @return Pointer to currently aligned heap memory
*/
void *dl_lib_calloc_psram(int cnt, int size, int align);
/**
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
*
* @param ptr Pointer to free
*/
void dl_lib_free(void *ptr);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
* Should be good until an input of 5 or so with a steps factor of 8.
*
* @param x Floating point input
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
* @return Exp()'ed output
*/
fptp_t fast_exp(double x, int steps);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* @param x Floating point input
* @return Exp()'ed output
*/
double fast_exp_pro(double x);
/**
* @brief Does a softmax operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a softmax operation on a quantized matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a floating point number
*
* @param in Floating point input
* @return Sigmoid output
*/
fptp_t dl_sigmoid_op(fptp_t in);
/**
* @brief Does a sigmoid operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a tanh operation on a floating point number
*
* @param v Floating point input number
* @return Tanh value
*/
fptp_t dl_tanh_op(fptp_t v);
/**
* @brief Does a tanh operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
*
* @param in Floating point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
/**
* @brief Does a ReLu operation on a matrix.
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Fully connected layer operation
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
*/
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
/**
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
* this matrix only needs to be calculated once. This function does that.
*
* @param variance Variance matrix the sqrtvari matrix is derived from
* @param epsilon Epsilon value the sqrtvari matrix is derived from
* @param out Matrix for the precalculated sqrtvari values (presumably pre-allocated by the caller - TODO confirm)
*/
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
/**
* @brief Batch-normalize a matrix
*
* The function returns void and `m` is the only non-const argument.
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
*/
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
/**
* @brief Do a basic LSTM layer pass.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a basic LSTM layer pass, partial quantized version.
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons, need to be quantised
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a fully-connected layer pass, fully-quantized version.
*
* The function returns void; `out` is the only non-const matrix argument.
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
* @param out Output matrix; presumably receives the output values of the neurons - TODO confirm
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
*/
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
/**
* @brief Do a basic LSTM layer pass, fully-quantized version
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
/**
* @brief Batch-normalize a matrix, fully-quantized version
*
* The function returns void and `m` is the only non-const matrix argument.
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
*/
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
/**
* @brief Does a ReLu operation on a matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return Sigmoid output
*/
int dl_sigmoid_op_q(const int in);
int16_t dl_sigmoid_op_q8(const int16_t in);
/**
* @brief Does a sigmoid operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return tanh output
*/
int dl_tanh_op_q(int v);
int16_t dl_tanh_op_q8(int16_t v);
void load_mat_psram_mn4(void);
void load_mat_psram_mn3(void);
void free_mat_psram_mn4(void);
void free_mat_psram_mn3(void);
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
int16_t dl_table_tanh_op(int16_t in, int exponent);
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Filter out the number greater than clip in the matrix, float version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Filter out the number greater than clip in the matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Do a basic CNN layer pass.
*
* @warning This just supports the single channel input image, and the output is single row matrix.
* That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias for the CNN layer.
* @param filter_width The width of convolution kernel
* @param filter_height The height of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1.
* @return The result of CNN layer.
*/
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
/**
* @brief Do a basic CNN layer pass, quantised version.
*
* @warning This just supports the single channel input image, and the output is single row matrix.
* That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias of the neurons.
* @param filter_width The width of convolution kernel
* @param filter_height The height of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1
* @return The result of CNN layer
*/
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_COEFGETTER_IF_H
#define DL_LIB_COEFGETTER_IF_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "cJSON.h"
#ifdef __cplusplus
extern "C" {
#endif
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
//dl_batch_normalize_get_sqrtvar first.
#define COEF_GETTER_HINT_BNVAR (1<<0)
/*
This struct describes the basic information of model data:
word_num: the number of wake words or speech commands
word_list: the name list of wake words or speech commands
win_list: integer list, not described by the original comment
(presumably one window value per word - TODO confirm)
thresh_list: the threshold list of wake words or speech commands
info_str: the string used to reflect the version and information of model data,
which consists of the architecture of network, the version of model data, wake words and their thresholds
*/
typedef struct {
int word_num;
char **word_list;
int *win_list;
float *thresh_list;
char *info_str;
} model_info_t;
/*
Alphabet struct describes the basic grapheme or phoneme.
item_num: the number of basic items (graphemes or phonemes)
items: the list of basic items
*/
typedef struct {
int item_num;
char **items;
}alphabet_t;
/*
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
For the matrix getters (getter_f, getter_q, getter_q8), the name describes the name of the coefficient matrix, usually
the same as the Numpy filename the coefficient was originally stored in. The arg argument can be used to optionally pass
an additional user-defined argument to the getter (e.g. the directory to look for files in the case of the Numpy file
loader getter). The hint argument is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the
free_f/free_q/free_q8 functions to release the memory for the returned matrices, when applicable.
The getter_info, getter_alphabet and getter_config members take only the arg argument and return a model_info_t,
an alphabet_t and a cJSON object respectively.
*/
typedef struct {
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
void (*free_f)(const dl_matrix2d_t *m);
void (*free_q)(const dl_matrix2dq_t *m);
void (*free_q8)(const dl_matrix2dq8_t *m);
const model_info_t* (*getter_info)(void *arg);
const alphabet_t* (*getter_alphabet)(void *arg);
const cJSON* (*getter_config)(void *arg);
} model_coeff_getter_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,180 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONV_QUEUE_H
#define DL_LIB_CONV_QUEUE_H
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef float fptp_t;
//Flags for matrices
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
// Float convolution FIFO queue.
typedef struct {
int n; /**< the length of queue */
int c; /**< the channel number of queue element */
int front; /**< the front(top) position of queue */
int flag; /**< not used */
fptp_t *item; /**< Pointer to item array */
} dl_conv_queue_t;
/**
* @brief Allocate a convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
/**
* @brief Allocate a convolution queue from psram
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
/**
* @brief Free a convolution queue
*
* @param cq The convolution queue to free
*/
void dl_conv_queue_free(dl_conv_queue_t *cq);
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input convolution queue
* @return Pointer of oldest element
*/
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input convolution queue
* @param item The new element
*/
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a sigmoid operation on one element of the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on one element of the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a softmax operation on one element of the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @return The result of atrous convolution
*/
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @return The result of dilation layer
*/
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,303 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ8_QUEUE_H
#define DL_LIB_CONVQ8_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
//[nch, n, c]
typedef struct {
int n; /**< the length of queue */
int c; /**< the number of queue element */
int front; /**< the front(top) position of queue */
int nch; /**< the channel of queue */
int exponent; /**< The values in items should be multiplied by pow(2,exponent)
to get the real values */
q8tp_t *itemq; /**< Pointer to item array */
} dl_convq8_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate an 8-bit fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cqm The fixed-point convolution queue whose itemq is set to 0
*/
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queue and matrix should be allocated.
* NOTE(review): the original comment described a return pointer (the last element of the output queue),
* but this function is declared void; the result is presumably delivered through `out` - TODO confirm.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel Kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @warning All input and output convolution queue and matrix should be allocated.
* NOTE(review): the original comment described a return pointer (the last element of the output queue),
* but this function is declared void; the result is presumably delivered through `out` - TODO confirm.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
/**
* @brief NOTE(review): undocumented. Presumably the 8-bit counterpart of dl_convq_queue_add
* (item-by-item addition of two fixed-point queues) - confirm against the implementation.
*
* @param cq1 First 8-bit fixed-point convolution queue
* @param cq2 Second 8-bit fixed-point convolution queue
* @return A dl_conv_queue_t pointer; who owns/frees it is not visible from this header - TODO confirm
*/
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
/**
* @brief NOTE(review): undocumented. The name suggests a lookup-table based sigmoid for the 8-bit
* fixed-point code path - confirm against the implementation.
*
* @param in Input value; its fixed-point scale and valid range are not visible here - TODO confirm
* @return An int8_t result; its fixed-point scale is not visible here - TODO confirm
*/
int8_t dl_sigmoid_lutq8(int in);
/**
* @brief Allocate an 8-bit fixed-point Multi-Channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel number
* @return The multi-channel convolution queue (returned as dl_convq8_queue_t **), or NULL if out of memory
*/
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number
*/
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
/**
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
*
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
* @param offset Offset used to calculate the beginning of input conv queue
* @param nch The channel number
*/
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
/**
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* Usually, this layer is used as first layer for 8-bit network.
*
* @Warning All input and output convolution queues and matrices must be allocated before the call.
* Input is a 16-bit queue pointer array, output is an 8-bit queue pointer array.
* The function is declared void: nothing is returned to the caller.
*
* @param in Input 16bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number (presumably the number of queues in the in/out arrays - TODO confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
/**
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queues and matrices must be allocated before the call.
* The function is declared void: nothing is returned, the result is delivered through the
* output queue array and should not be freed separately.
*
* @param in Input 8bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number (presumably the number of queues in the in/out arrays - TODO confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
int nch, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of 8-bit dilation layer as follows
*
*               |-> [gate(sigmoid)] -|
*       input - |                    |-> (*) - output
*               |-> [filter(tanh)]  -|
*
* @Warning All input and output convolution queues and matrices must be allocated before the call.
* The function is declared void: nothing is returned, the result is delivered through the
* output queue array and should not be freed separately.
*
* @param in Input 8-bit fixed-point convolution queue array
* @param out Output 8-bit fixed-point convolution queue array
* @param nch The channel number (presumably the number of queues in the in/out arrays - TODO confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
//NOTE(review): the declarations below are undocumented in this header; every remark is an
//assumption derived from the signature or from a documented sibling and must be confirmed.

//Presumably zeroes the items of the nch queues in cqm (multi-channel form of a queue bzero) - confirm.
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
//Presumably allocates an 8-bit queue of length n with c elements per entry from PSRAM,
//mirroring dl_convq_queue_alloc_from_psram - confirm, including the out-of-memory result.
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
//Dilation layer variant taking a dl_convq_queue_t input and a dl_convq8_queue_t output, with
//dl_matrix2dq_t kernels and biases. Returns a qtp_t pointer; its meaning/ownership - TODO confirm.
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
//Dilation layer variant on 8-bit queues. Note the kernels are dl_matrix2dq8_t while the biases
//are dl_matrix2dq_t. Returns a qtp_t pointer; its meaning/ownership - TODO confirm.
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
//Presumably the 8-bit counterpart of dl_convq_lstm_layer; the weights are dl_matrix2dq8_t while
//the bias is dl_matrix2dq_t. Ownership of the returned matrix - TODO confirm.
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
//Atrous convolution variant taking a dl_convq8_queue_t input and a dl_convq_queue_t output;
//the meaning of the "_s3" suffix is not visible here - TODO confirm.
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
//Presumably debugging helpers that print the queue element selected by offset - confirm.
void print_convq8(dl_convq8_queue_t *cq, int offset);
void print_convq(dl_convq_queue_t *cq, int offset);
//Presumably applies relu to an 8-bit queue; which element(s) it touches is not visible here - confirm.
void dl_relu_convq8(dl_convq8_queue_t *cq);
//Takes no arguments; presumably releases internal state used by the 8-bit LSTM code - confirm.
void lstmq8_free(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,382 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ_QUEUE_H
#define DL_LIB_CONVQ_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib.h"
#ifdef __cplusplus
extern "C" {
#endif
//Fixed-point convolution FIFO queue.
//Dimensions are noted as [nch, n, c]; NOTE(review): presumably the item layout order - confirm.
typedef struct {
int n; /**< The length of the queue */
int c; /**< The number of elements in each queue entry */
int front; /**< The front (top) position of the queue */
int nch; /**< The channel count of the queue (originally described as "the multiple of queue") */
int exponent; /**< The values in items should be multiplied by pow(2,exponent)
to get the real values */
qtp_t *itemq; /**< Pointer to item array */
} dl_convq_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
/**
* @brief Allocate a fixed-point multi-channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief NOTE(review): undocumented. Presumably copies data from the convolution queue into an
* existing fixed-point matrix - confirm against the implementation.
*
* @param cq Input fixed-point convolution queue
* @param out Destination fixed-point matrix
* @param row NOTE(review): meaning not visible here (row index or row count?) - TODO confirm
*/
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq_queue_free(dl_convq_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq The fixed-point convolution queue point
*/
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
/**
* @brief Move the front pointer of the queue forward, so that
* the first (oldest) element becomes the last (newest) element.
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
/**
* @brief NOTE(review): undocumented. Presumably the n-element form of dl_convq_queue_pop
* (advance the front pointer by n entries) - confirm against the implementation.
*
* @param cq Input fixed-point convolution queue
* @param n Presumably the number of entries to pop - TODO confirm
* @return A qtp_t pointer; which element it addresses is not visible here - TODO confirm
*/
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input fixed-point convolution queue
* @param a The new element, passed as a fixed-point matrix
* @param shift NOTE(review): undocumented; presumably a shift applied to the values of a when
* they are stored in the queue - TODO confirm direction and units
*/
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief NOTE(review): undocumented. Same signature as dl_convq_queue_push_by_qmf; how the two
* differ is not visible from this header - confirm against the implementation.
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit Presumably the number of integer bits including the sign bit - TODO confirm
* @param f_bit Presumably the number of fractional bits - TODO confirm
*/
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief NOTE(review): undocumented. Presumably builds a float-point convolution queue from a
* fixed-point one - confirm against the implementation.
*
* @param cq1 Input fixed-point convolution queue
* @return A dl_conv_queue_t pointer; whether it is newly allocated and who frees it - TODO confirm
*/
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param last_num Offset from the front of the queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of convolution queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
/**
* @brief Does a tanh operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, and does a
* tanh operation by this pointer.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @note Declared void: no pointer is returned to the caller.
*/
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on one element of a multi channel convolution queue.
* Gets the pointer of the element in the convolution queue by offset, and does a
* tanh operation by this pointer.
*
* @param cqm Input fixed-point multi channel convolution queue
* @param offset Offset from the front of the queue
* @param nch The channel number of cqm
* @note Declared void: no pointer is returned to the caller.
*/
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
/**
* @brief Does a relu operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, and does a
* relu operation by this pointer.
*
* @param cq Input fixed-point convolution queue
* @param clip NOTE(review): undocumented float value; presumably the bound the relu output
* is clipped to - TODO confirm
* @param last_num Presumably the offset from the front of the queue, as for
* dl_get_queue_itemq - TODO confirm
* @note Declared void: no pointer is returned to the caller.
*/
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
/**
* @brief Does a softmax operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset; the input data
* stays as it is. Results are saved into the *out* array.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param out Old array to re-use. Passing NULL will allocate a new array.
* @return softmax results
*/
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param prenum Presumably the preload size (0: do not use preload function), as documented
* for dl_atrous_conv1dq_mc_steps - TODO confirm
* @return The result of atrous convolution
*/
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
*               |-> [gate(sigmoid)] -|
*       input - |                    |-> (*) - output
*               |-> [filter(tanh)]  -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @param offset Presumably the offset to calculate input convq, as documented for
* dl_dilation_layerq_mc_steps - TODO confirm
* @param prenum Presumably the preload size (0: do not use preload function), as documented
* for dl_dilation_layerq_mc_steps - TODO confirm
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int offset, int prenum);
//NOTE(review): undocumented. Takes the same parameters as dl_dilation_layerq_steps except that
//there is no offset argument; presumably the same dilation layer without a queue offset - confirm.
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int prenum);
//NOTE(review): undocumented. Same parameters as dl_dilation_layerq but without the explicit
//filter_shift/gate_shift arguments; how the shifts are chosen is not visible here - confirm.
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
//NOTE(review): undocumented. Takes the same parameters as dl_atrous_conv1dq plus an offset
//argument; presumably the offset selects the starting position in the input queue - confirm.
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
/**
* @brief Add a pair of fixed-point convolution queues item-by-item, and return a float-point convolution queue
*
* @param cq1 First fixed-point convolution queue
* @param cq2 Second fixed-point convolution queue
* @return The result as a float-point convolution queue
*/
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
/**
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
*
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
* the kernel is parameters *h_weight*.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param in_weight the LSTM kernel needed by first part
* @param h_weight the LSTM kernel needed by second part
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
* @param in_shift Shift ratio used in first part
* @param h_shift Shift ratio used in second part
* @param prenum NOTE(review): undocumented; presumably the preload size used by the
* convolution functions in this header - TODO confirm
* @return The result of LSTM layer
*/
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
//NOTE(review): undocumented. LSTM variant that takes a single weight matrix (instead of the
//in_weight/h_weight pair of dl_convq_lstm_layer) plus step and shift arguments whose meaning is
//not visible here - confirm against the implementation.
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
//NOTE(review): undocumented. Same parameter list as dl_convq_lstm_layer but without
//in_shift/h_shift and without const qualifiers on the inputs, so the callee is allowed to
//modify them; whether it does is not visible here - confirm.
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
dl_matrix2dq_t *bias, int prenum);
/**
* @brief Allocate a fixed-point multi channel convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @param nch The channel number of the convolution queue
* @return The multi channel convolution queue (returned as dl_convq_queue_t **), or NULL if out of memory
*/
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a fixed-point multi channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number of cqm
*/
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset the offset to calculate input convq
* @param prenum the preload size, 0: do not use preload function
* @return The result of atrous convolution
*/
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* kernel,
dl_matrix2dq_t* bias,
int shift,
int offset,
int prenum);
/**
* @brief Fast implement of dilation layer as follows for multi channel input
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @param offset The offset to calculate input convq
* @param prenum The preload size, 0: do not use preload function
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* filter_kernel,
dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel,
dl_matrix2dq_t* gate_bias,
int filter_shift,
int gate_shift,
int offset,
int prenum);
//NOTE(review): the declarations below are undocumented in this header; every remark is an
//assumption derived from the signature and must be confirmed against the implementation.

//Presumably self-test entry points for the atrous convolution and LSTM queue code - confirm.
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
void test_lstm_convq(int size, int in_dim, int lstm_cell);
//Same parameter list as dl_tanh_convq_mc; how it differs is not visible here - confirm.
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
//Takes the parameters of dl_convq_queue_push_by_qmf plus an offset and a channel index;
//presumably converts the float item to Qm.f and stores it at that position - confirm.
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
//Presumably zeroes the items of the nch queues in cqm (multi channel form of
//dl_convq_queue_bzero) - confirm.
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,257 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIX_H
#define DL_LIB_MATRIX_H
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
//Floating-point element type used for the items of dl_matrix2d_t
typedef float fptp_t;
#if CONFIG_BT_SHARE_MEM_REUSE
extern multi_heap_handle_t gst_heap;
#endif
//Flags for matrices (OR-ed together in the flags field of a matrix)
#define DL_MF_FOREIGNDATA 1 /**< Matrix pointer and item data actually points to another matrix and should not be freed */
#define DL_MF_FOREIGNITEM 2 /**< Only item data actually points to another matrix and should not be freed */
//'Normal' float matrix. Items are addressed as item[x + y*stride] (see DL_ITM).
typedef struct {
int w; /**< Width */
int h; /**< Height */
int stride; /**< Row stride, essentially how many items to skip to get to the same position in the next row */
int flags; /**< Flags. OR of DL_MF_* values */
fptp_t *item; /**< Pointer to item array */
} dl_matrix2d_t;
//Macro to quickly access the raw items in a matrix.
//m is a pointer to a matrix with 'item' and 'stride' members, x is the column and y the row.
//Expands to an lvalue, so it can be read or assigned. m and the whole expansion are
//parenthesized so that arguments such as &mat or p + 1 bind correctly; note that m is
//evaluated twice, so it must not have side effects.
#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
/**
* @brief Free a matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->item space as well.
*
* @param m Matrix to free
*/
void dl_matrix_free(dl_matrix2d_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrix_zero(dl_matrix2d_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
* @return The copied matrix. NOTE(review): the failure result (presumably NULL when out of
* memory, as for dl_matrix_alloc) is not visible from this header - TODO confirm
*/
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @param src The existing matrix to slice
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief Select a range of items from an existing matrix and flatten them into one dimension.
*
* @Warning The results are flattened in row-major order.
*
* @param src The existing matrix to take the items from
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief Generate a matrix from existing floating-point data
*
* @param w Width of resulting matrix
* @param h Height of resulting matrix
* @param stride Row stride of the data. NOTE(review): presumably counted in items, like the
* stride field of dl_matrix2d_t - TODO confirm
* @param data Data to populate matrix with
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
*/
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
/**
* @brief Multiply a pair of matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two matrices : res=a.b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Add a pair of matrices item-by-item: out=a+b
*
* @param a First matrix
* @param b Second matrix
* @param out Added data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Divide a pair of matrices item-by-item: out=a/b
*
* @param a First matrix
* @param b Second matrix
* @param out Divided data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Subtract a matrix from another, item-by-item: out=a-b
*
* @param a First matrix
* @param b Second matrix
* @param out Subtracted data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Add a constant to every item of the matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
/**
* @brief Concatenate the rows of two matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated matrix with as values a|b
*/
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief NOTE(review): undocumented. Presumably concatenates b to a along the height - confirm
* against the implementation.
*
* @param a First matrix. Unlike dl_matrix_concat it is not const, so the callee is allowed to
* modify it; whether it does is not visible here - TODO confirm
* @param b Second matrix
* @return A matrix pointer; whether it is newly allocated or is a itself - TODO confirm
*/
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Print the contents of a matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrix(const dl_matrix2d_t *a);
/**
* @brief Return the average square error given a correct and a test matrix.
*
* ...Well, more or less. If anything, it gives an indication of the error between
* the two. Check the code for the exact implementation.
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return value indicating the relative difference between matrices
*/
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Get a specific item from the matrix
*
* Please use these for external matrix access instead of DL_ITM
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
return DL_ITM(m, x, y);
}
/**
* @brief Write a single item of a float matrix
*
* Preferred accessor for code outside the matrix library; use it rather than DL_ITM.
*
* @param m Matrix to write to
* @param x Column address
* @param y Row address
* @param val Value stored at column x of row y, i.e. in item[x + y*stride]
*/
static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val)
{
    /* Rows are stride items apart; x selects the item within row y. */
    const int index = x + y * m->stride;

    m->item[index] = val;
}
/**
* @brief NOTE(review): undocumented. Presumably reports the smallest and largest item value of
* the matrix - confirm against the implementation.
*
* @param m Matrix to inspect
* @param rmin Output pointer; presumably receives the minimum item value - TODO confirm
* @param rmax Output pointer; presumably receives the maximum item value - TODO confirm
*/
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,387 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ_H
#define DL_LIB_MATRIXQ_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
//Item type of quantized matrices and queues: a signed 16-bit fixed-point value
typedef int16_t qtp_t;
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
//for easy use as a multiplicand without stressing out the flash cache too much.
//Items are addressed as itemq[y + x*stride] (see DL_ITMQ).
typedef struct {
int w; //Width
int h; //Height
int stride; //Normally equals h, not w!
int flags; //Flags; presumably an OR of DL_MF_* values as in dl_matrix2d_t - TODO confirm
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
qtp_t *itemq; //Pointer to the fixed-point item array
} dl_matrix2dq_t;
//Number of bits used to derive DL_QTP_RANGE (qtp_t is a signed 16-bit type)
#define DL_QTP_SHIFT 15
//(1<<15)-1 = 32767
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
//Macro to quickly access the raw items in a quantized matrix.
//m is a pointer to a matrix with 'itemq' and 'stride' members, x is the column and y the row;
//the index is y + x*stride because the storage is inverted compared to DL_ITM.
//Expands to an lvalue. m and the whole expansion are parenthesized so that arguments such as
//&mat or p + 1 bind correctly; note that m is evaluated twice, so it must not have side effects.
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
//NOTE(review): undocumented; presumably a sentinel shift value asking a routine to pick the
//shift automatically - confirm against the functions that accept a shift argument
#define DL_SHIFT_AUTO 32
/**
* @info About quantized matrices and shift values
*
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
* time. Quantized matrices can speed up your operations, but come with some quirks, and
* it's good to understand how they work before using them.
*
* The data in the quantized matrix type is stored similarly to floating-point types:
* when storing a real value, the value is stored as a mantissa (base number) and an
* exponent. The 'real' value that can be re-derived from those two numbers is something
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
* the standard floating point implementations like e.g. IEEE-754.
*
* The difference with respect to quantized matrices is that for a quantized matrix, it is
* assumed all values stored have more-or-less the same order of magnitude. This allows the
* matrix to only store all the mantissas, while the exponents are shared; there is only one
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
* integer arithmetic. It also nets us some memory savings - while normally a floating point
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
* memory requirements.
*
* While most of the details of handling the intricacies of the quantized matrixes are done
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
* specifically in places where addition/subtraction/division happens.
*
* The problem is that the routines do not know what the size of the resulting operation is. For
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
* assume the mantissas need to be scaled back, we may lose precision.
*
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
* the argument is zero, the routine will be conservative, that is, increase the exponent of
* the result to such an extent it's mathematically impossible a value in the result will exceed
* the maximum value that can be stored. However, when this argument is larger than zero, the
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
* value possible. (Neural networks usually are okay with this happening for a limited amount
* of matrix indices).
*
* For deciding on these shift values, it is recommended to start with a shift value of one, then
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
* else.
*
* For neural networks and other noise-tolerant applications, note that even when
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
* to slightly improved precision. Feel free to experiment.
**/
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
/**
 * NOTE(review): undocumented. Same signature as dl_matrixq_alloc(); the name suggests
 * the matrix is placed in PSRAM - confirm against the implementation.
 */
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
/**
 * @brief Convert a floating-point matrix to a quantized matrix
 *
 * @param m Floating-point matrix to convert
 * @param out Quantized matrix to re-use. If NULL, allocate a new one.
 * @return The quantized version of the floating-point matrix
 */
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
/**
 * TODO: DESCRIBE THIS FUNCTION
 *
 * NOTE(review): the signature matches dl_matrixq_from_matrix2d() plus m_bit and f_bit;
 * these presumably select a fixed Qm.f format (integer bits / fractional bits) for the
 * conversion - confirm against the implementation.
 */
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
/**
 * @brief Convert a quantized matrix to a floating-point one.
 *
 * @param m Quantized matrix to convert
 * @param out Floating-point matrix to re-use. If NULL, allocate a new one.
 * @return The floating-point version of the quantized matrix
 **/
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
/**
 * @brief Free a quantized matrix
 * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
 *
 * @param m Matrix to free
 */
void dl_matrixq_free(dl_matrix2dq_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrixq_zero(dl_matrix2dq_t *m);
/**
 * @brief Copy the matrix into psram
 * Copy the matrix from flash or iram/psram into psram
 *
 * @param m Matrix to copy
 * @return The copy of the matrix. NOTE(review): the failure behaviour is not documented
 *         here; presumably NULL when out of memory - confirm.
 */
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a fixed-point matrix.
*
* Use this only if you suspect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
* much slower than dl_matrixq_dot .
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a floating-point matrix.
*
* Use this only if you suspect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
* much slower than dl_matrixq_dot_matrix_out.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
*
* @param a First multiplicand; float matrix
* @param b Second multiplicand; quantized matrix
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrixq(const dl_matrix2dq_t *a);
/**
 * @brief Add a pair of quantized matrices item-by-item: res=a+b
 *
 * @param a First matrix
 * @param b Second matrix
 * @param res Added data. Can be equal to a or b to overwrite that.
 * @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
 */
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
 * @brief Generate a new matrix using a range of items from an existing matrix.
 * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
 * to the existing data. Changing the data in the resulting matrix, as a result, will also change
 * the data in the existing matrix that has been sliced.
 *
 * @warning In contrast to the floating point equivalent of this function, the fixed-point version
 * of this has the issue that as soon as the output exponent of one of the slices changes, the data
 * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
 * use this function, either treat the slices as read-only, or assume the sliced matrix contains
 * garbage after modifying the data in one of the slices.
 *
 * @param src Existing matrix to take the slice from
 * @param x X-offset of the origin of the returned matrix within the sliced matrix
 * @param y Y-offset of the origin of the returned matrix within the sliced matrix
 * @param w Width of the resulting matrix
 * @param h Height of the resulting matrix
 * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
 * @return The resulting slice matrix, or NULL if out of memory
 */
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
 * @brief Select a range of items from an existing matrix and flatten them into one dimension.
 *
 * @warning The results are flattened in row-major order.
 *
 * @param src Existing matrix to select the items from
 * @param x X-offset of the origin of the returned matrix within the sliced matrix
 * @param y Y-offset of the origin of the returned matrix within the sliced matrix
 * @param w Width of the resulting matrix
 * @param h Height of the resulting matrix
 * @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
 * @return The resulting flattened matrix, or NULL if out of memory
 */
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Subtracted data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
*/
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
/**
 * @brief Divide a pair of quantized matrices item-by-item: out=a/b
 *
 * @param a First matrix
 * @param b Second matrix
 * @param out Divided data. Can be equal to a or b to overwrite that.
 * @param shift Shift value; see the notes on quantized matrices and shift values at the top of this header.
 */
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
/**
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
* rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Concatenate the rows of two quantized matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated quantized matrix with as values a|b
*/
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
 * @brief Add a constant to every item of the quantized matrix
 *
 * @param subj Matrix to add the constant to
 * @param add The constant
 * @param shift Shift value; see the notes on quantized matrices and shift values at the top of this header.
 */
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
/**
* @brief Check the sanity of a quantized matrix
*
* Due to the nature of quantized matrices, depending on the calculations a quantized
* matrix is the result of and the shift values chosen in those calculations, a quantized
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
* most significant mantissa bits are unused, or because a fair amount of mantissas are
* clipped. This function checks if this is the case and will report a message to stdout
* if significant loss of precision is detected.
*
* @param m The quantized matrix to check
* @param name A string to be displayed in the message if the sanity check fails
* @return True if matrix is sane, false otherwise
**/
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
/**
 * @brief Re-adjust the exponent of the matrix to fit the mantissa better
 *
 * This function will shift up all the data in the mantissas so there are no
 * most-significant bits that are unused in all mantissas. It will also adjust
 * the exponent to keep the actual values in the matrix the same.
 *
 * Some operations done on a matrix, especially operations that re-use the
 * result of earlier operations done in the same way, can lead to the loss of
 * data because the exponent of the quantized matrix is never re-adjusted. You
 * can do that explicitly by calling this function.
 *
 * @param m The matrix to re-adjust
 **/
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
/**
* @brief Get the floating-point value of a specific item from the quantized matrix
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
/**
* @brief Set a specific item in the quantized matrix to the given
* floating-point value
*
* @warning If the given value is more than the exponent in the quantized matrix
* allows for, all mantissas in the matrix will be shifted down to make the value
* 'fit'. If, however, the exponent is such that the value would result in a
* quantized mantissa of 0, nothing is done.
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ8_H
#define DL_LIB_MATRIXQ8_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#include "dl_lib.h"
#include "dl_lib_matrixq.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Storage type of one 8-bit quantized item (a signed mantissa). */
typedef int8_t q8tp_t;
/**
 * 8-bit quantized matrix. The fields mirror dl_matrix2dq_t; only the item
 * type differs (q8tp_t instead of qtp_t).
 */
typedef struct {
    int w;
    int h;
    int stride; //Normally equals h, not w!
    int flags;
    int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
    q8tp_t *itemq;
} dl_matrix2dq8_t;
/** Number of magnitude bits in a q8tp_t mantissa. */
#define DL_Q8TP_SHIFT 7
/** Largest positive mantissa value, (1 << DL_Q8TP_SHIFT) - 1. */
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
/**
 * Item at column x, row y of 8-bit quantized matrix m, stored at index
 * y + x * stride. Expands to an lvalue. Every argument and the whole expansion
 * are parenthesized so that expressions such as &mat or p + 1 can be passed
 * as m. Note that m is evaluated twice.
 */
#define DL_ITMQ8(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
/**
 * @brief Free a quantized matrix
 * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
 *
 * @param m Matrix to free
 */
void dl_matrixq8_free(dl_matrix2dq8_t *m);
/**
* @brief Copy a quantized matrix
* Copy a quantized matrix from flash or iram/psram
*
* @param m Matrix to copy
*/
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
/**
 * @brief Convert a floating-point matrix to an 8-bit quantized matrix
 *
 * @param m Floating-point matrix to convert
 * @param out Quantized matrix to re-use. If NULL, allocate a new one.
 * @return The quantized version of the floating-point matrix
 */
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,105 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AEC_H_
#define _ESP_AEC_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define USE_AEC_FFT // Not kiss_fft
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
#define AEC_FRAME_LENGTH_MS 32
typedef struct aec_handle_t aec_handle_t;
// AEC operating modes. Note that the value 2 is not assigned in this enum.
typedef enum {
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
} aec_mode_t;
/**
* @brief Creates an instance to the AEC structure.
* Please get frame size by aec_get_chunksize() function
*
* @param sample_rate The Sampling frequency (Hz) must be 16000.
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
* @param channel_num The input microphone channel number
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
/**
* @brief Creates an instance to the AEC structure, same with aec_create().
*
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
* @param channel_num The input microphone channel number
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
/**
 * @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
 *
 * @warning The indata, refdata and outdata must be 16-bit signed. Please allocate memory by heap_caps_aligned_alloc().
 *
 * @param handle    The instance of AEC.
 * @param indata    An array of 16-bit signed audio samples from mic.
 *                  Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
 * @param refdata   An array of 16-bit signed audio samples sent to the speaker.
 * @param outdata   Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
 * @return None
 *
 */
void aec_process(const aec_handle_t *handle, int16_t *indata, int16_t *refdata, int16_t *outdata);
/**
* @brief Get frame size of AEC (the samples of one frame)
* @param handle The instance of AEC.
* @return Frame size
*/
int aec_get_chunksize(const aec_handle_t *handle);
/**
* @brief Get AEC mode string
*
* @param aec_mode The mode of AEC.
*
* @return AEC mode string
*/
char * aec_get_mode_string(aec_mode_t aec_mode);
/**
 * @brief Free the AEC instance
 *
 * @param handle The instance of AEC.
 *
 * @return None
 *
 */
void aec_destroy(aec_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AEC_H_

View File

@@ -0,0 +1,81 @@
#ifndef _ESP_AFE_AEC_H_
#define _ESP_AFE_AEC_H_
#include "esp_aec.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// State of one AFE-level AEC instance, as returned by afe_aec_create().
typedef struct {
aec_handle_t *handle; // Underlying AEC instance (see esp_aec.h)
aec_mode_t mode; // AEC mode
afe_pcm_config_t pcm_config; // PCM channel configuration (see afe_pcm_config_t)
int frame_size; // NOTE(review): presumably the frame size in samples, cf. afe_aec_get_chunksize() - confirm
int16_t *data; // NOTE(review): purpose not visible from this header; presumably an internal work buffer - confirm
} afe_aec_handle_t;
/**
 * @brief Creates an instance of the AEC structure.
 *
 * @warning Currently only supports 1 microphone channel and 1 playback channel.
 * If the input has multiple microphone channels and playback channels, just the first microphone channel and playback
 * channel will be selected.
 *
 * The input format, same as afe config:
 * M to represent the microphone channel
 * R to represent the playback reference channel
 * N to represent an unknown or unused channel
 *
 * For example, input_format="MMNR" indicates that the input data consists of four channels,
 * which are the microphone channel, the microphone channel, an unused channel, and the playback channel
 *
 * @param input_format The input format
 * @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
 * Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
 * esp32c5.
 * @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
 * @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
 *
 * @return afe_aec_handle_t* The created AEC instance
 */
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
/**
 * @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
 *
 * @param handle The instance of AEC.
 * @param indata Input audio data, format is defined by input_format.
 * @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned;
 * please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory
 * @return The size of outdata in bytes.
 */
size_t afe_aec_process(afe_aec_handle_t *handle, const int16_t *indata, int16_t *outdata);
/**
* @brief Get frame size of AEC (the samples of one frame)
* @param handle The instance of AEC.
* @return Frame size
*/
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
/**
 * @brief Free the AEC instance
 *
 * @param handle The instance of AEC.
 *
 * @return None
 *
 */
void afe_aec_destroy(afe_aec_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AFE_AEC_H_

View File

@@ -0,0 +1,288 @@
#pragma once
#include "esp_aec.h"
#include "esp_agc.h"
#include "esp_nsn_models.h"
#include "esp_vad.h"
#include "esp_vadn_models.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "model_path.h"
#include "stdbool.h"
#include "stdint.h"
#include "stdlib.h"
#ifdef __cplusplus
extern "C" {
#endif
// AFE: Audio Front-End
// SR: Speech Recognition
// VC: Voice Communication
// Set AFE_SR mode
typedef enum {
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
} afe_sr_mode_t;
// Set AFE mode
typedef enum {
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
} afe_mode_t;
// Set AFE type
typedef enum {
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
AFE_TYPE_VC = 1, // Voice communication scenarios, 16KHz input, including nonlinear noise suppression
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8KHz input, note that the input data must be 8KHz
} afe_type_t;
typedef enum {
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
} afe_memory_alloc_mode_t;
// Peak amplitude modes of the fetch audio; the enum values are the peak levels in dB.
typedef enum {
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
} afe_mn_peak_agc_mode_t;
typedef struct {
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
int mic_num; // microphone channel number
uint8_t *mic_ids; // microphone channel indices
int ref_num; // playback reference channel number
uint8_t *ref_ids; // playback reference channel indices
int sample_rate; // sample rate of audio
} afe_pcm_config_t;
typedef enum {
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
AFE_NS_MODE_NET = 1, // please use model name of NSNET
} afe_ns_mode_t;
typedef enum {
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
} afe_agc_mode_t;
/**
 * @brief Function to get the debug audio data
 *
 * @param data The debug audio data, which must not be modified. It should be copied away as soon as possible
 * to avoid blocking for too long.
 * @param data_size The number of bytes of data.
 * @returns None
 */
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
typedef enum {
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
AFE_DEBUG_HOOK_MAX = 2
} afe_debug_hook_type_t;
typedef struct {
afe_debug_hook_type_t hook_type; // debug type of hook
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
} afe_debug_hook_t;
// Complete AFE configuration: per-algorithm switches and parameters plus the general AFE settings.
typedef struct {
/********** AEC(Acoustic Echo Cancellation) **********/
bool aec_init; // Whether to init aec
aec_mode_t aec_mode; // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
int aec_filter_length; // The filter length of aec
/********** SE(Speech Enhancement, microphone array processing) **********/
bool se_init; // Whether to init se
/********** NS(Noise Suppression) **********/
bool ns_init; // Whether to init ns
char *ns_model_name; // Model name of ns
afe_ns_mode_t afe_ns_mode; // Model mode of ns
/********** VAD(Voice Activity Detection) **********/
bool vad_init; // Whether to init vad
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
char *vad_model_name; // The model name of vad. If it is null, WebRTC VAD will be used.
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
// 1000 ms
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
// If you find vad cache can not cover all speech, please increase this value.
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
/********** WakeNet(Wake Word Engine) **********/
bool wakenet_init; // Whether to init wakenet
char *wakenet_model_name; // The model name of wakenet 1
char *wakenet_model_name_2; // The model name of wakenet 2 if has wakenet 2
det_mode_t wakenet_mode; // The mode of wakenet
/********** AGC(Automatic Gain Control) **********/
bool agc_init; // Whether to init agc
afe_agc_mode_t
agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after far linear gain.
int agc_compression_gain_db; // Compression gain in dB (default 9)
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
/********** General AFE(Audio Front End) parameter **********/
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
// directly on the output amplitude: out_linear_gain * amplitude.
bool debug_init; // NOTE(review): undocumented; presumably whether to init the debug hooks - confirm
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet
} afe_config_t;
/**
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
* on the chip target and input format. You can manually fine-tune it after creating the configuration
*
* The input format:
* M to represent the microphone channel
* R to represent the playback reference channel
* N to represent an unknown or unused channel
*
* For example, input_format="MMNR" indicates that the input data consists of four channels,
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
*
* @param input_format The input format
* @param models Models from partition, which is configured by Kconfig
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_config_t* The default config of afe
*/
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
/**
 * @brief Check AFE configuration and make sure it is correct.
 *
 * @warning If there is a configuration conflict, this function will modify some parameters.
 * The guiding principle behind these modifications is to maintain the highest performance of the output audio
 * and results, and to remove the conflict between different algorithms.
 *
 * For example, if the input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
 * If the SE(BSS) algorithm is deactivated, only the first microphone channel will be used.
 *
 * @param afe_config Input AFE config
 *
 * @return afe_config_t* The modified AFE config
 */
afe_config_t *afe_config_check(afe_config_t *afe_config);
/**
* @brief Parse input format
*
* @param input_format The input format, same with afe_config_init() function
* @param pcm_config The pcm config
*
* @return true if the input format is parsed successfully, otherwise false
*/
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
/**
* @brief Parse I2S input data
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param mic_data The output microphone data
* @param ref_data The output playback reference data
* @param pcm_config The pcm config
*
*/
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
/**
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param channel_num The channel number of data
* @param out_data The output data
*
*/
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
/**
* @brief Format input data, from contiguous arrangement to interleaved arrangement
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param channel_num The channel number of data
* @param out_data The output data
*
*/
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
/**
* @brief Adjust the gain of input data
*
* @warning the input data will be modified inplace.
*
* @param data The input audio data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param factor The gain factor
*
* @return int16_t* The output audio data
*/
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
/**
 * @brief NOTE(review): the original brief ("Adjust the gain of input data") and the in-place
 *        warning were copied from afe_adjust_gain(). Judging by the name and parameters, this
 *        presumably concatenates in_data into out_data - confirm against the implementation.
 *
 * @param in_data The input audio data
 * @param in_frame_size Frame size of the input data
 * @param channel_num The channel number of input data, which is same as output data
 * @param out_data The output audio data
 * @param out_frame_size Frame size of the output data
 *
 */
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
/**
* @brief Copy the afe config
*
* @param dst_config The destination afe config (written)
* @param src_config The source afe config (read-only)
*
* @return The destination afe config
*/
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
/**
* @brief Print the afe config
*
* @param afe_config The afe config to print (read-only)
*/
void afe_config_print(const afe_config_t *afe_config);
/**
* @brief Allocate afe config
*
* @note Declared with an explicit `(void)` parameter list. In C (before C23) an empty `()`
*       declares a function with unspecified parameters, so a call with stray arguments would
*       not be diagnosed by the compiler.
*
* @return The afe config pointer
*         (NOTE(review): presumably released with afe_config_free -- confirm)
*/
afe_config_t *afe_config_alloc(void);
/**
* @brief Free afe config
*
* @param afe_config The afe config pointer to free
*/
void afe_config_free(afe_config_t *afe_config);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,48 @@
#ifndef _ESP_AFE_DOA_H_
#define _ESP_AFE_DOA_H_
#include "esp_doa.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief State of an AFE direction-of-arrival (DOA) instance; created by afe_doa_create and
*        released by afe_doa_destroy
*/
typedef struct {
doa_handle_t *doa_handle; // underlying DOA processor handle (see esp_doa.h)
afe_pcm_config_t pcm_config; // pcm config; NOTE(review): presumably parsed from `input_format` -- confirm
int16_t *leftdata; // NOTE(review): presumably the left-channel buffer handed to the DOA processor -- confirm
int16_t *rightdata; // NOTE(review): presumably the right-channel buffer handed to the DOA processor -- confirm
int frame_size; // NOTE(review): presumably the number of samples per channel per frame -- confirm
} afe_doa_handle_t;
/**
* @brief Initialize SRP-PHAT processor
* @param input_format The input format
* @param fs Sampling rate (Hz), e.g., 16000
* @param resolution Angular search resolution (degrees), e.g., 20
* @param d_mics Microphone spacing (meters), e.g., 0.06
* @param input_timedate_samples Number of input samples, e.g., 1024
*        (NOTE(review): "timedate" in the parameter name looks like a typo for "time-domain" -- confirm)
* @return Initialized afe_doa_handle_t object pointer. Using the example values above is
*         recommended for better performance.
*/
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
/**
* @brief Process audio frame for direction estimation
* @param handle afe_doa_handle_t instance pointer
* @param indata Input audio data, format is defined by the `input_format` passed to afe_doa_create.
* @return Estimated sound direction in degrees, e.g., 0-180
*/
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
/**
* @brief Release all allocated resources
* @param handle afe_doa_handle_t instance pointer to be freed
*/
void afe_doa_destroy(afe_doa_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_AFE_DOA_H_ */

View File

@@ -0,0 +1,237 @@
#pragma once
#include "esp_afe_config.h"
#include "stdbool.h"
#include "stdint.h"
#include "stdlib.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#ifdef __cplusplus
extern "C" {
#endif
// Glossary used throughout this header:
// AFE: Audio Front-End
// SR: Speech Recognition
// afe_sr/AFE_SR: the audio front-end for speech recognition
// Opaque AFE_SR data container; only handled through pointers by the operations declared below
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
/**
* @brief The state of vad
*
* @deprecated Both values are deprecated; please use vad_state_t instead.
*/
typedef enum {
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
} afe_vad_state_t;
/**
* @brief The result of fetch function
*
* Returned (by pointer) from the `fetch` and `fetch_with_delay` operations.
*/
typedef struct afe_fetch_result_t {
int16_t *data; // the target channel data of audio.
int data_size; // the size of data. The unit is byte.
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
// audio that was truncated.
int vad_cache_size; // the size of vad_cache. The unit is byte.
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
// (note: invalid in vc). If wakenet is enabled, the window length is the receptive field of
// wakenet (about 1.5s), otherwise it is the frame length.
wakenet_state_t wakeup_state; // the value is wakenet_state_t
int wake_word_index; // if the wake word is detected, this stores the wake word index, which starts from 1.
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
// starts from 1.
vad_state_t vad_state; // the value is vad_state_t (afe_vad_state_t is deprecated)
int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. The unit is the number of samples.
int ret_value; // the return state of fetch function
int16_t *raw_data; // the multi-channel output data of audio.
int raw_data_channels; // the channel number of raw data
float ringbuff_free_pct; // the percent of ringbuff free size. If the value is larger than 0.5, it means the ringbuff is busy.
// NOTE(review): a *free* percentage above 0.5 indicating a busy buffer looks inverted -- confirm.
void *reserved; // reserved for future use
} afe_fetch_result_t;
/**
* @brief Function to initialize an AFE_SR instance
*
* @param afe_config The config of AFE_SR
* @returns Handle to the AFE_SR data
*/
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
/**
* @brief Get the amount of each channel samples per frame that need to be passed to the function
*
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* This type is used for both the `get_feed_chunksize` and `get_fetch_chunksize` operations.
*
* @param afe The AFE_SR object to query
* @return The amount of samples per channel per frame for the feed or fetch function
*/
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the channel number
*
* This type is used for the `get_channel_num`, `get_feed_channel_num` and
* `get_fetch_channel_num` operations.
*
* @param afe The AFE_SR object to query
* @return The amount of total channels
*/
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the sample rate of the samples to feed to the function
*
* @param afe The AFE_SR object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
/**
* @brief Feed samples of an audio stream to the AFE_SR
*
* @warning The input data should be arranged in the format of channel interleaving.
*          The last channel is reference signal if it has reference data.
*
* @param afe The AFE_SR object to feed
*
* @param in The input microphone signal, only support signed 16-bit @ 16 kHz. The frame size can be queried by the
*           `get_feed_chunksize`.
* @return The size of input
*/
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
/**
* @brief Fetch enhanced samples of an audio stream from the AFE_SR
*
* @warning The output is single channel data, no matter how many channels the input is.
*          Timeout is 2000 ms. If you want to adjust timeout, please refer to the definition of `fetch_with_delay`.
*
* @param afe The AFE_SR object to query
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
*         audio can be queried by the `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
/**
* @brief Fetch enhanced samples of an audio stream from the AFE_SR; same as the function `fetch`,
*        but with a caller-chosen timeout
*
* @warning The output is single channel data, no matter how many channels the input is.
*
* @param afe The AFE_SR object to query
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
*         audio can be queried by the `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
/**
* @brief Reset the ring buffer of the AFE.
*
* @param afe The AFE_SR object whose ring buffer is reset
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
/**
* @brief Set wakenet detection threshold
*
* @param afe The AFE_SR object to modify
* @param index The wakenet index, only 1 (wakenet1) or 2 (wakenet2) is supported
* @param threshold The wakenet detection threshold, the value is between 0.4 and 0.9999.
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
/**
* @brief Reset wakenet detection threshold to its initial state
*
* @param afe The AFE_SR object to modify
* @param index The wakenet index, only 1 (wakenet1) or 2 (wakenet2) is supported
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
/**
* @brief Reset one function/module/algorithm.
*
* Which module is reset depends on the interface member this type is bound to.
*
* @param afe The AFE_SR object to modify
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
/**
* @brief Disable one function/module/algorithm.
*
* Which module is disabled depends on the interface member this type is bound to.
*
* @param afe The AFE_SR object to modify
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
/**
* @brief Enable one function/module/algorithm.
*
* Which module is enabled depends on the interface member this type is bound to.
*
* @param afe The AFE_SR object to modify
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
/**
* @brief Print the pipeline of all functions/modules/algorithms.
*        The pipeline is the order of the functions/modules/algorithms.
*        The format looks like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
*
* @param afe The AFE_SR object to query
*/
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
/**
* @brief Destroy an AFE_SR instance
*
* @param afe AFE_SR object to destroy
*/
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
/**
* This structure contains the functions used to do operations on an AFE_SR.
*
* Several members share one function-pointer type; the member name tells which module or
* direction (feed/fetch) the operation applies to.
*/
typedef struct {
esp_afe_sr_iface_op_create_from_config_t create_from_config; // create an AFE_SR instance from an afe_config_t
esp_afe_sr_iface_op_feed_t feed; // feed interleaved input samples
esp_afe_sr_iface_op_fetch_t fetch; // fetch processed output (default timeout)
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay; // fetch processed output with a timeout in ticks
esp_afe_sr_iface_op_reset_buffer_t reset_buffer; // reset the ring buffer
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize; // samples per channel per frame for feed
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize; // samples per channel per frame for fetch
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num; // channel number of the feed data
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num; // channel number of the fetch data
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate; // sample rate, in Hz
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold; // set wakenet detection threshold
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold; // reset wakenet detection threshold
esp_afe_sr_iface_op_disable_func_t disable_wakenet; // disable wakenet
esp_afe_sr_iface_op_enable_func_t enable_wakenet; // enable wakenet
esp_afe_sr_iface_op_disable_func_t disable_aec; // disable aec
esp_afe_sr_iface_op_enable_func_t enable_aec; // enable aec
esp_afe_sr_iface_op_disable_func_t disable_se; // disable se
esp_afe_sr_iface_op_enable_func_t enable_se; // enable se
esp_afe_sr_iface_op_disable_func_t disable_vad; // disable vad
esp_afe_sr_iface_op_enable_func_t enable_vad; // enable vad
esp_afe_sr_iface_op_reset_op_t reset_vad; // reset vad
esp_afe_sr_iface_op_disable_func_t disable_ns; // disable ns
esp_afe_sr_iface_op_enable_func_t enable_ns; // enable ns
esp_afe_sr_iface_op_disable_func_t disable_agc; // disable agc
esp_afe_sr_iface_op_enable_func_t enable_agc; // enable agc
esp_afe_sr_iface_op_print_pipeline_t print_pipeline; // print the processing pipeline
esp_afe_sr_iface_op_destroy_t destroy; // destroy the AFE_SR instance
} esp_afe_sr_iface_t;
// Stores the AFE handle and data for the AFE tasks.
// NOTE(review): the type name `afe_task_into_t` looks like a typo for `afe_task_info_t`; kept as-is for compatibility.
typedef struct {
esp_afe_sr_data_t *afe_data; // AFE_SR instance data
esp_afe_sr_iface_t *afe_handle; // AFE_SR interface (function table) used to operate on afe_data
TaskHandle_t feed_task; // FreeRTOS task handle of the feed task
TaskHandle_t fetch_task; // FreeRTOS task handle of the fetch task
} afe_task_into_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,13 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_afe_sr_iface.h"
/**
* @brief Get the AFE_SR interface (function table) for the given afe config
*
* @param config The afe config (read-only)
* @return Pointer to the esp_afe_sr_iface_t to use with this config
*         (NOTE(review): failure behaviour, e.g. NULL, is not visible in this header -- confirm)
*/
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,47 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AGC_H_
#define _ESP_AGC_H_
#ifdef __cplusplus
extern "C" {
#endif
// AGC return codes: non-negative values are valid, negative values are errors.
// NOTE(review): the type name `ESP_AGE_ERR` looks like a typo for `ESP_AGC_ERR`; kept as-is for compatibility.
typedef enum {
ESP_AGC_SUCCESS = 0, // success
ESP_AGC_FAIL = -1, // agc fail
ESP_AGC_SAMPLE_RATE_ERROR = -2, // sample rate can be only 8khz, 16khz, 32khz
ESP_AGC_FRAME_SIZE_ERROR = -3, // the input frame must be exactly 10ms long, so the frame size follows from the sample rate
} ESP_AGE_ERR;
// AGC operating mode, passed to esp_agc_open.
typedef enum {
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
AGC_MODE_0 = 0, // Only saturation protection
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
} agc_mode_t;
/**
* @brief Create an AGC instance
*
* @param agc_mode The AGC operating mode, see agc_mode_t
* @param sample_rate The sample rate in Hz (ESP_AGC_SAMPLE_RATE_ERROR lists 8khz, 16khz, 32khz as the valid rates)
* @return Opaque AGC handle to pass to set_agc_config, esp_agc_process and esp_agc_close
*         (NOTE(review): presumably NULL on failure -- confirm)
*/
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
/**
* @brief Configure an AGC instance
*
* @param agc_handle The AGC handle returned by esp_agc_open
* @param gain_dB The gain in dB (NOTE(review): presumably the compressionGaindB mentioned in agc_mode_t -- confirm)
* @param limiter_enable Limiter switch (NOTE(review): presumably non-zero enables the limiter -- confirm)
* @param target_level_dbfs The target level (NOTE(review): presumably the targetLevelDbfs mentioned in agc_mode_t -- confirm)
*/
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
/**
* @brief Run AGC on one frame of audio
*
* @param agc_handle The AGC handle returned by esp_agc_open
* @param in_pcm The input 16-bit PCM samples
* @param out_pcm The output 16-bit PCM samples
* @param frame_size The frame size (ESP_AGC_FRAME_SIZE_ERROR states the frame must be 10ms long)
* @param sample_rate The sample rate in Hz
* @return NOTE(review): presumably one of the ESP_AGE_ERR codes -- confirm
*/
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
/**
* @brief Release an AGC instance
*
* @param agc_handle The AGC handle returned by esp_agc_open
*/
void esp_agc_close(void *agc_handle);
#ifdef __cplusplus
}
#endif
#endif // _ESP_AGC_H_

View File

@@ -0,0 +1,41 @@
#ifndef _ESP_DOA_H_
#define _ESP_DOA_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque DOA (direction-of-arrival) processor state; created by esp_doa_create
typedef struct doa_handle_t doa_handle_t;
/**
* @brief Initialize SRP-PHAT processor
* @param fs Sampling rate (Hz), e.g., 16000
* @param resolution Angular search resolution (degrees), e.g., 20
* @param d_mics Microphone spacing (meters), e.g., 0.06
* @param input_timedate_samples Number of input samples, e.g., 1024
*        (NOTE(review): "timedate" in the parameter name looks like a typo for "time-domain" -- confirm)
* @return Initialized doa_handle_t object pointer. Using the example values above is
*         recommended for better performance.
*/
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
/**
* @brief Release all allocated resources
* @param doa doa_handle_t instance pointer to be freed
*/
void esp_doa_destroy(doa_handle_t *doa);
/**
* @brief Process audio frame for direction estimation
* @param doa doa_handle_t instance pointer
* @param left Left channel 16-bit PCM data
* @param right Right channel 16-bit PCM data
* @return Estimated sound direction in degrees, e.g., 0-180
*/
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_DOA_H_ */

View File

@@ -0,0 +1,93 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_MASE_H_
#define _ESP_MASE_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MASE_SAMPLE_RATE 16000 // Sampling frequency in Hz; supports 16kHz only
#define MASE_FRAME_SIZE 16 // Frame length in ms; supports 16ms only
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array (mase_create documents mic_distance in mm)
/**
* @brief Mic-array type; currently a 2-mic line array and a 3-mic circular array
*        are supported.
*/
typedef enum {
TWO_MIC_LINE = 0, // 2-mic line array
THREE_MIC_CIRCLE = 1 // 3-mic circular array
} mase_mic_array_type_t;
/**
* @brief Operating mode; normal mode and wake-up enhancement mode are supported
*/
typedef enum {
NORMAL_ENHANCEMENT_MODE = 0, // normal mode
WAKE_UP_ENHANCEMENT_MODE = 1 // wake-up enhancement mode
} mase_op_mode_t;
// Opaque handle to a MASE instance; returned by mase_create
typedef void* mase_handle_t;
/**
* @brief Creates an instance to the MASE structure.
*
* @param fs The sampling frequency (Hz) must be 16000.
*
* @param frame_size The length of the audio processing must be 16ms.
*
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array.
*
* @param mic_distance The distance between neighboring microphones in mm.
*
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode.
*
* @param filter_strength Strength of the mic-array speech enhancement, must be 0, 1, 2 or 3.
*
* @return
* - NULL: Create failed
* - Others: An instance of MASE
*/
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
/**
* @brief Performs mic array processing for one frame.
*
* @param st The instance of MASE.
*
* @param in An array of 16-bit signed audio samples from mic.
*
* @param dsp_out Returns enhanced signal.
*
* @return None
*
*/
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
/**
* @brief Free the MASE instance
*
* @note NOTE(review): the function name `mase_destory` looks like a typo for `mase_destroy`;
*       kept as-is for compatibility.
*
* @param st The instance of MASE.
*
* @return None
*
*/
void mase_destory(mase_handle_t st);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,89 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for an MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
// Opaque MFCC runner state; only handled through pointers by the operations declared below
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
// please refer to its documentation for details.
typedef struct {
int winstep_ms; // The step between successive windows in ms. (10)
int winlen_ms; // The length of the analysis window in ms. (25)
int nch; // The number of input channels
int numcep; // The number of cepstrum to return
int nfilter; // The number of filters in the filterbank
int nfft; // The FFT size
int samp_freq; // The sample-rate of the signal.
int low_freq; // The lowest band edge of mel filters, in Hz. (e.g. 0)
int high_freq; // The highest band edge of mel filters, in Hz. Must not be higher than samp_freq
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
char *win_type; // Analysis window type to apply to each frame "hanning","hamming","sine","rectangular","povey"
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
float log_epsilon; // log epsilon. (e.g. 1e-7)
bool psram_first; // Alloc memory from PSRAM first
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free an mfcc runner
*
* Function to free a previously allocated mfcc runner.
*
* @param r Runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Create an mfcc runner and initialize its parameters.
*
* An mfcc runner needs to be created and initialized before use; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process
* @return The mfcc runner object.
*         (NOTE(review): the previous text said "True if success, false on error", which does not
*         match the pointer return type; presumably NULL is returned on error -- confirm)
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run an mfcc iteration, frame by frame
*
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples.
*             NOTE(review): the previous text gave the amount as sampfreq/(winstep_ms/1000), which
*             is not a plausible sample count; presumably samp_freq * winstep_ms / 1000 (one window
*             step) is meant -- confirm.
* @param nch NOTE(review): undocumented in the original; presumably the number of channels in `samp` -- confirm
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
*         when done with this buffer. Note that some implementations require the buffer to be freed before another call
*         to this function is done.
*         (NOTE(review): no free_cepbuf operation is declared in esp_mfcc_iface_t -- confirm buffer ownership)
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
/**
* @brief Run an mfcc iteration that writes 16-bit output into a caller-supplied buffer
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples
* @param fbank Output buffer of 16-bit values
*              (NOTE(review): presumably receives the int16 fbank features; required size not visible here -- confirm)
*/
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
/**
* @brief Clean all state of the mfcc handle
*
* @param r The mfcc runner
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on an mfcc runner
*/
typedef struct {
esp_mfcc_op_destroy_t destroy; // free a runner
esp_mfcc_op_create_t create; // create a runner from esp_mfcc_opts_t
esp_mfcc_op_run_step_t run_step; // run one step, returning float values
esp_mfcc_op_run_step_s16_t run_step_s16; // run one step, writing int16 values into a caller buffer
esp_mfcc_op_clean_t clean; // clean all state of a runner
} esp_mfcc_iface_t;

View File

@@ -0,0 +1,44 @@
#pragma once
#include "esp_mfcc_iface.h"
// Feature-extraction interface tables (esp_mfcc_iface_t) defined elsewhere.
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
/**
* @brief Return basic opts used in wakenet9 & multinet5
*
* @note Declared with an explicit `(void)` parameter list. In C (before C23) an empty `()`
*       declares a function with unspecified parameters, so a call with stray arguments would
*       not be diagnosed by the compiler.
*
* @return Pointer to the mfcc options
*         (NOTE(review): ownership of the returned object is not visible in this header -- confirm)
**/
esp_mfcc_opts_t *get_mfcc_opts_wn9(void);
/**
* @brief Return basic opts used in wakenet9s
*
* The parameters carry the same names as fields of esp_mfcc_opts_t.
* NOTE(review): presumably each one is stored into the field of the same name -- confirm.
*
* @param win_type Analysis window type
* @param use_power Use power (true) or magnitude (false) of the fft spectrum
* @param winstep_ms The step between successive windows in ms
* @param winlen_ms The length of the analysis window in ms
* @param nfilter The number of filters in the filterbank
* @return Pointer to the mfcc options
**/
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
/**
* @brief Return basic opts for default kaldifeat
*
* The returned options are documented as:
*
opts->psram_first = true;
opts->use_power = true;
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
opts->win_type = "povey";
opts->low_freq = 20;
opts->high_freq = 7600;
opts->samp_freq = 16000;
opts->nch = 1;
opts->nfft = 512;
opts->nfilter = 80;
opts->numcep = 80;
opts->preemph = 0.97;
opts->append_energy = false;
opts->winlen_ms = 25;
opts->winstep_ms = 10;
opts->remove_dc_offset = true;
*
* @note Declared with an explicit `(void)` parameter list. In C (before C23) an empty `()`
*       declares a function with unspecified parameters, so a call with stray arguments would
*       not be diagnosed by the compiler.
*
* @return Pointer to the mfcc options
**/
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void);
/**
* @brief Print mfcc opts
*
* @param opts The mfcc options to print
**/
void print_mfcc_opts(esp_mfcc_opts_t *opts);

View File

@@ -0,0 +1,224 @@
#pragma once
#include "stdint.h"
#include "esp_wn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ESP_MN_RESULT_MAX_NUM 5 // capacity of the result arrays in esp_mn_results_t
#define ESP_MN_MAX_PHRASE_NUM 400 // NOTE(review): presumably the maximum number of phrases -- confirm
#define ESP_MN_MAX_PHRASE_LEN 63 // NOTE(review): presumably the maximum phrase length -- confirm
#define ESP_MN_MIN_PHRASE_LEN 2 // NOTE(review): presumably the minimum phrase length -- confirm
#define ESP_MN_PREFIX "mn" // NOTE(review): presumably the prefix of multinet model names -- confirm
#define ESP_MN_ENGLISH "en" // language name string for English
#define ESP_MN_CHINESE "cn" // language name string for Chinese
// State of multinet, as returned by the `detect` operation
typedef enum {
ESP_MN_STATE_DETECTING = 0, // detecting
ESP_MN_STATE_DETECTED = 1, // detected
ESP_MN_STATE_TIMEOUT = 2, // time out
} esp_mn_state_t;
// Multinet loading mode.
// Memory consumption decreases as the mode value increases;
// as a consequence, the CPU load goes up.
typedef enum {
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with maximum memory consumption
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and load the rest from FLASH (default)
ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation
} esp_mn_loader_mode_t;
// Search method selector
typedef enum {
ESP_MN_GREEDY_SEARCH = 0, // greedy search
ESP_MN_BEAM_SEARCH = 1, // beam search
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
} esp_mn_search_method_t;
// Language identifiers
typedef enum {
CHINESE_ID = 1, // Chinese language
ENGLISH_ID = 2, // English language
} language_id_t;
// All possible recognition results, as returned by the `get_results` operation
typedef struct{
esp_mn_state_t state; // the state of multinet
int num; // The number of phrases in the lists, num<=5 (ESP_MN_RESULT_MAX_NUM). When num=0, no phrase is recognized.
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
char string[256]; // recognized string with commands graph
char raw_string[256]; // recognized string without commands graph
} esp_mn_results_t;
// One speech command phrase
typedef struct {
char *string; // command string
char *phonemes; // command phonemes, if applicable
int16_t command_id; // the command id
float threshold; // trigger threshold, default: 0
int16_t *wave; // prompt wave data of the phrase
} esp_mn_phrase_t;
// Node of a singly linked list of phrases (the "speech commands link" passed to set_speech_commands)
typedef struct _mn_node_ {
esp_mn_phrase_t *phrase; // the phrase held by this node
struct _mn_node_ *next; // the next node in the list
} esp_mn_node_t;
// Error report returned by the set_speech_commands operation
typedef struct{
int16_t num; // The number of error phrases, which cannot be added into the model
esp_mn_phrase_t **phrases; // The array of error phrase pointers
} esp_mn_error_t;
/**
* @brief Initialize a model instance with specified model name.
*
* @param model_name The model name.
*        (NOTE(review): the previous text said "wakenet model name", but this is the multinet interface -- confirm)
* @param duration The duration (ms) to trigger the timeout
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
/**
* @brief Switch multinet mode to change memory consumption and CPU loading
*
* @warning Only supported by multinet6 or later versions
*
* @param model The model object to modify
* @param mode The multinet loader mode
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
/**
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Callback function type to fetch the number of frames recognized by the command word
*
* @param model The model object to query
* @return The number of frames recognized by the command word
*/
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
/**
* @brief Set the detection threshold to manually adjust the probability
*
* @param model The model object to modify
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
* @return NOTE(review): the meaning of the int return value is not documented in this header -- confirm
*/
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Get the language of model
*
* @param model The model object to query
* @return Language name string, e.g. ESP_MN_CHINESE, ESP_MN_ENGLISH
*/
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
/**
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
*
* @param model The model object to query.
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
*                get_samp_chunksize function.
* @return The state of multinet, see esp_mn_state_t
*/
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Destroy a speech commands recognition model
*
* @param model The model object to destroy
*/
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* @brief Get recognition results
*
* @param model The model object to query
*
* @return The current results, see esp_mn_results_t.
*/
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
/**
* @brief Turn on log printing
*
* @param model_data The model object to modify.
*
*/
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
/**
* @brief Clean all status of the model
*
* @param model_data The model object to clean.
*
*/
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
/**
* @brief Set the speech commands by mn_command_root
*
* @note NOTE(review): this type carries an `esp_wn_` prefix although it belongs to the multinet
*       (`esp_mn_`) interface; the name is kept as-is for compatibility.
*
* @param model_data The model object to modify.
* @param mn_command_root The speech commands link (head of an esp_mn_node_t list).
* @return The error phrase id info, see esp_mn_error_t.
*/
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
/**
* @brief Print out the current commands in the fst. Note that commands which were "added" but not
*        yet "updated" will not be shown here.
*
* @param model_data The model object to query
*/
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
/**
* @brief Check if input string can be tokenized
*
* @param model_data The model object to query
* @param str The input string
* @return NOTE(review): the meaning of the int return value is not documented in this header -- confirm
*/
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
/**
* This structure contains the functions used to do operations on a multinet
* (speech commands recognition) model.
*/
typedef struct {
esp_mn_iface_op_create_t create; // create a model instance
esp_mn_iface_op_get_samp_rate_t get_samp_rate; // sample rate expected by detect
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize; // samples per detect call
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum; // number of frames recognized by the command word
esp_mn_iface_op_set_det_threshold_t set_det_threshold; // set the detection threshold
esp_mn_iface_op_get_language_t get_language; // language of the model
esp_mn_iface_op_detect_t detect; // feed samples and detect speech commands
esp_mn_iface_op_destroy_t destroy; // destroy the model
esp_mn_iface_op_get_results_t get_results; // get recognition results
esp_mn_iface_op_open_log_t open_log; // turn on log printing
esp_mn_iface_op_clean_t clean; // clean all status of the model
esp_wn_iface_op_set_speech_commands set_speech_commands; // set the speech commands
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode; // switch the loader mode
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands; // print current commands
esp_mn_iface_op_check_speech_command check_speech_command; // check whether a string can be tokenized
} esp_mn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,66 @@
#pragma once
#include "esp_mn_iface.h"
// Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
// a specific phrase or word.
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Get the multinet handle from model name
*
* @param model_name The name of model
* @returns The handle (esp_mn_iface_t function table) of multinet
*/
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
/**
* @brief Get the multinet language from model name
*
* @param model_name The name of model
* @returns The language of multinet, as a string
*/
char *esp_mn_language_from_name(char *model_name);
/*
Configure the multinet model to use based on what's selected in menuconfig.
When CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION is not set, placeholder string values are defined instead.
*/
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
#include "multinet2_ch.h"
#define MULTINET_COEFF get_coeff_multinet2_ch
#define MULTINET_MODEL_NAME "mn2_cn"
#else
#define MULTINET_COEFF "COEFF_NULL"
#define MULTINET_MODEL_NAME "NULL"
#endif
/* example
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
//Initialize MultiNet model data
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
add_speech_commands(multinet, model_data);
//Set parameters of buffer
int audio_chunksize=model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
int r=model->detect(model_data, buffer);
if (r>0) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
model->destroy(model_data)
*/
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,86 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_NS_H_
#define _ESP_NS_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define NS_USE_SPIARM 0 // NOTE(review): name looks like a typo for "SPIRAM"; its use is not visible in this header -- confirm
#define NS_FRAME_LENGTH_MS 10 // Frame length in ms; supports 10ms, 20ms, 30ms
/**
* Opaque handle to an NS (noise suppression) instance.
* The sampling frequency (Hz) must be 16000Hz
*/
typedef void* ns_handle_t;
/**
* @brief Creates an instance to the NS structure.
*
* @param frame_length The length of the audio processing in ms; can be 10ms, 20ms, 30ms.
*
* @return
* - NULL: Create failed
* - Others: The instance of NS
*/
ns_handle_t ns_create(int frame_length);
/**
* @brief Creates an instance of the more powerful noise suppression algorithm.
*
* @warning frame_length only supports 10 ms.
*
* @param frame_length The length of the audio processing can only be 10ms.
* @param mode 0: Mild, 1: Medium, 2: Aggressive
* @param sample_rate The sample rate of the audio.
*
* @return
* - NULL: Create failed
* - Others: The instance of NS
*/
ns_handle_t ns_pro_create(int frame_length, int mode, int sample_rate);
/**
* @brief Feed samples of an audio stream to the NS and get the audio stream after noise suppression.
*
* @param inst The instance of NS.
*
* @param indata An array of 16-bit signed audio samples.
*
* @param outdata An array of 16-bit signed audio samples after noise suppression.
*
* @return None
*
*/
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
/**
 * @brief Free the NS instance.
 *
 * @param inst The instance of NS.
 *
 * @return None
 */
void ns_destroy(ns_handle_t inst);
#ifdef __cplusplus
}
#endif
#endif //_ESP_NS_H_

View File

@@ -0,0 +1,64 @@
#pragma once
#include "stdint.h"
// Opaque model data container: the struct is only forward-declared here, so callers handle it by pointer.
typedef struct esp_nsn_data_t esp_nsn_data_t;
/**
 * @brief Function type to initialize a model instance
 *
 * @param model_name The name of the model instance
 * @returns Handle to the model data
 */
typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
/**
 * @brief Get the number of samples that need to be passed to the process function
 *
 * Every noise suppression model processes a certain number of samples at the same time. This function
 * can be used to query that number. Note that the returned value is in 16-bit samples, not in bytes.
 *
 * @param model The model object to query
 * @return The number of samples to feed the process function
 */
typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
/**
 * @brief Feed samples of an audio stream to the noise suppression model and get the processed data.
 *
 * @param model    The model object to use
 * @param in_data  An array of 16-bit signed audio samples. The array size used can be queried by the
 *                 get_samp_chunksize function.
 * @param out_data An array of 16-bit signed audio samples after processing.
 * @return The return state (the meaning of individual values is not defined in this header).
 */
typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
/**
 * @brief Get the sample rate of the samples to feed to the process function
 *
 * @param model The model object to query
 * @return The sample rate, in Hz
 */
typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
/**
 * @brief Destroy a noise suppression model
 *
 * @param model The model object to destroy
 */
typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
/**
 * This structure contains the functions used to do operations on a noise suppression model.
 */
typedef struct {
esp_nsn_iface_op_create_t create;
esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_nsn_iface_op_process_t process;
esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
esp_nsn_iface_op_destroy_t destroy;
} esp_nsn_iface_t;

View File

@@ -0,0 +1,17 @@
#pragma once
#include "esp_nsn_iface.h"
/*
 The prefix of nsnet model names.
 Now there are nsnet1 and nsnet2
*/
#define ESP_NSNET_PREFIX "nsnet"
/**
 * @brief Get the nsnet handle from model name
 *
 * @param model_name The name of model
 * @returns The handle of nsnet
 */
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);

View File

@@ -0,0 +1,62 @@
#pragma once
#include "c_speech_features_config.h"
#include "stdlib.h"
#include <assert.h>
#include <stdbool.h>
// 2*pi; defined here only when M_2PI has not already been defined.
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif
// Mel filter descriptor: returned by esp_mel_filter_init and taken by
// esp_mel_filter_deinit / esp_mel_dotprod_step.
typedef struct {
float *coeff; // filter coefficients — layout not visible in this header, TODO confirm
int *bank_pos; // presumably per-filter positions in the spectrum — verify against implementation
int nfilter; // presumably the number of filters — confirm
} esp_mel_filter_t;
// Allocates a float buffer. The unit of `size` (bytes vs. elements) is not visible here — TODO confirm.
// from_psram presumably selects external PSRAM instead of internal RAM — verify against implementation.
float *esp_mfcc_malloc(size_t size, bool from_psram);
// Releases `ptr`; presumably a pointer obtained from esp_mfcc_malloc — confirm.
void esp_mfcc_free(void *ptr);
/**
 * @brief Initialize FFT table
 * @warning For ESP-PLATFORM, use esp-dsp fft
 *          For other platforms, use kiss fft
 *
 * @param nfft The number of input samples
 * @return fft-table
 **/
void *esp_fft_init(int nfft);
/**
 * @brief Free FFT table
 * @warning For ESP-PLATFORM, use esp-dsp fft
 *          For other platforms, use kiss fft
 *
 * @param fft_table The fft table initialized by esp_fft_init
 * @param nfft The number of input samples
 * @return None
 **/
void esp_fft_deinit(void *fft_table, int nfft);
/**
 * @brief Initialize window function
 *        Currently supports hanning, hamming, sine, povey, rectangular,
 *        wn9 (512-hanning, for wakenet9 & multinet5 compatibility)
 *
 * @param win_type     Name of the window type (see the list above)
 * @param window_data  Window buffer; whether it is filled in place or may be NULL is not
 *                     visible in this header — TODO confirm
 * @param frame_length Frame length; presumably in samples — confirm
 * @return Pointer to the window data
 **/
float *esp_win_func_init(char *win_type, float *window_data, int frame_length);
// Feature extraction step functions. Their implementations are not in this header, so the
// notes below are inferred from names and signatures — verify before relying on them.

// Presumably a real-input FFT of x (nfft points) using the table from esp_fft_init.
float *esp_fftr(float *x, int nfft, void *fft_table);
// Presumably computes the spectrum of x; use_power presumably selects power vs. magnitude.
float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_handle);
// Presumably converts len 16-bit samples into floats in x; remove_dc presumably enables DC removal.
void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc);
// Presumably applies pre-emphasis with coefficient coeff; last is presumably the previous sample.
float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last);
// Mel filter API. Implementations are not in this header; notes are inferred from the
// signatures — verify before relying on them.

// Builds a mel filter descriptor from the FFT size, filter count, frequency range and sample rate.
// from_psram presumably selects PSRAM for the allocation — confirm.
esp_mel_filter_t *esp_mel_filter_init(
int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram);
// Presumably releases a descriptor obtained from esp_mel_filter_init — confirm.
void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter);
// Presumably applies the mel filters to x and writes the result to out; use_log_fbank and
// epsilon presumably control an optional log of the output — confirm.
float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon);

View File

@@ -0,0 +1,84 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_WEBRTC_H_
#define _ESP_WEBRTC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_agc.h"
#include "esp_log.h"
#include "esp_ns.h"
#include "sr_ringbuf.h"
#include <stdint.h>
#include "esp_heap_caps.h"
// State of one webrtc processing instance: returned by webrtc_create and taken by
// webrtc_process / webrtc_destroy. Field notes are inferred from names — TODO confirm.
typedef struct {
void *ns_handle; // presumably the noise suppression instance
void *agc_handle; // presumably the automatic gain control instance
int frame_size;
int sample_rate;
int16_t *buff;
int16_t *out_data;
sr_ringbuf_handle_t rb;
} webrtc_handle_t;
/**
 * @brief Creates an instance of webrtc.
 *
 * @warning frame_length_ms supports 10 ms, 20 ms, 30 ms, 32 ms.
 *
 * @param frame_length_ms  The length of the audio processing, in ms
 * @param ns_mode          The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
 * @param agc_mode         The mode of AGC
 * @param agc_gain         The gain of AGC. Default is 9
 * @param agc_target_level The target level of AGC. Default is -3 dBFS
 * @param sample_rate      The sample rate of the audio.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of webrtc
 */
webrtc_handle_t *webrtc_create(
int frame_length_ms, int ns_mode, agc_mode_t agc_mode, int agc_gain, int agc_target_level, int sample_rate);
/**
 * @brief Feed samples of an audio stream to the webrtc and get the audio stream after noise suppression.
 *
 * @param handle     The instance of webrtc.
 * @param indata     An array of 16-bit signed audio samples.
 * @param size       The sample size of the output data. Whether it is also read as the input
 *                   size is not visible in this header — TODO confirm.
 * @param enable_ns  Enable noise suppression
 * @param enable_agc Enable automatic gain control
 *
 * @return data after noise suppression
 *
 * NOTE(review): `bool` is used here but <stdbool.h> is not included directly by this header;
 * it presumably arrives through one of the included headers — confirm.
 */
int16_t *webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
/**
 * @brief Free the webrtc instance
 *
 * @param handle The instance of webrtc.
 *
 * @return None
 */
void webrtc_destroy(webrtc_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif //_ESP_WEBRTC_H_

View File

@@ -0,0 +1,178 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_VAD_H_
#define _ESP_VAD_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Default sample rate and frame length for the VAD API declared in this header.
#define SAMPLE_RATE_HZ 16000 // Supports 32000, 16000, 8000
#define VAD_FRAME_LENGTH_MS 30 // Supports 10ms, 20ms, 30ms
/**
 * @brief VAD operating modes. The higher the mode, the more aggressive the VAD and the more
 *        restrictive it is in reporting speech; choose a lower mode to trigger on more speech.
 */
typedef enum {
    VAD_MODE_0 = 0, // Normal
    VAD_MODE_1 = 1, // Aggressive
    VAD_MODE_2 = 2, // Very Aggressive
    VAD_MODE_3 = 3, // Very Very Aggressive
    VAD_MODE_4 = 4  // Very Very Very Aggressive
} vad_mode_t;
// Result of voice activity detection.
typedef enum {
VAD_SILENCE = 0, // no voice
VAD_SPEECH = 1, // voice is detected
} vad_state_t;
// Trigger state used with vad_trigger_alloc / vad_trigger_detect. Per the vad_trigger_alloc
// documentation, min_speech_len and min_noise_len are minimum durations counted in frames;
// speech_len and noise_len are presumably the running counters — TODO confirm.
typedef struct vad_trigger_tag {
vad_state_t state;
unsigned int min_speech_len;
unsigned int noise_len;
unsigned int min_noise_len;
unsigned int speech_len;
} vad_trigger_t;
// Largest length value (INT32_MAX - 1); presumably the cap for the vad_trigger_t length
// counters — confirm. The replacement list is parenthesized so the macro keeps its value
// inside larger expressions such as `x - vad_MAX_LEN` or `2 * vad_MAX_LEN`.
#define vad_MAX_LEN (INT32_MAX - 1)
/**
 * @brief Allocate a VAD trigger
 *
 * @param min_speech_len Minimum number of frames of speech duration
 * @param min_noise_len  Minimum number of frames of noise duration
 *
 * @return Trigger pointer
 **/
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
/**
 * @brief Free a VAD trigger
 *
 * @param trigger The trigger to free
 **/
void vad_trigger_free(vad_trigger_t *trigger);
/**
 * @brief Reset a VAD trigger
 *
 * @param trigger The trigger to reset
 **/
void vad_trigger_reset(vad_trigger_t *trigger);
/**
 * @brief Detect voice activity through the trigger
 *
 * @param trigger The trigger holding the detection state
 * @param state   The VAD state to feed in; presumably the raw per-frame result — TODO confirm
 *
 * @return The resulting VAD state
 **/
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
// VAD instance: bundles a trigger with the underlying VAD instance and its audio format.
typedef struct {
vad_trigger_t *trigger;
void *vad_inst;
int sample_rate; // presumably in Hz — confirm
int frame_size; // unit (samples vs. ms) is not visible here — TODO confirm
} vad_handle_with_trigger_t;
// Public handle type: a pointer to the structure above.
typedef vad_handle_with_trigger_t *vad_handle_t;
// typedef vad_handle_tag * vad_handle_t;
/**
 * @brief Creates an instance of the VAD structure.
 *
 * @param vad_mode Sets the VAD operating mode.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of VAD
 */
vad_handle_t vad_create(vad_mode_t vad_mode);
/**
 * @brief Creates an instance of the VAD structure with explicit parameters.
 *
 * @param vad_mode      Sets the VAD operating mode.
 * @param sample_rate   Sample rate in Hz
 * @param one_frame_ms  Length of the audio chunk in ms; can be 10, 20 or 30. Default: 30.
 * @param min_speech_ms Minimum speech duration, in ms
 * @param min_noise_ms  Minimum noise duration, in ms
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of VAD
 */
vad_handle_t vad_create_with_param(
vad_mode_t vad_mode, int sample_rate, int one_frame_ms, int min_speech_ms, int min_noise_ms);
/**
 * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
 *
 * @param handle         The instance of VAD.
 * @param data           An array of 16-bit signed audio samples.
 * @param sample_rate_hz The sampling frequency in Hz; can be 32000, 16000 or 8000. Default: 16000.
 * @param one_frame_ms   The length of the audio processing in ms; can be 10, 20 or 30. Default: 30.
 * @return
 *         - VAD_SILENCE if no voice
 *         - VAD_SPEECH  if voice is detected
 */
vad_state_t vad_process(vad_handle_t handle, int16_t *data, int sample_rate_hz, int one_frame_ms);
/**
 * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
 *        Unlike vad_process, no sample rate or frame length is passed in; presumably the
 *        values stored in the handle are used — TODO confirm.
 *
 * @param handle The instance of VAD.
 * @param data   An array of 16-bit signed audio samples.
 * @return
 *         - VAD_SILENCE if no voice
 *         - VAD_SPEECH  if voice is detected
 */
vad_state_t vad_process_with_trigger(vad_handle_t handle, int16_t *data);
/**
 * @brief Reset the trigger state to silence
 *
 * @param handle The instance of VAD.
 */
void vad_reset_trigger(vad_handle_t handle);
/**
 * @brief Free the VAD instance
 *
 * @param inst The instance of VAD.
 *
 * @return None
 */
void vad_destroy(vad_handle_t inst);
/*
 * Programming Guide:
 *
 * @code{c}
 * vad_handle_t vad_inst = vad_create(VAD_MODE_3); // Creates an instance of the VAD structure.
 *
 * while (1) {
 *   //Use buffer to receive the audio data from MIC.
 *   vad_state_t vad_state = vad_process(vad_inst, buffer, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Feed samples to the VAD process and get the result.
 * }
 *
 * vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
 *
 * @endcode
 */
#ifdef __cplusplus
}
#endif
#endif //_ESP_VAD_H_

View File

@@ -0,0 +1,164 @@
#pragma once
#include "esp_vad.h"
#include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque model data container: the struct is only forward-declared here, so callers handle it by pointer.
typedef struct model_iface_data_t model_iface_data_t;
// /**
// * @brief The state of vad
// */
// typedef enum {
// VAD_NOISE = -1, // Noise
// VADNET_STATE_SILENCE = 0, // Silence
// VAD_SPEECH = 1 // Speech
// } vad_state_t;
/**
 * @brief Function type to initialize a model instance with a detection mode
 *        and specified model name
 *
 * @param model_name    The specified model name
 * @param mode          The voice activity detection mode
 * @param channel_num   The number of input audio channels
 * @param min_speech_ms The minimum duration of speech in ms to trigger vad
 *                      speech
 * @param min_noise_ms  The minimum duration of noise in ms to trigger vad
 *                      noise
 * @returns Handle to the model data
 */
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
/**
 * @brief Get the number of samples that need to be passed to the detect
 *        function
 *
 * Every speech recognition model processes a certain number of samples at the
 * same time. This function can be used to query that number. Note that the
 * returned value is in 16-bit samples, not in bytes.
 *
 * @param model The model object to query
 * @return The number of samples to feed the detect function
 */
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
 * @brief Get the number of channels of the samples that need to be passed to
 *        the detect function
 *
 * @param model The model object to query
 * @return The number of channels
 */
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
 * @brief Get the sample rate of the samples to feed to the detect function
 *
 * @param model The model object to query
 * @return The sample rate, in Hz
 */
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
 * @brief Set the detection threshold to manually adjust the probability
 *
 * @param model         The model object to modify
 * @param det_threshold The detection threshold; the range of
 *                      det_threshold is 0.5~0.9999
 * @return 0: setting failed, 1: setting success
 */
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
 * @brief Get the voice activity detection threshold
 *
 * @param model The model object to query
 * @returns The detection threshold
 */
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
/**
 * @brief Feed samples of an audio stream to the vad model and detect whether
 *        it is voice.
 *
 * @param model   The model object to use
 * @param samples An array of 16-bit signed audio samples. The array size used
 *                can be queried by the get_samp_chunksize function.
 * @return The detected state as a vad_state_t (VAD_SILENCE or VAD_SPEECH).
 */
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
 * @brief Feed MFCC of an audio stream to the vad model and detect whether it
 *        is voice.
 *
 * @param model The model object to use
 * @param cq    Queue (dl_convq_queue_t) holding the 16-bit MFCC data.
 * @return The detected state as a vad_state_t (VAD_SILENCE or VAD_SPEECH).
 */
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
/**
 * @brief Get MFCC of an audio stream
 *
 * @param model The model object to query
 * @return MFCC data, as a dl_convq_queue_t pointer
 */
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/**
 * @brief Get the triggered channel index. Channel index starts from zero.
 *
 * @param model The model object to query
 * @return The channel index
 */
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
 * @brief Clean all states of the model
 *
 * @param model The model object to clean
 */
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
/**
 * @brief Destroy a model object
 *
 * @param model The model object to destroy
 */
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
/**
 * This structure contains the functions used to do operations on a voice
 * activity detection model. Each member is one of the function pointer
 * types declared above.
 */
typedef struct {
esp_vadn_iface_op_create_t create;
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_vadn_iface_op_get_channel_num_t get_channel_num;
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_vadn_iface_op_detect_t detect;
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_vadn_iface_op_clean_t clean;
esp_vadn_iface_op_destroy_t destroy;
} esp_vadn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,22 @@
#pragma once
#include "esp_vadn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
// The prefix of vadnet model names, used to filter all vadnet models from the available models.
#define ESP_VADN_PREFIX "vadnet"
/**
 * @brief Get the vadnet handle from model name
 *
 * @param model_name The name of model
 * @returns The handle of vadnet
 */
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,90 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_WEBRTC_H_
#define _ESP_WEBRTC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "sr_ringbuf.h"
#include "esp_log.h"
#include "esp_agc.h"
#include "esp_ns.h"
#include "esp_heap_caps.h"
// State of one webrtc processing instance: returned by webrtc_create and taken by
// webrtc_process / webrtc_destroy. Field notes are inferred from names — TODO confirm.
typedef struct {
void* ns_handle; // presumably the noise suppression instance
void* agc_handle; // presumably the automatic gain control instance
int frame_size;
int sample_rate;
int16_t *buff;
int16_t *out_data;
sr_ringbuf_handle_t rb;
}webrtc_handle_t;
/**
 * @brief Creates an instance of webrtc.
 *
 * @warning frame_length_ms supports 10 ms, 20 ms, 30 ms, 32 ms.
 *
 * @param frame_length_ms  The length of the audio processing, in ms
 * @param ns_mode          The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
 * @param agc_mode         The mode of AGC
 * @param agc_gain         The gain of AGC. Default is 9
 * @param agc_target_level The target level of AGC. Default is -3 dBFS
 * @param sample_rate      The sample rate of the audio.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of webrtc
 */
webrtc_handle_t* webrtc_create(
int frame_length_ms,
int ns_mode,
agc_mode_t agc_mode,
int agc_gain,
int agc_target_level,
int sample_rate);
/**
 * @brief Feed samples of an audio stream to the webrtc and get the audio stream after noise suppression.
 *
 * @param handle     The instance of webrtc.
 * @param indata     An array of 16-bit signed audio samples.
 * @param size       The sample size of the output data. Whether it is also read as the input
 *                   size is not visible in this header — TODO confirm.
 * @param enable_ns  Enable noise suppression
 * @param enable_agc Enable automatic gain control
 *
 * @return data after noise suppression
 *
 * NOTE(review): `bool` is used here but <stdbool.h> is not included directly by this header;
 * it presumably arrives through one of the included headers — confirm.
 */
int16_t* webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
/**
 * @brief Free the webrtc instance
 *
 * @param handle The instance of webrtc.
 *
 * @return None
 */
void webrtc_destroy(webrtc_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif //_ESP_WEBRTC_H_

View File

@@ -0,0 +1,226 @@
#pragma once
#include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque model data container: the struct is only forward-declared here, so callers handle it by pointer.
typedef struct model_iface_data_t model_iface_data_t;
/**
 * @brief Wake-up states, listed in ascending numeric order.
 */
typedef enum {
    WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
    WAKENET_NO_DETECT = 0,         // wake word is not detected
    WAKENET_DETECTED = 1           // wake word is detected
} wakenet_state_t;
// Wake word recognition operating mode.
// The probability of being wake words is increased with increasing mode,
// as a consequence the false alarm rate also goes up.
typedef enum {
DET_MODE_90 = 0, // Normal
DET_MODE_95 = 1, // Aggressive
DET_MODE_2CH_90 = 2,
DET_MODE_2CH_95 = 3,
DET_MODE_3CH_90 = 4,
DET_MODE_3CH_95 = 5,
DET_MODE_90_COPY_PARAMS = 6, // NOTE(review): was labelled "Aggressive", which looks copy-pasted — confirm meaning
} det_mode_t;
// Wake word list descriptor.
typedef struct {
int wake_word_num; // The number of all wake words
char **wake_word_list; // The name list of wake words
} wake_word_info_t;
/**
 * @brief Function type to initialize a model instance with a detection mode and specified wake word coefficient
 *
 * @param model_name The specified wake word model coefficient
 * @param det_mode   The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
 * @returns Handle to the model data
 */
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
/**
 * @brief Get the number of samples that need to be passed to the detect function
 *
 * Every speech recognition model processes a certain number of samples at the same time. This function
 * can be used to query that number. Note that the returned value is in 16-bit samples, not in bytes.
 *
 * @param model The model object to query
 * @return The number of samples to feed the detect function
 */
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
 * @brief Get the number of channels of the samples that need to be passed to the detect function
 *
 * @param model The model object to query
 * @return The number of channels
 */
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
 * @brief Get the start point of wake word when one wake word is detected.
 *
 * @warning This function should be called when the channel index is verified.
 *          The returned value is the number of samples from start point of wake word to detected point.
 *
 * @param model The model object to query
 * @return The number of samples from start point to detected point (end point)
 */
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
/**
 * @brief Get the sample rate of the samples to feed to the detect function
 *
 * @param model The model object to query
 * @return The sample rate, in Hz
 */
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
 * @brief Get the number of wake words
 *
 * @param model The model object to query
 * @returns The number of wake words
 */
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
/**
 * @brief Get the name of wake word by index
 *
 * @warning The index of wake word starts with 1
 * @param model      The model object to query
 * @param word_index The index of wake word
 * @returns The name of the wake word
 */
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
/**
 * @brief Set the detection threshold to manually adjust the probability
 *
 * @param model         The model object to modify
 * @param det_threshold The threshold to trigger wake words, the range of det_threshold is 0.4~0.9999
 * @param word_index    The index of wake word
 * @return 0: setting failed, 1: setting success
 */
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
/**
 * @brief Reset the threshold to its initial state
 *
 * @param model The model object to modify
 * @return 0: setting failed, 1: setting success
 */
typedef int (*esp_wn_iface_op_reset_det_threshold_t)(model_iface_data_t *model);
/**
 * @brief Get the wake word detection threshold of different modes
 *
 * @param model      The model object to query
 * @param word_index The index of wake word
 * @returns The detection threshold
 */
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
/**
 * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
 *
 * @warning The index of wake word starts with 1, 0 means no wake word is detected.
 *
 * @param model   The model object to use
 * @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
 *                get_samp_chunksize function.
 * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
 *
 * NOTE(review): the return type is wakenet_state_t, whose named values are only -1, 0 and 1;
 * a wake word index above 1 has no named enumerator — confirm how callers should interpret it.
 */
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
 * @brief Get the volume gain
 *
 * @param model     The model object to query
 * @param target_db The target dB to calculate volume gain
 * @returns The volume gain
 */
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
/**
 * @brief Get the triggered channel index. Channel index starts from zero.
 *
 * @param model The model object to query
 * @return The channel index
 */
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
 * @brief Clean all states of the model
 *
 * @param model The model object to clean
 */
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
/**
 * @brief Destroy a speech recognition model
 *
 * @param model The model object to destroy
 */
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
/**
 * @brief Feed MFCC of an audio stream to the wake word model and detect if there is a
 *        keyword found.
 *
 * @param model   The model object to use
 * @param samples An array of 16-bit signed audio samples; how it is used alongside cq is
 *                not visible in this header — TODO confirm.
 * @param cq      Queue (dl_convq_queue_t) holding the 16-bit MFCC data.
 * @return The index of wake words, return 0 if no wake word is detected, else
 *         the index of the wake words.
 */
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
/**
 * @brief Get MFCC of an audio stream
 *
 * @param model The model object to query
 * @return MFCC data, as a dl_convq_queue_t pointer
 */
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/**
 * This structure contains the functions used to do operations on a wake word detection model.
 * Each member is one of the function pointer types declared above.
 */
typedef struct {
esp_wn_iface_op_create_t create;
esp_wn_iface_op_get_start_point_t get_start_point;
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_wn_iface_op_get_channel_num_t get_channel_num;
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
esp_wn_iface_op_get_word_num_t get_word_num;
esp_wn_iface_op_get_word_name_t get_word_name;
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
esp_wn_iface_op_reset_det_threshold_t reset_det_threshold;
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
esp_wn_iface_op_detect_t detect;
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_wn_iface_op_clean_t clean;
esp_wn_iface_op_destroy_t destroy;
} esp_wn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,52 @@
#pragma once
#include "esp_wn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
// The prefix of wakenet model names, used to filter all wakenet models from the available models.
#define ESP_WN_PREFIX "wn"
/**
 * @brief Get the wakenet handle from model name
 *
 * @param model_name The name of model
 * @returns The handle of wakenet
 */
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
/**
 * @brief Get the wake word name from model name
 *
 * @param model_name The name of model
 * @returns The wake word name, like "alexa", "hilexin", "xiaoaitongxue".
 *          Ownership of the returned string is not stated in this header — TODO confirm
 *          whether the caller must free it.
 */
char *esp_wn_wakeword_from_name(const char *model_name);
#ifdef __cplusplus
}
#endif
/*
Usage example:
static const esp_wn_iface_t *model = esp_wn_handle_from_name(model_name);
//Initialize wakeNet model data
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
//Set parameters of buffer
int audio_chunksize=model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
int r=model->detect(model_data, buffer);
if (r>0) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
model->destroy(model_data);
*/

View File

@@ -0,0 +1,20 @@
#ifndef __FLITE_G2P_H__
#define __FLITE_G2P_H__
// Result of a grapheme-to-phoneme conversion (returned by flite_g2p_get_result).
// Field meanings below are inferred from names — TODO confirm against the implementation.
typedef struct {
int num_phonemes; // presumably the number of entries in phonemes
int phoneme_size; // meaning not visible here (capacity or per-entry size?) — confirm
char **phonemes; // presumably an array of phoneme strings
} flite_g2p_result;
// Grapheme-to-phoneme API. Implementations are not in this header; the notes below are
// inferred from names and signatures — verify before relying on them.

// Presumably releases a result obtained from flite_g2p_get_result.
void flite_g2p_result_free(flite_g2p_result *result);
// Converts the given grapheme string and returns the result structure.
flite_g2p_result *flite_g2p_get_result(const char *grapheme);
// Presumably prints the phonemes of result; map_phonemes presumably enables a phoneme mapping.
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
// Presumably returns the phonemes of result as one string; ownership of the string is not
// stated here — TODO confirm whether the caller must free it.
char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);
// Presumably a one-call conversion from graphemes to a phoneme string; ownership as above.
char *flite_g2p(const char *graphemes, int map_phonemes);
#endif

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_hilexin_wn5;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_hilexin_wn5X2;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_hilexin_wn5X3;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_multinet2_ch;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_nihaoxiaoxin_wn5X3;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X2;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
// Coefficient getter object for this generated model; declared extern here and defined elsewhere.
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X3;

View File

@@ -0,0 +1,29 @@
#pragma once
#include <float.h>
#include <math.h>
/* #undef ENABLE_DOUBLE */
/*
 * Floating-point abstraction: csf_float and the csf_* math aliases resolve to the
 * single-precision type and functions unless ENABLE_DOUBLE is defined, in which case
 * they resolve to the double-precision ones.
 */
#ifndef ENABLE_DOUBLE
/* Single precision (used when ENABLE_DOUBLE is not defined). */
# define csf_float     float
# define csf_float_min FLT_MIN
# define csf_abs       fabsf
# define csf_sqrt      sqrtf
# define csf_pow       powf
# define csf_log10     log10f
# define csf_log       logf
# define csf_sin       sinf
# define csf_floor     floorf
# define csf_ceil      ceilf
#else
/* Double precision. */
# define csf_float     double
# define csf_float_min DBL_MIN
# define csf_abs       fabs
# define csf_sqrt      sqrt
# define csf_pow       pow
# define csf_log10     log10
# define csf_log       log
# define csf_sin       sin
# define csf_floor     floor
# define csf_ceil      ceil
#endif

View File

@@ -0,0 +1,418 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_H
#define DL_LIB_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#include "esp_heap_caps.h"
#include "sdkconfig.h"
#define DL_SPIRAM_SUPPORT 1
#endif
#ifdef CONFIG_IDF_TARGET_ESP32S3
#include "esp32s3/rom/cache.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Padding mode passed to the convolution layers declared in this header; per their docs, 0 selects "VALID" and any other value selects "SAME".
typedef int padding_state;
// /**
// * @brief Allocate a chunk of memory which has the given capabilities.
// * Equivalent semantics to libc malloc(), for capability-aware memory.
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
// *
// * @param size In bytes, of the amount of memory to allocate
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
// * @return Pointer to currently allocated heap memory
// **/
// void *heap_caps_malloc(size_t size, uint32_t caps);
/**
* @brief Allocate aligned memory from internal memory or external memory.
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
* else, allocate memory from PSRAM
*
* NOTE(review): the comparison above reads as inverted. ESP-IDF describes
* CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL as the size up to which allocations are kept
* in internal RAM, so larger requests would be the ones going to PSRAM.
* Confirm against the implementation before relying on this description.
*
* Memory returned by this function is released with dl_lib_free().
*
* @param cnt Number of contiguous chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits (NOTE(review): confirm the unit; alignment is usually expressed in bytes)
* @return Pointer to the allocated heap memory
*/
void *dl_lib_calloc(int cnt, int size, int align);
/**
* @brief Always allocate aligned memory from external memory.
*
* Memory returned by this function is released with dl_lib_free().
*
* @param cnt Number of contiguous chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits (NOTE(review): confirm the unit; alignment is usually expressed in bytes)
* @return Pointer to the allocated, aligned heap memory
*/
void *dl_lib_calloc_psram(int cnt, int size, int align);
/**
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
*
* @param ptr Pointer to free
*/
void dl_lib_free(void *ptr);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
* Should be good until an input of 5 or so with a steps factor of 8.
*
* @param x Floating point input (taken as double)
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
* @return Exp()'ed output, returned as fptp_t
*/
fptp_t fast_exp(double x, int steps);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* Unlike fast_exp(), this variant takes no steps argument and returns a double.
*
* @param x Floating point input
* @return Exp()'ed output
*/
double fast_exp_pro(double x);
/**
* @brief Does a softmax operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a softmax operation on a quantized matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a floating point number
*
* @param in Floating point input
* @return Sigmoid output
*/
fptp_t dl_sigmoid_op(fptp_t in);
/**
* @brief Does a sigmoid operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a tanh operation on a floating point number
*
* @param v Floating point input number
* @return Tanh value
*/
fptp_t dl_tanh_op(fptp_t v);
/**
* @brief Does a tanh operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
*
* @param in Floating point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
/**
* @brief Does a ReLu operation on a matrix.
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Fully connected layer operation
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
*/
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
/**
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
* this matrix only needs to be calculated once. This function does that.
*
* @param variance Variance matrix the sqrtvari values are derived from
* @param epsilon Epsilon value used together with the variance
* @param out Matrix receiving the pre-calculated sqrtvari values (presumably must be allocated by the caller -- confirm)
*/
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
/**
* @brief Batch-normalize a matrix
*
* The function returns nothing; m is the only non-const argument, so the result is
* presumably written back into m (confirm against the implementation).
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
*/
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
/**
* @brief Do a basic LSTM layer pass.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a basic LSTM layer pass, partial quantized version.
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
*
* @warning Returns state_h pointer, so do not free result.
* NOTE(review): the declared return type is dl_matrix2dq_t* while state_h is a
* dl_matrix2d_t*, so the warning above and the prototype disagree. Confirm which
* one is correct before relying on the returned pointer.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons, need to be quantised
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a fully-connected layer pass, fully-quantized version.
*
* The function returns nothing; the output values of the neurons are delivered through out.
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
* @param out Output matrix receiving the output values of the neurons
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
*/
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
/**
* @brief Do a basic LSTM layer pass, fully-quantized version
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
/**
* @brief Batch-normalize a matrix, fully-quantized version
*
* The function returns nothing; m is the only non-const matrix argument, so the
* result is presumably written back into m (confirm against the implementation).
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* (NOTE(review): that function produces a float matrix while this parameter is quantized -- confirm how it is converted)
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
*/
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
/**
* @brief Does a ReLu operation on a matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return Sigmoid output
*/
int dl_sigmoid_op_q(const int in);
/**
* @brief Sigmoid operation taking and returning an int16_t value.
* NOTE(review): presumably the counterpart of dl_sigmoid_op_q for the 8-bit quantized path;
* the fixed-point format of input and output is not documented here -- confirm against the implementation.
*
* @param in Fixed-point input
* @return Sigmoid output
*/
int16_t dl_sigmoid_op_q8(const int16_t in);
/**
* @brief Does a sigmoid operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param v Fixed-point input
* @return tanh output
*/
int dl_tanh_op_q(int v);
/**
* @brief Tanh operation taking and returning an int16_t value.
* NOTE(review): presumably the counterpart of dl_tanh_op_q for the 8-bit quantized path;
* the fixed-point format of input and output is not documented here -- confirm against the implementation.
*
* @param v Fixed-point input
* @return tanh output
*/
int16_t dl_tanh_op_q8(int16_t v);
// Load / free helpers for the "mn4" and "mn3" matrices kept in PSRAM.
// NOTE(review): purpose inferred from the names only; what exactly is loaded is not visible in this header -- confirm.
void load_mat_psram_mn4(void);
void load_mat_psram_mn3(void);
void free_mat_psram_mn4(void);
void free_mat_psram_mn3(void);
// Scalar "hard" sigmoid / tanh on a quantized value interpreted with the given exponent.
// NOTE(review): presumably piecewise-linear approximations -- confirm against the implementation.
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
// Scalar "table" tanh / sigmoid on an int16_t value interpreted with the given exponent.
// NOTE(review): presumably lookup-table based -- confirm against the implementation.
int16_t dl_table_tanh_op(int16_t in, int exponent);
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
// Matrix versions of the operations above: read from in, write to out.
// NOTE(review): whether out may alias in (as for dl_sigmoid_q / dl_tanh_q) is not stated here -- confirm.
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Clip the values in the matrix that are greater than clip, float version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Clip the values in the matrix that are greater than clip, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value (given as a float even though the matrices are quantized)
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Do a basic CNN layer pass.
*
* @warning This just supports the single channel input image, and the output is single row matrix.
* That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias for the CNN layer.
* @param filter_width The width of convolution kernel
* @param filter_height The height of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1.
* NOTE(review): declared as a pointer to const although it is described as the result -- confirm how the result is delivered.
* @return The result of CNN layer.
*/
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
/**
* @brief Do a basic CNN layer pass, quantised version.
*
* @warning This just supports the single channel input image, and the output is single row matrix.
* That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Quantised weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias of the neurons.
* @param filter_width The width of convolution kernel
* @param filter_height The height of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1
* NOTE(review): declared as a pointer to const although it is described as the result -- confirm how the result is delivered.
* @return The result of CNN layer
*/
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_COEFGETTER_IF_H
#define DL_LIB_COEFGETTER_IF_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "cJSON.h"
#ifdef __cplusplus
extern "C" {
#endif
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
//dl_batch_normalize_get_sqrtvar first.
#define COEF_GETTER_HINT_BNVAR (1<<0)
/*
This struct describes the basic information of model data:
word_num: the number of wake words or speech commands
word_list: the name list of wake words or speech commands
win_list: one int per word (NOTE(review): meaning not documented here; presumably a detection window per word -- confirm)
thresh_list: the threshold list of wake words or speech commands
info_str: the string used to reflect the version and information of model data
which consists of the architecture of network, the version of model data, wake words and their thresholds
*/
typedef struct {
int word_num;
char **word_list;
int *win_list;
float *thresh_list;
char *info_str;
} model_info_t;
/*
Alphabet struct describes the basic graphemes or phonemes.
item_num: the number of basic items (graphemes or phonemes)
items: the list of basic items
*/
typedef struct {
int item_num;
char **items;
}alphabet_t;
/*
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
For the matrix getters (getter_f, getter_q, getter_q8), the name describes the name of the coefficient matrix, usually
the same as the Numpy filename the coefficient was originally stored in. The arg argument can be used to optionally pass
an additional user-defined argument to the getter (e.g. the directory to look for files in the case of the Numpy file
loader getter). The hint argument is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the
free_f/free_q/free_q8 functions to release the memory for the returned matrices, when applicable.
The remaining getters take only arg and return model-level data: getter_info returns a model_info_t,
getter_alphabet returns an alphabet_t and getter_config returns a cJSON object.
NOTE(review): ownership of the objects returned by getter_info/getter_alphabet/getter_config is not stated here -- confirm.
*/
typedef struct {
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
void (*free_f)(const dl_matrix2d_t *m);
void (*free_q)(const dl_matrix2dq_t *m);
void (*free_q8)(const dl_matrix2dq8_t *m);
const model_info_t* (*getter_info)(void *arg);
const alphabet_t* (*getter_alphabet)(void *arg);
const cJSON* (*getter_config)(void *arg);
} model_coeff_getter_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,180 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONV_QUEUE_H
#define DL_LIB_CONV_QUEUE_H
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
//Floating-point element type used by the queue API in this header.
//NOTE(review): dl_lib_matrix.h (included above) presumably declares fptp_t as well; a repeated identical typedef is only valid from C11 on -- confirm this duplicate is intended.
typedef float fptp_t;
//Flags for matrices
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
//Float convolution FIFO queue.
typedef struct {
int n; /**< the length of queue */
int c; /**< the channel number of queue element*/
int front; /**< the front(top) position of queue */
int flag; /**< not used*/
fptp_t *item; /**< Pointer to item array */
} dl_conv_queue_t;
/**
* @brief Allocate a convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
/**
* @brief Allocate a convolution queue from psram
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
/**
* @brief Free a convolution queue
*
* @param cq The convolution queue to free
*/
void dl_conv_queue_free(dl_conv_queue_t *cq);
/**
* @brief Transfer a convolution queue into a 2D matrix.
* NOTE(review): undocumented in the original; presumably copies the queue items into out in queue order -- confirm.
*
* @param cq Input convolution queue
* @param out Output matrix (presumably must already be allocated -- confirm)
*/
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
/**
* @brief Move the front pointer of the queue forward, so that the
* first (oldest) element becomes the last (newest) element.
*
* @param cq Input convolution queue
* @return Pointer of oldest element
*/
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input convolution queue
* @param item The new element
*/
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a sigmoid operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, does a sigmoid operation
* through this pointer, then returns the pointer.
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, does a tanh operation
* through this pointer, then returns the pointer.
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a softmax operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, does a softmax operation
* through this pointer, then returns the pointer.
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
// ReLU counterpart of dl_sigmoid_step / dl_tanh_step / dl_softmax_step.
// NOTE(review): undocumented in the original; presumably applies ReLU to the element at offset and returns its pointer -- confirm.
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
// Same idea as dl_relu_step but operating on a matrix (the parameter is named cq although it is a dl_matrix2d_t).
// NOTE(review): how offset indexes into the matrix is not visible here -- confirm.
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
// Concatenate a convolution queue with a matrix and return the resulting matrix.
// NOTE(review): concatenation axis and ownership of the returned matrix are not documented -- confirm.
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
// LSTM layer pass whose input is a convolution queue instead of a matrix.
// NOTE(review): presumably mirrors dl_basic_lstm_layer from dl_lib.h, including its return-value contract -- confirm.
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Fast implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queues and matrices should be allocated. The returned pointer
* is the first element of the output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @return The result of atrous convolution
*/
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
// Variant of dl_atrous_conv1d_step whose output is a matrix rather than a convolution queue.
// NOTE(review): undocumented in the original; what the returned pointer refers to is not visible here -- confirm.
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
/**
* @brief Fast implementation of a dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @warning All input and output convolution queues and matrices should be allocated. The returned pointer
* is the first element of the output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @return The result of dilation layer
*/
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
// Test entry point for the atrous convolution, parameterised by filter size, rate and channel counts.
// NOTE(review): undocumented in the original; what it checks or prints is not visible here -- confirm.
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,303 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ8_QUEUE_H
#define DL_LIB_CONVQ8_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
//8-bit fixed-point convolution queue; the items are laid out as [nch, n, c].
typedef struct {
int n; /**< the length of queue */
int c; /**< the number of queue element*/
int front; /**< the front(top) position of queue */
int nch; /**< the channel of queue */
int exponent; /**< The values in items should be multiplied by pow(2,exponent)
to get the real values */
q8tp_t *itemq; /**< Pointer to item array */
} dl_convq8_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue with multiple channels
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate an 8-bit fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cqm The fixed-point convolution queue whose items are zeroed
*/
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
/**
* @brief Move the front pointer of the queue forward, so that the
* first (oldest) element becomes the last (newest) element.
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
// Multi-element variant of dl_convq8_queue_pop.
// NOTE(review): undocumented in the original; presumably advances the front by n elements and returns the oldest one -- confirm.
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of the fixed-point numbers is described by the Qm.f notation,
* given through m_bit and f_bit.
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
/**
* @brief Fast and quantised implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queues and matrices should be allocated.
* NOTE(review): earlier documentation described a returned pointer to the last element of the
* output queue, but this function returns void; the result is delivered through out.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel Kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
* (NOTE(review): the kernel here is 8-bit; "16-bit" may be carried over from the 16-bit variant -- confirm)
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implementation of a dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @warning All input and output convolution queues and matrices should be allocated.
* NOTE(review): earlier documentation described a returned pointer to the last element of the
* output queue, but this function returns void; the result is delivered through out.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
// Add two 8-bit fixed-point convolution queues.
// NOTE(review): undocumented in the original; the return type is the float queue dl_conv_queue_t, not dl_convq8_queue_t --
// confirm whether that is intended and who owns the returned queue.
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
// Sigmoid returning an int8_t for an int input.
// NOTE(review): undocumented in the original; the name suggests a lookup table, and the input/output fixed-point formats are not visible here -- confirm.
int8_t dl_sigmoid_lutq8(int in);
/**
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
*
* The result is returned as an array of queue pointers; release it with dl_convq8_queue_mc_free().
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel number
* @return The convolution queue array, or NULL if out of memory
*/
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number
*/
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
/**
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
*
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
* @param offset Offset used to calculate the beginning of input conv queue
* @param nch The channel number
*/
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
/**
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* Usually, this layer is used as first layer for 8-bit network.
*
* @Warning All input and output convolution queues and matrices should be allocated by the caller.
* Input is an array of 16-bit queues, output is an array of 8-bit queues.
* The function returns nothing (void).
*
* @param in Input 16bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number (presumably the number of queues in the in/out arrays — TODO confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
/**
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queues and matrices should be allocated by the caller.
* The function returns nothing (void).
*
* @param in Input 8bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number (presumably the number of queues in the in/out arrays — TODO confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
int nch, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of 8-bit dilation layer for multi-channel input, as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queues and matrices should be allocated by the caller.
* The function returns nothing (void).
*
* @param in Input 8-bit fixed-point convolution queue array
* @param out Output 8-bit fixed-point convolution queue array
* @param nch The channel number (presumably the number of queues in the in/out arrays — TODO confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
void print_convq8(dl_convq8_queue_t *cq, int offset);
void print_convq(dl_convq_queue_t *cq, int offset);
void dl_relu_convq8(dl_convq8_queue_t *cq);
void lstmq8_free(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,382 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ_QUEUE_H
#define DL_LIB_CONVQ_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib.h"
#ifdef __cplusplus
extern "C" {
#endif
//Fixed-point convolution FIFO queue.
//Item storage is laid out as [nch, n, c].
typedef struct {
int n; /*< The length of the queue */
int c; /*< The number of elements in each queue entry (the c of the [nch, n, c] layout) */
int front; /*< The front (top) position of the queue */
int nch; /*< The number of channels (the nch of the [nch, n, c] layout) */
int exponent; /*< The values in itemq should be multiplied by pow(2,exponent)
to get the real values */
qtp_t *itemq; /*< Pointer to the fixed-point item array */
} dl_convq_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
/**
* @brief Allocate a fixed-point multi-channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq_queue_free(dl_convq_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq The fixed-point convolution queue point
*/
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input fixed-point convolution queue
* @param a The new element, passed as a fixed-point matrix
* @param shift NOTE(review): not documented in the original header; presumably a bit shift
* applied to the values of a when they are stored — confirm against the implementation
*/
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param last_num Offset from the front of the queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of convolution queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
/**
* @brief Does a tanh operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, and does a
* tanh operation through this pointer. The function returns nothing (void);
* presumably the element is updated in place — TODO confirm.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
*/
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on one element of a multi channel convolution queue.
* Gets the pointer of the element in the convolution queue by offset, and does a
* tanh operation through this pointer. The function returns nothing (void);
* presumably the elements are updated in place — TODO confirm.
*
* @param cqm Input fixed-point multi channel convolution queue
* @param offset Offset from the front of the queue
* @param nch The channel number of cqm
*/
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
/**
* @brief Does a relu operation on one element of the convolution queue.
* Gets the pointer of the element in the convolution queue by offset, and does a
* relu operation through this pointer. The function returns nothing (void);
* presumably the element is updated in place — TODO confirm.
*
* @param cq Input fixed-point convolution queue
* @param clip NOTE(review): not documented in the original header; presumably the
* upper clipping value of the relu — confirm against the implementation
* @param last_num Offset from the front of the queue
*/
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, input data
stay as it is. Results are saved into the *out* array.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
* @return softmax results
*/
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param prenum NOTE(review): not documented in the original header; presumably the preload
* size, with 0 meaning the preload function is not used — confirm against the implementation
* @return The result of atrous convolution
*/
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int offset, int prenum);
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int prenum);
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
/**
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
*
* @param cq1 First fixed-point convolution queue
* @param cq2 Second fixed-point convolution queue
* @return The result of float-point convolution queue
*/
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
/**
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
*
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
* the kernel is parameters *h_weight*.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param in_weight the LSTM kernel needed by first part
* @param h_weight the LSTM kernel needed by second part
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
* @param in_shift Shift ratio used in first part
* @param h_shift Shift ratio used in second part
* @return The result of LSTM layer
*/
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
dl_matrix2dq_t *bias, int prenum);
/**
* @brief Allocate a fixed-point multi channel convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @param nch the channel number of convolution queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a fixed-point multi channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number of cqm
*/
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset the offset to calculate input convq
* @param prenum the preload size, 0: do not use preload function
* @return The result of atrous convolution
*/
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* kernel,
dl_matrix2dq_t* bias,
int shift,
int offset,
int prenum);
/**
* @brief Fast implement of dilation layer as follows for multi channel input
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @param offset The offset to calculate input convq
* @param prenum The preload size, 0: do not use preload function
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* filter_kernel,
dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel,
dl_matrix2dq_t* gate_bias,
int filter_shift,
int gate_shift,
int offset,
int prenum);
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
void test_lstm_convq(int size, int in_dim, int lstm_cell);
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,257 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIX_H
#define DL_LIB_MATRIX_H
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef float fptp_t;
#if CONFIG_BT_SHARE_MEM_REUSE
extern multi_heap_handle_t gst_heap;
#endif
//Flags for matrices
#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */
#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */
//'Normal' float matrix.
//Items are stored row by row: DL_ITM addresses item (x, y) as item[x + y*stride].
typedef struct {
int w; /*< Width */
int h; /*< Height */
int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */
int flags; /*< Flags. OR of DL_MF_* values */
fptp_t *item; /*< Pointer to item array */
} dl_matrix2d_t;
//Macro to quickly access the raw items in a matrix.
//Item (x, y) is stored at item[x + y*stride]; the expansion is usable as an lvalue.
//Every argument, including the matrix pointer m, is parenthesized so that pointer
//expressions such as (mats + 1) or &mat expand correctly.
//Note: m is evaluated twice, so do not pass an argument with side effects.
#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
/**
* @brief Free a matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrix_free(dl_matrix2d_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrix_zero(dl_matrix2d_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
*/
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief select a range of items from an existing matrix and flatten them into one dimension.
*
* @Warning The results are flattened in row-major order.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief Generate a matrix from existing floating-point data
*
* @param w Width of resulting matrix
* @param h Height of resulting matrix
* @param stride NOTE(review): not documented in the original header; presumably the row stride
* of the resulting matrix (items to skip to reach the same position in the next row) — confirm
* @param data Data to populate matrix with
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
*/
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
/**
* @brief Multiply a pair of matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two matrices : res=a.b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Add a pair of matrices item-by-item: out=a+b
*
* @param a First matrix
* @param b Second matrix
* @param out Added data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Divide a pair of matrices item-by-item: res=a/b
*
* @param a First matrix
* @param b Second matrix
* @param out Divided data (the res of the formula above). Can be equal to a or b to overwrite that.
*/
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Subtract a matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param out Subtracted data (the res of the formula above). Can be equal to a or b to overwrite that.
*/
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Add a constant to every item of the matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
/**
* @brief Concatenate the rows of two matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated matrix with as values a|b
*/
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Print the contents of a matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrix(const dl_matrix2d_t *a);
/**
* @brief Return the average square error given a correct and a test matrix.
*
* ...Well, more or less. If anything, it gives an indication of the error between
* the two. Check the code for the exact implementation.
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return value indicating the relative difference between matrices
*/
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Get a specific item from the matrix
*
* Please use these for external matrix access instead of DL_ITM
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
return DL_ITM(m, x, y);
}
/**
* @brief Write one item of a float matrix
*
* Preferred accessor for code outside the matrix library; use it rather than
* touching the raw item array through DL_ITM.
*
* @param m Matrix to write to
* @param x Column address
* @param y Row address
* @param val Value to store at column x of row y
*/
static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
    /* Rows are stride items apart, so row y starts at y * stride. */
    const int index = x + y * m->stride;
    m->item[index] = val;
}
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,387 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ_H
#define DL_LIB_MATRIXQ_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef int16_t qtp_t;
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
//for easy use as a multiplicand without stressing out the flash cache too much.
//DL_ITMQ addresses item (x, y) as itemq[y + x*stride].
typedef struct {
int w; //Width of the matrix
int h; //Height of the matrix
int stride; //Normally equals h, not w!
int flags; //Presumably an OR of DL_MF_* values, as for dl_matrix2d_t - TODO confirm
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
qtp_t *itemq; //Pointer to the fixed-point item array
} dl_matrix2dq_t;
//Shift count used to build DL_QTP_RANGE.
#define DL_QTP_SHIFT 15
//Evaluates to 32767, the largest positive value that fits in a qtp_t (int16_t).
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
//Macro to quickly access the raw items in a quantized matrix.
//Storage is inverted compared to the float matrix: item (x, y) is stored at
//itemq[y + x*stride]. The expansion is usable as an lvalue.
//Every argument, including the matrix pointer m, is parenthesized so that pointer
//expressions such as (mats + 1) or &mat expand correctly.
//Note: m is evaluated twice, so do not pass an argument with side effects.
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
#define DL_SHIFT_AUTO 32
/**
* @info About quantized matrices and shift values
*
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
* time. Quantized matrices can speed up your operations, but come with some quirks, and
* it's good to understand how they work before using them.
*
* The data in the quantized matrix type is stored similarly to floating-point types:
* when storing a real value, the value is stored as a mantissa (base number) and an
* exponent. The 'real' value that can be re-derived from those two numbers is something
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
* the standard floating point implementations like e.g. IEEE-754.
*
* The difference with respect to quantized matrices is that for a quantized matrix, it is
* assumed all values stored have more-or-less the same order of magnitude. This allows the
* matrix to only store all the mantissas, while the exponents are shared; there is only one
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
* integer arithmetic. It also nets us some memory savings - while normally a floating point
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
* memory requirements.
*
* While most of the details of handling the intricacies of the quantized matrixes are done
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
* specifically in places where addition/subtraction/division happens.
*
* The problem is that the routines do not know what the size of the resulting operation is. For
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
* assume the mantissas need to be scaled back, we may lose precision.
*
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
* the argument is zero, the routine will be conservative, that is, increase the exponent of
* the result to such an extent it's mathematically impossible a value in the result will exceed
* the maximum value that can be stored. However, when this argument is larger than zero, the
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
* value possible. (Neural networks usually are okay with this happening for a limited amount
* of matrix indices).
*
* For deciding on these shift values, it is recommended to start with a shift value of one, then
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
* else.
*
* For neural networks and other noise-tolerant applications, note that even when
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
* to slightly improved precision. Feel free to experiment.
**/
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
/**
* @brief Convert a floating-point matrix to a quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
/**
* @brief Convert a floating-point matrix to a quantized matrix with an explicitly given fixed-point format
*
* NOTE(review): the original header left this function undescribed. Everything below is inferred
* from the signature and the parameter names; confirm against the implementation.
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use; presumably a new one is allocated when NULL — TODO confirm
* @param m_bit Presumably the number of integer bits, including the sign bit, of the Qm.f format — TODO confirm
* @param f_bit Presumably the number of fractional bits of the Qm.f format — TODO confirm
* @return Presumably the quantized version of the floating-point matrix — TODO confirm
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
/**
* @brief Convert a quantized matrix to a floating-point one.
*
* @param m Quantized matrix to convert
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
* @Return The floating-point version of the quantized matrix
**/
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
*
* @param m Matrix to free
*/
void dl_matrixq_free(dl_matrix2dq_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrixq_zero(dl_matrix2dq_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
* @return NOTE(review): presumably the newly allocated copy; the original header does not describe
* the return value or the out-of-memory behaviour -- confirm against the implementation.
*/
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a fixed-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
* much slower than dl_matrixq_dot .
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a floating-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
* much slower than dl_matrixq_dot_matrix_out.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
*
* @param a First multiplicand; float matrix
* @param b Second multiplicand; quantized matrix
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrixq(const dl_matrix2dq_t *a);
/**
* @brief Add a pair of quantized matrices item-by-item: res=a+b
*
* @param a First matrix
* @param b Second matrix
* @param res Added data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @warning In contrast to the floating point equivalent of this function, the fixed-point version
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
* garbage after modifying the data in one of the slices.
*
* @param src The existing matrix to slice
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Select a range of items from an existing matrix and flatten them into one dimension.
*
* @warning The results are flattened in row-major order.
*
* @param src The existing matrix to take the items from
* @param x X-offset of the origin of the selected range within the source matrix
* @param y Y-offset of the origin of the selected range within the source matrix
* @param w Width of the selected range
* @param h Height of the selected range
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flattened matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Subtracted data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
*/
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
/**
* @brief Divide a pair of quantized matrices item-by-item: out=a/b
*
* @param a First matrix
* @param b Second matrix
* @param out Divided data. Can be equal to a or b to overwrite that.
* @param shift Shift value. NOTE(review): not described in the original header; presumably the same
* kind of shift ratio the other dl_matrixq_* routines take -- confirm against the implementation.
*/
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
/**
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
* rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Concatenate the rows of two quantized matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated quantized matrix with as values a|b
*/
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Add a constant to every item of the quantized matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
* @param shift Shift value. NOTE(review): not described in the original header; presumably the same
* kind of shift ratio the other dl_matrixq_* routines take -- confirm against the implementation.
*/
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
/**
* @brief Check the sanity of a quantized matrix
*
* Due to the nature of quantized matrices, depending on the calculations a quantized
* matrix is the result of and the shift values chosen in those calculations, a quantized
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
* most significant mantissa bits are unused, or because a fair amount of mantissas are
* clipped. This function checks if this is the case and will report a message to stdout
* if significant loss of precision is detected.
*
* @param m The quantized matrix to check
* @param name A string to be displayed in the message if the sanity check fails
* @return True if matrix is sane, false otherwise
**/
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
/**
* @brief Re-adjust the exponent of the matrix to fit the mantissa better
*
* This function will shift up all the data in the mantissas so there are no
* most-significant bits that are unused in all mantissas. It will also adjust
* the exponent to keep the actual values in the matrix the same.
*
* Some operations done on a matrix, especially operations that re-use the
* result of earlier operations done in the same way, can lead to the loss of
* data because the exponent of the quantized matrix is never re-adjusted. You
* can do that explicitly by calling this function.
*
* @param m The matrix to re-adjust
**/
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
/**
* @brief Get the floating-point value of a specific item from the quantized matrix
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
/**
* @brief Set a specific item in the quantized matrix to the given
* floating-point value
*
* @warning If the given value is more than the exponent in the quantized matrix
* allows for, all mantissas in the matrix will be shifted down to make the value
* 'fit'. If, however, the exponent is such that the value would result in a
* quantized mantissa of 0, nothing is done.
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ8_H
#define DL_LIB_MATRIXQ8_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#include "dl_lib.h"
#include "dl_lib_matrixq.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef int8_t q8tp_t;
typedef struct {
int w;
int h;
int stride; //Normally equals h, not w!
int flags;
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
q8tp_t *itemq;
} dl_matrix2dq8_t;
#define DL_Q8TP_SHIFT 7
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
/*
 * Access item (x, y) of an 8-bit quantized matrix; usable as an lvalue.
 * The item index is y + x * stride, i.e. consecutive x values are m->stride items apart.
 * Every argument is parenthesized so that pointer expressions such as `p + 1` or `&mat`
 * expand correctly. Note that m is evaluated twice, so it must not have side effects.
 */
#define DL_ITMQ8(m, x, y) ((m)->itemq[(y) + (x) * (m)->stride])
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
*
* @param m Matrix to free
*/
void dl_matrixq8_free(dl_matrix2dq8_t *m);
/**
* @brief Copy a quantized matrix
* Copy a quantized matrix from flash or iram/psram
* NOTE(review): judging by the function name the destination is presumably psram -- confirm.
*
* @param m Matrix to copy
* @return NOTE(review): presumably the newly allocated copy; the original header does not describe
* the return value or the out-of-memory behaviour -- confirm against the implementation.
*/
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
/**
* @brief Convert a floating-point matrix to an 8-bit quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @return The quantized version of the floating-point matrix
*/
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,105 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AEC_H_
#define _ESP_AEC_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define USE_AEC_FFT // Not kiss_fft
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
#define AEC_FRAME_LENGTH_MS 32
typedef struct aec_handle_t aec_handle_t;
// AEC operating modes. Note: the value 2 is not assigned to any mode.
typedef enum {
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
} aec_mode_t;
/**
* @brief Creates an instance to the AEC structure.
* Please get frame size by aec_get_chunksize() function
*
* @param sample_rate The Sampling frequency (Hz) must be 16000.
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
* @param channel_num The input microphone channel number
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
/**
* @brief Creates an instance to the AEC structure, same with aec_create().
*
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
* @param channel_num The input microphone channel number
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
/**
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and the frame from the mic.
*
* @warning The indata, refdata and outdata must be 16-bit signed. Please allocate memory by heap_caps_aligned_alloc().
*
* @param handel The instance of AEC.
* @param indata An array of 16-bit signed audio samples from mic. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
* @return None
*
*/
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
/**
* @brief Get frame size of AEC (the samples of one frame)
* @param handle The instance of AEC.
* @return Frame size
*/
int aec_get_chunksize(const aec_handle_t *handle);
/**
* @brief Get AEC mode string
*
* @param aec_mode The mode of AEC.
*
* @return AEC mode string
*/
char * aec_get_mode_string(aec_mode_t aec_mode);
/**
* @brief Free the AEC instance
*
* @param handel The instance of AEC.
*
* @return None
*
*/
void aec_destroy(aec_handle_t *handel);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AEC_H_

View File

@@ -0,0 +1,81 @@
#ifndef _ESP_AFE_AEC_H_
#define _ESP_AFE_AEC_H_
#include "esp_aec.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
aec_handle_t *handle;
aec_mode_t mode;
afe_pcm_config_t pcm_config;
int frame_size;
int16_t *data;
} afe_aec_handle_t;
/**
* @brief Creates an instance to the AEC structure.
*
* @warning Currently only support 1 microphone channel and 1 playback channel.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
* channel will be selected.
*
* The input format, same as afe config:
* M to represent the microphone channel
* R to represent the playback reference channel
* N to represent an unknown or unused channel
*
* For example, input_format="MMNR" indicates that the input data consists of four channels,
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
*
* @param input_format The input format
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
* esp32c5.
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_aec_handle_t* The created AEC instance.
* NOTE(review): the failure value is not stated in the original header; presumably NULL -- confirm.
*/
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
/**
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and the frame from the mic.
*
* @param handel The instance of AEC.
* @param indata Input audio data, format is defined by input_format.
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned;
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory
* @return The bytes of outdata.
*/
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);
/**
* @brief Get frame size of AEC (the samples of one frame)
* @param handle The instance of AEC.
* @return Frame size
*/
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
/**
* @brief Free the AEC instance
*
* @param handel The instance of AEC.
*
* @return None
*
*/
void afe_aec_destroy(afe_aec_handle_t *handel);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AFE_AEC_H_

View File

@@ -0,0 +1,288 @@
#pragma once
#include "esp_aec.h"
#include "esp_agc.h"
#include "esp_nsn_models.h"
#include "esp_vad.h"
#include "esp_vadn_models.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "model_path.h"
#include "stdbool.h"
#include "stdint.h"
#include "stdlib.h"
#ifdef __cplusplus
extern "C" {
#endif
// AFE: Audio Front-End
// SR: Speech Recognition
// VC: Voice Communication
// Set AFE_SR mode
typedef enum {
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
} afe_sr_mode_t;
// Set AFE mode
typedef enum {
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
} afe_mode_t;
// Set AFE type
typedef enum {
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
AFE_TYPE_VC = 1, // Voice communication scenarios, 16KHz input, including nonlinear noise suppression
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8KHz input, note that the input data must be 8KHz
} afe_type_t;
typedef enum {
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
} afe_memory_alloc_mode_t;
typedef enum {
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
} afe_mn_peak_agc_mode_t;
typedef struct {
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
int mic_num; // microphone channel number
uint8_t *mic_ids; // microphone channel indices
int ref_num; // playback reference channel number
uint8_t *ref_ids; // playback reference channel indices
int sample_rate; // sample rate of audio
} afe_pcm_config_t;
typedef enum {
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
AFE_NS_MODE_NET = 1, // please use model name of NSNET
} afe_ns_mode_t;
typedef enum {
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
} afe_agc_mode_t;
/**
* @brief Function to get the debug audio data
*
* @param data The debug audio data, which must not be modified. It should be copied away as soon as possible
* to avoid blocking for too long.
* @param data_size The number of bytes of data.
*/
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
typedef enum {
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
AFE_DEBUG_HOOK_MAX = 2
} afe_debug_hook_type_t;
typedef struct {
afe_debug_hook_type_t hook_type; // debug type of hook
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
} afe_debug_hook_t;
typedef struct {
/********** AEC(Acoustic Echo Cancellation) **********/
bool aec_init; // Whether to init aec
aec_mode_t aec_mode; // The mode of aec, see aec_mode_t (e.g. AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF)
int aec_filter_length; // The filter length of aec
/********** SE(Speech Enhancement, microphone array processing) **********/
bool se_init; // Whether to init se
/********** NS(Noise Suppression) **********/
bool ns_init; // Whether to init ns
char *ns_model_name; // Model name of ns
afe_ns_mode_t afe_ns_mode; // Model mode of ns
/********** VAD(Voice Activity Detection) **********/
bool vad_init; // Whether to init vad
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
char *vad_model_name; // The model name of vad. If it is null, WebRTC VAD will be used.
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
// 1000 ms
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
// If you find vad cache can not cover all speech, please increase this value.
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
/********** WakeNet(Wake Word Engine) **********/
bool wakenet_init; // Whether to init wakenet
char *wakenet_model_name; // The model name of wakenet 1
char *wakenet_model_name_2; // The model name of wakenet 2, if there is a wakenet 2
det_mode_t wakenet_mode; // The mode of wakenet
/********** AGC(Automatic Gain Control) **********/
bool agc_init; // Whether to init agc
afe_agc_mode_t
agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain.
int agc_compression_gain_db; // Compression gain in dB (default 9)
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
/********** General AFE(Audio Front End) parameter **********/
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
// directly on the output amplitude: afe_linear_gain * amplitude.
bool debug_init;
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet
} afe_config_t;
/**
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
* on the chip target and input format. You can manually fine-tune it after creating the configuration
*
* The input format:
* M to represent the microphone channel
* R to represent the playback reference channel
* N to represent an unknown or unused channel
*
* For example, input_format="MMNR" indicates that the input data consists of four channels,
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
*
* @param input_format The input format
* @param models Models from partition, which is configured by Kconfig
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_config_t* The default config of afe
*/
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
/**
* @brief Check AFE configuration and make sure it is correct.
*
* @warning If there is a configuration conflict, this function will modify some parameters.
* The guiding principle behind these modifications is to maintain the highest performance of the output audio
* and results, and to remove conflicts between different algorithms.
*
* For example, if the input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
* If the SE(BSS) algorithm is deactivated, only the first microphone channel will be used.
*
* @param afe_config Input AFE config
*
* @return afe_config_t* The modified AFE config
*/
afe_config_t *afe_config_check(afe_config_t *afe_config);
/**
* @brief Parse input format
*
* @param input_format The input format, same with afe_config_init() function
* @param pcm_config The pcm config
*
* @return true if the input format is parsed successfully, otherwise false
*/
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
/**
* @brief Parse I2S input data
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param mic_data The output microphone data
* @param ref_data The output playback reference data
* @param pcm_config The pcm config
*
*/
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
/**
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param channel_num The channel number of data
* @param out_data The output data
*
*/
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
/**
* @brief Format input data, from contiguous arrangement to interleaved arrangement
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param channel_num The channel number of data
* @param out_data The output data
*
*/
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
/**
* @brief Adjust the gain of input data
*
* @warning the input data will be modified inplace.
*
* @param data The input audio data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param factor The gain factor
*
* @return int16_t* The output audio data
*/
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
/**
* @brief Concatenate multi-channel audio data from an input buffer into an output buffer
*
* NOTE(review): the original brief and warning were copied verbatim from afe_adjust_gain
* ("Adjust the gain of input data" / "the input data will be modified inplace"). The summary above is
* inferred from the function name and parameters only -- confirm against the implementation, including
* whether in_data is really modified.
*
* @param in_data The input audio data
* @param in_frame_size Frame size of the input data
* @param channel_num The channel number of input data, which is same as output data
* @param out_data The output audio data
* @param out_frame_size Frame size of the output data
*
*/
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
/**
* @brief Copy the afe config
*
* @param dst_config The destination afe config
* @param src_config The source afe config
*
* @return The destination afe config
*/
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
/**
* @brief Print the afe config
*
* @param afe_config The afe config
*/
void afe_config_print(const afe_config_t *afe_config);
/**
* @brief Allocate afe config
*
* Takes no arguments. The parameter list is spelled `(void)` so that C compilers see a real
* prototype and reject calls that pass arguments; in C++ the meaning is unchanged.
*
* @return The afe config pointer
*/
afe_config_t *afe_config_alloc(void);
/**
* @brief Free afe config
*
* @param afe_config The afe config pointer
*/
void afe_config_free(afe_config_t *afe_config);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,48 @@
#ifndef _ESP_AFE_DOA_H_
#define _ESP_AFE_DOA_H_
#include "esp_doa.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
doa_handle_t *doa_handle;
afe_pcm_config_t pcm_config;
int16_t *leftdata;
int16_t *rightdata;
int frame_size;
} afe_doa_handle_t;
/**
* @brief Initialize SRP-PHAT processor
* @param input_format The input format
* @param fs Sampling rate (Hz), e.g., 16000
* @param resolution Angular search resolution (degrees), e.g., 20
* @param d_mics Microphone spacing (meters), e.g., 0.06
* @param input_timedate_samples input timedate samples, e.g., 1024
* (NOTE(review): "timedate" is presumably a typo for "time-domain" -- confirm)
* @return Initialized afe_doa_handle_t object pointer. Using the example values above is recommended for better performance
*/
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
/**
* @brief Process audio frame for direction estimation
* @param handle afe_doa_handle_t instance pointer
* @param indata Input audio data, format is defined by input_format.
* @return Estimated sound direction in degrees, e.g., 0-180
*/
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
/**
* @brief Release all allocated resources
* @param handle afe_doa_handle_t instance pointer to be freed
*/
void afe_doa_destroy(afe_doa_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_AFE_DOA_H_ */

View File

@@ -0,0 +1,237 @@
#pragma once
#include "esp_afe_config.h"
#include "stdbool.h"
#include "stdint.h"
#include "stdlib.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#ifdef __cplusplus
extern "C" {
#endif
// AFE: Audio Front-End
// SR: Speech Recognition
// afe_sr/AFE_SR: the audio front-end for speech recognition
// Opaque AFE_SR data container
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
/**
* @brief The state of vad
*/
typedef enum {
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
} afe_vad_state_t;
/**
* @brief The result of fetch function
*/
typedef struct afe_fetch_result_t {
int16_t *data; // the target channel data of audio.
int data_size; // the size of data. The unit is byte.
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
// audio that was truncated.
int vad_cache_size; // the size of vad_cache. The unit is byte.
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
// (note: invalid in vc). If wakenet is enabled, the window length is the receptive field of
// wakenet (about 1.5s), otherwise it is the frame length.
wakenet_state_t wakeup_state; // the value is wakenet_state_t
int wake_word_index; // if the wake word is detected, this stores the wake word index, which starts from 1.
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
// starts from 1.
vad_state_t vad_state; // the value is vad_state_t
int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. The unit is the number of samples.
int ret_value; // the return state of fetch function
int16_t *raw_data; // the multi-channel output data of audio.
int raw_data_channels; // the channel number of raw data
float ringbuff_free_pct; // the percent of ringbuff free size. if the value is larger than 0.5, it means the ringbuff is busy.
// NOTE(review): a large *free* percentage indicating a busy buffer looks inverted -- confirm.
void *reserved; // reserved for future use
} afe_fetch_result_t;
/**
* @brief Function to initialize an AFE_SR instance
*
* @param afe_config The config of AFE_SR
* @returns Handle to the AFE_SR data
*/
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
/**
* @brief Get the number of samples per channel in one frame
*
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* This function type is shared by the `get_feed_chunksize` and `get_fetch_chunksize` members of
* esp_afe_sr_iface_t.
*
* @param afe The AFE_SR object to query
* @return The number of samples per channel in one frame
*/
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the channel number
*
* This function type is shared by the `get_channel_num`, `get_feed_channel_num` and
* `get_fetch_channel_num` members of esp_afe_sr_iface_t.
*
* @param afe The AFE_SR object to query
* @return The total number of channels
*/
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the sample rate of the samples to feed to the AFE_SR
*
* @param afe The AFE_SR object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
/**
* @brief Feed samples of an audio stream to the AFE_SR
*
* @warning The input data should be arranged in channel-interleaved format.
* The last channel is the reference signal if reference data is present.
*
* @param afe The AFE_SR object to feed
*
* @param in The input microphone signal; only signed 16-bit @ 16 kHz is supported. The frame size can be queried
* with `get_feed_chunksize`. The buffer is not modified through this pointer (it is const-qualified).
* @return The size of input
*/
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
/**
* @brief Fetch enhanced samples of an audio stream from the AFE_SR
*
* @warning The output is single channel data, no matter how many channels the input is.
* Timeout is 2000 ms. If you want to adjust the timeout, please refer to the definition of `fetch_with_delay`.
*
* @param afe The AFE_SR object to fetch from
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
* audio can be queried with `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
/**
* @brief Fetch enhanced samples of an audio stream from the AFE_SR; same as the function `fetch`, but with a
* caller-supplied timeout
*
* @warning The output is single channel data, no matter how many channels the input is.
*
* @param afe The AFE_SR object to fetch from
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
* audio can be queried with `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
/**
* @brief Reset the ring buffer of the AFE.
*
* @param afe The AFE_SR object whose ring buffer is reset
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
/**
* @brief Set the wakenet detection threshold
*
* @param afe The AFE_SR object to configure
* @param index The wakenet index; only 1 (wakenet1) and 2 (wakenet2) are supported
* @param threshold The wakenet detection threshold; the value is between 0.4 and 0.9999.
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
/**
* @brief Reset the wakenet detection threshold to its initial state
*
* @param afe The AFE_SR object to configure
* @param index The wakenet index; only 1 (wakenet1) and 2 (wakenet2) are supported
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
/**
* @brief Reset one function/module/algorithm.
*
* In esp_afe_sr_iface_t this function type is used for the `reset_vad` member.
*
* @param afe The AFE_SR object to operate on
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
/**
* @brief Disable one function/module/algorithm.
*
* In esp_afe_sr_iface_t this function type is used for the `disable_*` members
* (wakenet, aec, se, vad, ns, agc).
*
* @param afe The AFE_SR object to operate on
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
/**
* @brief Enable one function/module/algorithm.
*
* In esp_afe_sr_iface_t this function type is used for the `enable_*` members
* (wakenet, aec, se, vad, ns, agc).
*
* @param afe The AFE_SR object to operate on
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
/**
* @brief Print the pipeline of all functions/modules/algorithms.
* The pipeline is the order in which the functions/modules/algorithms run.
* The format looks like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
*
* @param afe The AFE_SR object to query
*/
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
/**
* @brief Destroy an AFE_SR instance
*
* @param afe The AFE_SR object to destroy
*/
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
/**
* This structure contains the functions used to do operations on an AFE_SR.
* Each member is a function pointer; see the corresponding esp_afe_sr_iface_op_*_t typedef for its contract.
*/
typedef struct {
// Instance creation and audio data flow
esp_afe_sr_iface_op_create_from_config_t create_from_config;
esp_afe_sr_iface_op_feed_t feed;
esp_afe_sr_iface_op_fetch_t fetch;
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
// Frame size, channel count and sample rate queries
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same as get_feed_channel_num
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
// WakeNet threshold control
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold;
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold;
// Per-module disable/enable (and reset for VAD)
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
esp_afe_sr_iface_op_disable_func_t disable_aec;
esp_afe_sr_iface_op_enable_func_t enable_aec;
esp_afe_sr_iface_op_disable_func_t disable_se;
esp_afe_sr_iface_op_enable_func_t enable_se;
esp_afe_sr_iface_op_disable_func_t disable_vad;
esp_afe_sr_iface_op_enable_func_t enable_vad;
esp_afe_sr_iface_op_reset_op_t reset_vad;
esp_afe_sr_iface_op_disable_func_t disable_ns;
esp_afe_sr_iface_op_enable_func_t enable_ns;
esp_afe_sr_iface_op_disable_func_t disable_agc;
esp_afe_sr_iface_op_enable_func_t enable_agc;
// Diagnostics and teardown
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
esp_afe_sr_iface_op_destroy_t destroy;
} esp_afe_sr_iface_t;
// This struct is used to store the AFE handle and data for the AFE task
typedef struct {
esp_afe_sr_data_t *afe_data; // the AFE_SR instance data
esp_afe_sr_iface_t *afe_handle; // the operation table used to act on afe_data
TaskHandle_t feed_task; // task handle; presumably the task that calls feed - confirm with the users of this struct
TaskHandle_t fetch_task; // task handle; presumably the task that calls fetch - confirm with the users of this struct
} afe_task_into_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,13 @@
#pragma once
// Included before the extern "C" block: the interface header carries its own extern "C" guards, so it
// should not be wrapped in a second linkage specification here.
#include "esp_afe_sr_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Get the AFE_SR interface (operation table) for a configuration
*
* @param config The AFE configuration; it is not modified through this pointer (const-qualified)
* @return Pointer to an esp_afe_sr_iface_t.
*         NOTE(review): the failure value (presumably NULL) is not visible from this header - confirm.
*/
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,47 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AGC_H_
#define _ESP_AGC_H_
#ifdef __cplusplus
extern "C" {
#endif
/* AGC return codes: zero and positive values are valid, negative values are errors. */
typedef enum {
    ESP_AGC_SUCCESS = 0,            /* success */
    ESP_AGC_FAIL = -1,              /* AGC failed */
    ESP_AGC_SAMPLE_RATE_ERROR = -2, /* the sample rate can only be 8 kHz, 16 kHz or 32 kHz */
    ESP_AGC_FRAME_SIZE_ERROR = -3,  /* the input frame must be 10 ms long, so the frame size follows from the sample rate */
} ESP_AGE_ERR;
/* AGC operating modes. */
typedef enum {
    AGC_MODE_SR = -1, /* Bypass WEBRTC AGC */
    AGC_MODE_0 = 0,   /* Only saturation protection */
    AGC_MODE_1 = 1,   /* Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] */
    AGC_MODE_2 = 2,   /* Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] */
    AGC_MODE_3 = 3,   /* Fixed Digital Gain [compressionGaindB (default 8 dB)] */
} agc_mode_t;
/**
* @brief Open an AGC instance
*
* @param agc_mode The AGC mode, one of agc_mode_t
* @param sample_rate The sample rate in Hz. NOTE(review): per ESP_AGC_SAMPLE_RATE_ERROR only 8 kHz, 16 kHz and
*                    32 kHz appear to be accepted - confirm.
* @return Opaque AGC handle to pass to the other AGC functions.
*         NOTE(review): the failure value (presumably NULL) is not visible from this header - confirm.
*/
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
/**
* @brief Configure an AGC instance
*
* @param agc_handle The handle returned by esp_agc_open
* @param gain_dB The gain in dB (presumably the compression gain referred to by AGC_MODE_3 - confirm)
* @param limiter_enable Limiter switch (presumably non-zero enables it - confirm)
* @param target_level_dbfs The target level in dBFS (see the targetLevelDbfs notes on agc_mode_t)
*/
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
/**
* @brief Process one frame of PCM audio
*
* @param agc_handle The handle returned by esp_agc_open
* @param in_pcm The input PCM samples
* @param out_pcm The output PCM samples
* @param frame_size The frame size. NOTE(review): per ESP_AGC_FRAME_SIZE_ERROR the frame is expected to be
*                   10 ms long; the unit of this argument (samples or bytes) is not visible here - confirm.
* @param sample_rate The sample rate in Hz
* @return Presumably one of the ESP_AGE_ERR codes - confirm with the implementation.
*/
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
/**
* @brief Close an AGC instance
*
* @param agc_handle The handle returned by esp_agc_open
*/
void esp_agc_close(void *agc_handle);
#ifdef __cplusplus
}
#endif
#endif // _ESP_AGC_H_

View File

@@ -0,0 +1,41 @@
#ifndef _ESP_DOA_H_
#define _ESP_DOA_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque DOA handle: only the struct tag is declared in this header, so callers use it through pointers.
typedef struct doa_handle_t doa_handle_t;
/**
* @brief Initialize SRP-PHAT processor
* @param fs Sampling rate (Hz), e.g., 16000
* @param resolution Angular search resolution (degrees), e.g., 20
* @param d_mics Microphone spacing (meters), e.g., 0.06
* @param input_timedate_samples Number of input samples, e.g., 1024
*        (NOTE(review): "timedate" is presumably a misspelling of "time-domain" - confirm)
* @return Initialized doa_handle_t object pointer. Using the example values above is recommended for better
*         performance.
*/
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
/**
* @brief Release all allocated resources
* @param doa The doa_handle_t instance pointer to be freed
*/
void esp_doa_destroy(doa_handle_t *doa);
/**
* @brief Process an audio frame for direction estimation
* @param doa The doa_handle_t instance pointer
* @param left Left channel 16-bit PCM data
* @param right Right channel 16-bit PCM data
*        (NOTE(review): the expected length of both buffers is presumably the input_timedate_samples value
*        passed to esp_doa_create - confirm)
* @return Estimated sound direction in degrees, e.g., 0-180
*/
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_DOA_H_ */

View File

@@ -0,0 +1,93 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_MASE_H_
#define _ESP_MASE_H_
// int16_t is used by the mase_process() prototype in this header, so include <stdint.h> here to keep the
// header self-contained instead of relying on the includer's include order.
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
#define MASE_FRAME_SIZE 16 // Supports 16ms only
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array (mase_create documents mic_distance in mm)
/**
 * @brief Mic-array type. Currently a 2-mic line array and a 3-mic circular array
 * are supported.
 */
typedef enum {
    /* 2-mic line array */
    TWO_MIC_LINE = 0,
    /* 3-mic circular array */
    THREE_MIC_CIRCLE = 1
} mase_mic_array_type_t;
/**
 * @brief Operating mode: normal mode or wake-up enhancement mode.
 */
typedef enum {
    /* normal mode */
    NORMAL_ENHANCEMENT_MODE = 0,
    /* wake-up enhancement mode */
    WAKE_UP_ENHANCEMENT_MODE = 1
} mase_op_mode_t;
// Opaque handle to a MASE instance (an untyped pointer).
typedef void* mase_handle_t;
/**
* @brief Creates an instance of the MASE structure.
*
* @param fs The sampling frequency (Hz); must be 16000 (see MASE_SAMPLE_RATE).
*
* @param frame_size The length of the audio processing frame; must be 16 ms (see MASE_FRAME_SIZE).
*
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array (see mase_mic_array_type_t).
*
* @param mic_distance The distance between neighboring microphones in mm.
*
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode (see mase_op_mode_t).
*
* @param filter_strength Strength of the mic-array speech enhancement; must be 0, 1, 2 or 3.
*
* @return
* - NULL: Create failed
* - Others: An instance of MASE
*/
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
/**
* @brief Performs mic array processing for one frame.
*
* @param st The instance of MASE.
*
* @param in An array of 16-bit signed audio samples from the mics.
*
* @param dsp_out Returns the enhanced signal.
*
* @return None
*
*/
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
/**
* @brief Free the MASE instance
*
* @note The function name is spelled `mase_destory` (not `mase_destroy`); callers must use this spelling.
*
* @param st The instance of MASE.
*
* @return None
*
*/
void mase_destory(mase_handle_t st);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,86 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
// Opaque runner state: only the struct tag is declared here, so callers use it through pointers.
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
// please refer to its documentation for details.
typedef struct {
int winstep_ms; // The step between successive windows in ms. (10)
int winlen_ms; // The length of the analysis window in ms. (25)
int nch; // The number of input channels
int numcep; // The number of cepstral coefficients to return
int nfilter; // The number of filters in the filterbank
int nfft; // The FFT size
int samp_freq; // The sample rate of the signal.
int low_freq; // The lowest band edge of mel filters, in Hz. (e.g. 0)
int high_freq; // The highest band edge of mel filters, in Hz. Must not be higher than samp_freq
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
char *win_type; // Analysis window type to apply to each frame: "hanning", "hamming", "sine", "rectangular", "povey"
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
float log_epsilon; // log epsilon. (e.g. 1e-7)
bool psram_first; // Alloc memory from PSRAM first
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free a mfcc runner
*
* Function type for freeing a previously allocated mfcc runner.
*
* @param r The runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Create a mfcc runner and initialize its parameters.
*
* A mfcc runner needs to be initialized before use; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process (not modified through this pointer)
* @return Pointer to the created esp_mfcc_data_t runner.
*         NOTE(review): the failure value (presumably NULL) is not visible from this header - confirm.
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run a mfcc iteration, frame by frame
*
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples.
*             NOTE(review): the original text gave the sample count as sampfreq/(winstep_ms/1000); one window
*             step is samp_freq * winstep_ms / 1000 samples (e.g. 16000 * 10 / 1000 = 160) - confirm.
* @param nch Presumably the number of input channels (cf. esp_mfcc_opts_t::nch) - confirm with the implementation.
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
* when done with this buffer (NOTE(review): no free_cepbuf is declared in this interface - confirm). Note that
* some implementations require the buffer to be freed before another call to this function is done.
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
/**
* @brief Clean all state of a mfcc handle
*
* @param r The mfcc runner whose state is cleaned
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on a mfcc runner
*
* Each member is a function pointer; see the corresponding esp_mfcc_op_*_t typedef for its contract.
*/
typedef struct {
esp_mfcc_op_destroy_t destroy; // free a runner
esp_mfcc_op_create_t create; // create a runner from esp_mfcc_opts_t
esp_mfcc_op_run_step_t run_step; // process one step of samples
esp_mfcc_op_clean_t clean; // clean all runner state
} esp_mfcc_iface_t;

View File

@@ -0,0 +1,89 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
// Opaque runner state: only the struct tag is declared here, so callers use it through pointers.
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
// please refer to its documentation for details.
typedef struct {
int winstep_ms; // The step between successive windows in ms. (10)
int winlen_ms; // The length of the analysis window in ms. (25)
int nch; // The number of input channels
int numcep; // The number of cepstral coefficients to return
int nfilter; // The number of filters in the filterbank
int nfft; // The FFT size
int samp_freq; // The sample rate of the signal.
int low_freq; // The lowest band edge of mel filters, in Hz. (e.g. 0)
int high_freq; // The highest band edge of mel filters, in Hz. Must not be higher than samp_freq
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
char *win_type; // Analysis window type to apply to each frame: "hanning", "hamming", "sine", "rectangular", "povey"
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
float log_epsilon; // log epsilon. (e.g. 1e-7)
bool psram_first; // Alloc memory from PSRAM first
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free a mfcc runner
*
* Function type for freeing a previously allocated mfcc runner.
*
* @param r The runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Create a mfcc runner and initialize its parameters.
*
* A mfcc runner needs to be initialized before use; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process (not modified through this pointer)
* @return Pointer to the created esp_mfcc_data_t runner.
*         NOTE(review): the failure value (presumably NULL) is not visible from this header - confirm.
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run a mfcc iteration, frame by frame
*
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples.
*             NOTE(review): the original text gave the sample count as sampfreq/(winstep_ms/1000); one window
*             step is samp_freq * winstep_ms / 1000 samples (e.g. 16000 * 10 / 1000 = 160) - confirm.
* @param nch Presumably the number of input channels (cf. esp_mfcc_opts_t::nch) - confirm with the implementation.
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
* when done with this buffer (NOTE(review): no free_cepbuf is declared in this interface - confirm). Note that
* some implementations require the buffer to be freed before another call to this function is done.
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
/**
* @brief Run one step with 16-bit integer output
*
* NOTE(review): undocumented in the original. Judging by the parameter names, `samp` is the input sample
* buffer and `fbank` receives the int16 filterbank output; buffer sizes are not visible here - confirm.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit input samples
* @param fbank Output buffer of signed 16-bit values
*/
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
/**
* @brief Clean all state of a mfcc handle
*
* @param r The mfcc runner whose state is cleaned
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on a mfcc runner
*
* Each member is a function pointer; see the corresponding esp_mfcc_op_*_t typedef for its contract.
*/
typedef struct {
esp_mfcc_op_destroy_t destroy; // free a runner
esp_mfcc_op_create_t create; // create a runner from esp_mfcc_opts_t
esp_mfcc_op_run_step_t run_step; // process one step of samples, float output
esp_mfcc_op_run_step_s16_t run_step_s16; // process one step of samples, int16 output buffer
esp_mfcc_op_clean_t clean; // clean all runner state
} esp_mfcc_iface_t;

View File

@@ -0,0 +1,44 @@
#pragma once
#include "esp_mfcc_iface.h"
// Filterbank (fbank) implementations of the esp_mfcc_iface_t operation table; defined elsewhere.
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
/**
* @brief Return basic opts used in wakenet9 & multinet5
*
* Takes no arguments. Declared with (void) so that C compilers see a real prototype and reject calls that
* pass arguments (an empty parameter list `()` leaves the parameters unspecified in C before C23).
*
* @return Pointer to the options. NOTE(review): ownership of the returned object (whether the caller must
*         free it) is not visible from this header - confirm.
**/
esp_mfcc_opts_t *get_mfcc_opts_wn9(void);
/**
* @brief Return basic opts used in wakenet9s
*
* The parameters presumably map to the esp_mfcc_opts_t fields of the same names - confirm with the implementation.
*
* @param win_type Analysis window type name
* @param use_power Use the power of the fft spectrum if true, else the magnitude
* @param winstep_ms The step between successive windows in ms
* @param winlen_ms The length of the analysis window in ms
* @param nfilter The number of filters in the filterbank
* @return Pointer to the options
**/
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
/**
* @brief Return basic opts for default kaldifeat
*
* Takes no arguments. Declared with (void) so that C compilers see a real prototype and reject calls that
* pass arguments (an empty parameter list `()` leaves the parameters unspecified in C before C23).
*
* The returned options are documented as:
*
opts->psram_first = true;
opts->use_power = true;
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
opts->win_type = "povey";
opts->low_freq = 20;
opts->high_freq = 7600;
opts->samp_freq = 16000;
opts->nch = 1;
opts->nfft = 512;
opts->nfilter = 80;
opts->numcep = 80;
opts->preemph = 0.97;
opts->append_energy = false;
opts->winlen_ms = 25;
opts->winstep_ms = 10;
opts->remove_dc_offset = true;
*
* @return Pointer to the options
**/
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void);
/**
* @brief Print mfcc opts
*
* @param opts The options to print
**/
void print_mfcc_opts(esp_mfcc_opts_t *opts);

View File

@@ -0,0 +1,224 @@
#pragma once
#include "stdint.h"
#include "esp_wn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ESP_MN_RESULT_MAX_NUM 5 // size of the per-result arrays in esp_mn_results_t
#define ESP_MN_MAX_PHRASE_NUM 400 // presumably the maximum number of phrases - confirm with the implementation
#define ESP_MN_MAX_PHRASE_LEN 63 // presumably the maximum phrase length - confirm with the implementation
#define ESP_MN_MIN_PHRASE_LEN 2 // presumably the minimum phrase length - confirm with the implementation
#define ESP_MN_PREFIX "mn" // presumably the prefix of multinet model names (cf. "mn2_cn") - confirm
#define ESP_MN_ENGLISH "en" // language name string for English
#define ESP_MN_CHINESE "cn" // language name string for Chinese
/* State reported by the multinet detect function. */
typedef enum {
    ESP_MN_STATE_DETECTING = 0, /* detecting */
    ESP_MN_STATE_DETECTED = 1,  /* detected */
    ESP_MN_STATE_TIMEOUT = 2,   /* time out */
} esp_mn_state_t;
/*
 * Multinet loading mode.
 * Memory consumption decreases as the mode value increases;
 * as a consequence the CPU load goes up.
 */
typedef enum {
    /* Load all weights from PSRAM. Fastest computation with maximum memory consumption */
    ESP_MN_LOAD_FROM_PSRAM = 0,
    /* Load some weights from PSRAM and load the rest from FLASH (default) */
    ESP_MN_LOAD_FROM_PSRAM_FLASH = 1,
    /* Load more weights from FLASH. Minimum memory consumption with slowest computation */
    ESP_MN_LOAD_FROM_FLASH = 2,
} esp_mn_loader_mode_t;
/* Search methods. */
typedef enum {
    ESP_MN_GREEDY_SEARCH = 0,        /* greedy search */
    ESP_MN_BEAM_SEARCH = 1,          /* beam search */
    ESP_MN_BEAM_SEARCH_WITH_FST = 2, /* beam search with trie language model */
} esp_mn_search_method_t;
/* Language identifiers. */
typedef enum {
    CHINESE_ID = 1, /* Chinese language */
    ENGLISH_ID = 2, /* English language */
} language_id_t;
// All possible recognition results, as returned by the get_results operation
typedef struct{
esp_mn_state_t state; // The state of multinet when the results were produced
int num; // The number of phrases in the lists, num<=5 (ESP_MN_RESULT_MAX_NUM). When num=0, no phrase is recognized.
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command ids.
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase ids.
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probabilities.
char string[256]; // recognized string with commands graph
char raw_string[256]; // recognized string without commands graph
} esp_mn_results_t;
// One speech command phrase
typedef struct {
char *string; // command string
char *phonemes; // command phonemes, if applicable
int16_t command_id; // the command id
float threshold; // trigger threshold, default: 0
int16_t *wave; // prompt wave data of the phrase
} esp_mn_phrase_t;
// Node of a singly linked list of phrases
typedef struct _mn_node_ {
esp_mn_phrase_t *phrase; // the phrase held by this node
struct _mn_node_ *next; // the next node in the list
} esp_mn_node_t;
// The phrases that could not be added to the model
typedef struct{
int16_t num; // The number of error phrases, which can not be added into the model
esp_mn_phrase_t **phrases; // The array of error phrase pointers
} esp_mn_error_t;
/**
* @brief Initialize a model instance with the specified model name.
*
* @param model_name The model name. (The original text said "wakenet model name"; this is the multinet
*                   interface, so presumably a multinet model name is expected - confirm.)
* @param duration The duration (ms) to trigger the timeout
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
/**
* @brief Switch multinet mode to change memory consumption and CPU loading
*
* @warning Only supported by multinet6 or later versions
*
* @param model The model object to switch
* @param mode The multinet loader mode, see esp_mn_loader_mode_t
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
/**
* @brief Callback function type to fetch the number of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The number of samples to feed the detect function
*/
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Callback function type to fetch the number of frames recognized by the command word
*
* @param model The model object to query
* @return The number of frames recognized by the command word
*/
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
/**
* @brief Set the detection threshold to manually adjust the probability
*
* @param model The model object to configure
* @param det_threshold The threshold to trigger speech commands; the range of det_threshold is 0.0~0.9999
* @return Status as an int. NOTE(review): the meaning of the return value is not documented in this header -
*         confirm with the implementation.
*/
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Get the language of the model
*
* @param model The model object to query
* @return Language name string, e.g. ESP_MN_CHINESE or ESP_MN_ENGLISH
*/
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
/**
* @brief Feed samples of an audio stream to the speech recognition model and detect whether a speech command is found.
*
* @param model The model object to feed.
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
* get_samp_chunksize function.
* @return The state of multinet, see esp_mn_state_t
*/
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Destroy a speech commands recognition model
*
* @param model The model object to destroy
*/
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* @brief Get recognition results
*
* @param model The model object to query
*
* @return The current results, see esp_mn_results_t.
*/
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
/**
* @brief Turn on log printing
*
* @param model_data The model object to operate on.
*
*/
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
/**
* @brief Clean all status of the model
*
* @param model_data The model object to clean.
*
*/
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
/**
* @brief Set the speech commands by mn_command_root
*
* @note Although this type name starts with `esp_wn_`, it is the type of the `set_speech_commands` member of
*       esp_mn_iface_t (the multinet interface).
*
* @param model_data The model object to configure.
* @param mn_command_root The head of the linked list of speech commands (esp_mn_node_t).
* @return The error phrase info, see esp_mn_error_t.
*/
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
/**
* @brief Print out the current commands in the fst; note that commands that were "added" but not "updated"
* will not be shown here
*
* @param model_data The model object to query
*/
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
/**
* @brief Check if the input string can be tokenized
*
* @param model_data The model object to query
* @param str The input string (not modified through this pointer)
* @return Result of the check as an int. NOTE(review): the encoding of the result is not documented in this
*         header - confirm with the implementation.
*/
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
// The operations possible on a multinet (speech commands recognition) model.
// Each member is a function pointer; see the typedef of its type for the contract.
typedef struct {
esp_mn_iface_op_create_t create;
esp_mn_iface_op_get_samp_rate_t get_samp_rate;
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
esp_mn_iface_op_set_det_threshold_t set_det_threshold;
esp_mn_iface_op_get_language_t get_language;
esp_mn_iface_op_detect_t detect;
esp_mn_iface_op_destroy_t destroy;
esp_mn_iface_op_get_results_t get_results;
esp_mn_iface_op_open_log_t open_log;
esp_mn_iface_op_clean_t clean;
esp_wn_iface_op_set_speech_commands set_speech_commands; // note: the type name uses the esp_wn_ prefix
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode;
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands;
esp_mn_iface_op_check_speech_command check_speech_command;
} esp_mn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,66 @@
#pragma once
#include "esp_mn_iface.h"
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
//a specific phrase or word.
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Get the multinet handle from the model name
*
* @param model_name The name of the model
* @returns The handle (operation table) of multinet
*/
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
/**
* @brief Get the multinet language from the model name
*
* @param model_name The name of the model
* @returns The language of multinet, as a string
*/
char *esp_mn_language_from_name(char *model_name);
/*
Configure the multinet model to use based on what's selected in menuconfig.
When CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION is not defined, both macros expand to placeholder
string literals. Note that MULTINET_COEFF is then a string rather than the coefficient-getter
identifier it names in the other branch.
*/
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
#include "multinet2_ch.h"
#define MULTINET_COEFF get_coeff_multinet2_ch
#define MULTINET_MODEL_NAME "mn2_cn"
#else
#define MULTINET_COEFF "COEFF_NULL"
#define MULTINET_MODEL_NAME "NULL"
#endif
/* example
esp_mn_iface_t *multinet = esp_mn_handle_from_name(MULTINET_MODEL_NAME);
//Initialize MultiNet model data (the second argument is the timeout duration in ms)
model_iface_data_t *model_data = multinet->create(MULTINET_MODEL_NAME, 6000);
add_speech_commands(multinet, model_data);
//Set parameters of buffer
int audio_chunksize=multinet->get_samp_chunksize(model_data);
int frequency = multinet->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
esp_mn_state_t r=multinet->detect(model_data, buffer);
if (r == ESP_MN_STATE_DETECTED) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
multinet->destroy(model_data);
*/
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,86 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_NS_H_
#define _ESP_NS_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define NS_USE_SPIARM 0 // NOTE(review): presumably "SPIRAM" (external RAM) is meant; the macro name is kept as spelled - confirm
#define NS_FRAME_LENGTH_MS 10 //Supports 10ms, 20ms, 30ms
/**
* Opaque handle to an NS (noise suppression) instance.
* The sampling frequency (Hz) must be 16000Hz
*/
typedef void* ns_handle_t;
/**
* @brief Creates an instance of the NS structure.
*
* @param frame_length The length of the audio processing frame; can be 10ms, 20ms or 30ms.
*
* @return
* - NULL: Create failed
* - Others: The instance of NS
*/
ns_handle_t ns_create(int frame_length);
/**
* @brief Creates an instance of the more powerful noise suppression algorithm.
*
* @warning frame_length only supports 10 ms.
*
* @param frame_length The length of the audio processing frame; can only be 10ms.
* @param mode 0: Mild, 1: Medium, 2: Aggressive
* @param sample_rate The sample rate of the audio.
*
* @return
* - NULL: Create failed
* - Others: The instance of NS
*/
ns_handle_t ns_pro_create(int frame_length, int mode, int sample_rate);
/**
* @brief Feed samples of an audio stream to the NS and get the audio stream after noise suppression.
*
* @param inst The instance of NS.
*
* @param indata An array of 16-bit signed audio samples.
*
* @param outdata An array of 16-bit signed audio samples after noise suppression.
*
* @return None
*
*/
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
/**
* @brief Free the NS instance
*
* @param inst The instance of NS to free.
*
* @return None
*
*/
void ns_destroy(ns_handle_t inst);
#ifdef __cplusplus
}
#endif
#endif //_ESP_NS_H_

View File

@@ -0,0 +1,64 @@
#pragma once
#include "stdint.h"
// Opaque model data container; the struct is only forward-declared here.
typedef struct esp_nsn_data_t esp_nsn_data_t;
/**
 * @brief Easy function type to initialize a model instance
 *
 * @param model_name The name of the model instance
 * @returns Handle to the model data
 */
typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
/**
 * @brief Get the amount of samples that need to be passed to the process function
 *
 * Every noise suppression model processes a certain number of samples at the same time. This function
 * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
 *
 * @param model The model object to query
 * @return The amount of samples to feed the process function
 */
typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
/**
 * @brief Feed samples of an audio stream to the noise suppression model and get the processed data.
 *
 * @param model    The model object to use
 * @param in_data  An array of 16-bit signed audio samples. The array size used can be queried by the
 *                 get_samp_chunksize function.
 * @param out_data An array of 16-bit signed audio samples after processing.
 * @return The return state.
 *         NOTE(review): the meaning of individual values is not given in this header.
 */
typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
/**
 * @brief Get the sample rate of the samples to feed to the process function
 *
 * @param model The model object to query
 * @return The sample rate, in Hz
 */
typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
/**
 * @brief Destroy a noise suppression model
 *
 * @param model Model object to destroy
 */
typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
/**
 * This structure contains the functions used to do operations on a noise suppression model.
 */
typedef struct {
esp_nsn_iface_op_create_t create;
esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_nsn_iface_op_process_t process;
esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
esp_nsn_iface_op_destroy_t destroy;
} esp_nsn_iface_t;

View File

@@ -0,0 +1,17 @@
#pragma once
#include "esp_nsn_iface.h"
/*
 * The prefix of nsnet model names.
 * Now there are nsnet1 and nsnet2.
 */
#define ESP_NSNET_PREFIX "nsnet"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Get the nsnet handle from model name
 *
 * Declared with C linkage so that C++ translation units including this
 * header link against the C implementation.
 *
 * @param model_name The name of model
 * @returns The handle (operation table) of the nsnet model
 */
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,62 @@
#pragma once
#include "c_speech_features_config.h"
#include "stdlib.h"
#include <assert.h>
#include <stdbool.h>
// 2*pi; only defined here when no earlier header has already provided M_2PI.
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif
#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Mel filter bank description, created by esp_mel_filter_init().
 */
typedef struct {
    float *coeff;  // filter coefficients; NOTE(review): layout is defined by the implementation
    int *bank_pos; // per-filter position data; NOTE(review): layout is defined by the implementation
    int nfilter;   // presumably the number of mel filters -- confirm
} esp_mel_filter_t;

/**
 * @brief Allocate a float buffer.
 *
 * @param size       Requested size. NOTE(review): bytes vs. elements is not visible here -- confirm.
 * @param from_psram Presumably selects PSRAM instead of internal RAM -- confirm.
 * @return Pointer to the buffer
 */
float *esp_mfcc_malloc(size_t size, bool from_psram);

/**
 * @brief Free a buffer; presumably the counterpart of esp_mfcc_malloc() -- confirm.
 *
 * @param ptr Pointer to free
 */
void esp_mfcc_free(void *ptr);

/**
 * @brief Initialize FFT table
 * @warning For ESP-PLATFORM, use esp-dsp fft
 *          For Other platform, use kiss fft
 *
 * @param nfft The input samples number
 * @return fft-table
 **/
void *esp_fft_init(int nfft);

/**
 * @brief Free FFT table
 * @warning For ESP-PLATFORM, use esp-dsp fft
 *          For Other platform, use kiss fft
 *
 * @param fft_table The fft table initialized by esp_fft_init
 * @param nfft      The input samples number
 **/
void esp_fft_deinit(void *fft_table, int nfft);

/**
 * @brief Initialize a window function.
 *        Currently supports hanning, hamming, sine, povey, rectangular,
 *        wn9 (512-hanning, for wakenet9 & multinet5 compatibility)
 *
 * @param win_type     Name of the window type
 * @param window_data  NOTE(review): presumably the buffer receiving the window -- confirm
 * @param frame_length NOTE(review): presumably the window length in samples -- confirm
 * @return Pointer to the window data
 **/
float *esp_win_func_init(char *win_type, float *window_data, int frame_length);

/**
 * @brief Real FFT step.
 *
 * @param x         Input samples
 * @param nfft      The input samples number
 * @param fft_table The fft table initialized by esp_fft_init
 * @return NOTE(review): presumably the transformed data (possibly in place) -- confirm
 **/
float *esp_fftr(float *x, int nfft, void *fft_table);

/**
 * @brief Spectrum computation step.
 *
 * @param x          Input samples
 * @param nfft       The input samples number
 * @param use_power  NOTE(review): presumably selects power instead of magnitude spectrum -- confirm
 * @param fft_handle The fft table initialized by esp_fft_init
 * @return NOTE(review): presumably the spectrum buffer -- confirm
 **/
float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_handle);

/**
 * @brief Convert 16-bit audio samples to float.
 *
 * @param samples   Input samples
 * @param x         Output float buffer
 * @param len       Number of samples
 * @param remove_dc NOTE(review): presumably non-zero removes the DC offset -- confirm
 **/
void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc);

/**
 * @brief Pre-emphasis step.
 *
 * @param x     Samples to process
 * @param len   Number of samples
 * @param coeff Pre-emphasis coefficient
 * @param last  NOTE(review): presumably the last sample of the previous frame -- confirm
 * @return NOTE(review): presumably the processed buffer -- confirm
 **/
float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last);

/**
 * @brief Create a mel filter bank.
 *
 * @param nfft       FFT size
 * @param nfilter    Number of filters
 * @param low_freq   Lower frequency bound
 * @param high_freq  Upper frequency bound
 * @param samp_freq  Sampling frequency
 * @param from_psram Presumably selects PSRAM for the allocation -- confirm
 * @return The mel filter bank
 **/
esp_mel_filter_t *esp_mel_filter_init(
    int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram);

/**
 * @brief Release a mel filter bank created by esp_mel_filter_init().
 *
 * @param mel_filter The mel filter bank to release
 **/
void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter);

/**
 * @brief Apply the mel filter bank to a spectrum (dot-product step).
 *
 * @param x             Input data
 * @param out           Output buffer
 * @param mel_filter    The mel filter bank
 * @param use_log_fbank NOTE(review): presumably non-zero outputs log filter-bank energies -- confirm
 * @param epsilon       NOTE(review): presumably a floor applied before the log -- confirm
 * @return NOTE(review): presumably `out` -- confirm
 **/
float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon);

#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,84 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_WEBRTC_H_
#define _ESP_WEBRTC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "esp_agc.h"
#include "esp_log.h"
#include "esp_ns.h"
#include "sr_ringbuf.h"
#include <stdint.h>
#include "esp_heap_caps.h"
/**
 * State of a webrtc processing instance, as returned by webrtc_create().
 * It holds one NS handle and one AGC handle plus working buffers.
 * NOTE(review): buffer sizes and ownership are defined by the implementation.
 */
typedef struct {
void *ns_handle;
void *agc_handle;
int frame_size;
int sample_rate;
int16_t *buff;
int16_t *out_data;
sr_ringbuf_handle_t rb;
} webrtc_handle_t;
/**
 * @brief Creates an instance of webrtc.
 *
 * @warning frame_length_ms supports 10 ms, 20 ms, 30 ms and 32 ms.
 *
 * @param frame_length_ms  The length of the audio processing, in ms
 * @param ns_mode          The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
 * @param agc_mode         The mode of AGC
 * @param agc_gain         The gain of AGC. A typical value is 9
 * @param agc_target_level The target level of AGC. A typical value is -3 dBFS
 * @param sample_rate      The sample rate of the audio.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of webrtc
 */
webrtc_handle_t *webrtc_create(
int frame_length_ms, int ns_mode, agc_mode_t agc_mode, int agc_gain, int agc_target_level, int sample_rate);
/**
 * @brief Feed samples of an audio stream to the webrtc instance and get the processed audio stream.
 *
 * NOTE(review): `bool` is used below, but this header does not include
 * <stdbool.h> directly; it relies on one of the headers above providing it.
 *
 * @param handle     The instance of webrtc.
 * @param indata     An array of 16-bit signed audio samples.
 * @param size       The sample size of output data.
 *                   NOTE(review): passed by pointer; whether it also carries the
 *                   input size on entry is not stated here -- confirm.
 * @param enable_ns  Enable noise suppression
 * @param enable_agc Enable automatic gain control
 *
 * @return data after processing
 */
int16_t *webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
/**
 * @brief Free the webrtc instance
 *
 * @param handle The instance of webrtc.
 *
 * @return None
 */
void webrtc_destroy(webrtc_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif //_ESP_WEBRTC_H_

View File

@@ -0,0 +1,178 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_VAD_H_
#define _ESP_VAD_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// VAD sample rate (Hz) and frame length (ms) constants.
#define SAMPLE_RATE_HZ 16000 // Supports 32000, 16000, 8000
#define VAD_FRAME_LENGTH_MS 30 // Supports 10ms, 20ms, 30ms
/**
 * @brief VAD operating mode. A more aggressive (higher mode) VAD is more
 *        restrictive in reporting speech, so select a lower mode if more
 *        audio should be reported as speech.
 */
typedef enum {
VAD_MODE_0 = 0, // Normal
VAD_MODE_1, // Aggressive
VAD_MODE_2, // Very Aggressive
VAD_MODE_3, // Very Very Aggressive
VAD_MODE_4 // Very Very Very Aggressive
} vad_mode_t;
// Result of voice activity detection for one frame.
typedef enum {
VAD_SILENCE = 0,
VAD_SPEECH = 1,
} vad_state_t;
// Trigger state. min_speech_len / min_noise_len are minimum durations in frames
// (see vad_trigger_alloc).
// NOTE(review): speech_len / noise_len are presumably running frame counters -- confirm.
typedef struct vad_trigger_tag {
vad_state_t state;
unsigned int min_speech_len;
unsigned int noise_len;
unsigned int min_noise_len;
unsigned int speech_len;
} vad_trigger_t;
/*
 * Largest length value (INT32_MAX - 1).
 * Parenthesized so the macro acts as a single operand inside larger
 * expressions such as `vad_MAX_LEN / 2` or `-vad_MAX_LEN`.
 * NOTE(review): presumably caps the speech_len / noise_len counters of
 * vad_trigger_t -- confirm against the implementation.
 */
#define vad_MAX_LEN (INT32_MAX - 1)
/**
 * @brief Allocate a VAD trigger
 *
 * @param min_speech_len Minimum frame number of speech duration
 * @param min_noise_len  Minimum frame number of noise duration
 *
 * @return Trigger pointer
 **/
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
/**
 * @brief Free a VAD trigger
 *
 * @param trigger The trigger to free
 **/
void vad_trigger_free(vad_trigger_t *trigger);
/**
 * @brief Reset a VAD trigger
 *
 * @param trigger The trigger to reset
 **/
void vad_trigger_reset(vad_trigger_t *trigger);
/**
 * @brief Detect voice activity using the trigger
 *
 * @param trigger The trigger holding the detection state
 * @param state   NOTE(review): presumably the raw per-frame VAD result -- confirm
 * @return The VAD state
 **/
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
// VAD instance: an opaque VAD object plus a trigger and the audio format.
// NOTE(review): the unit of frame_size (samples vs. ms) is not visible here -- confirm.
typedef struct {
vad_trigger_t *trigger;
void *vad_inst;
int sample_rate;
int frame_size;
} vad_handle_with_trigger_t;
// Handle type used by the vad_* API; note that it is a pointer type.
typedef vad_handle_with_trigger_t *vad_handle_t;
// typedef vad_handle_tag * vad_handle_t;
/**
 * @brief Creates an instance of the VAD structure.
 *
 * @param vad_mode Sets the VAD operating mode.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of VAD
 */
vad_handle_t vad_create(vad_mode_t vad_mode);
/**
 * @brief Creates an instance of the VAD structure with explicit parameters.
 *
 * @param vad_mode      Sets the VAD operating mode.
 * @param sample_rate   Sample rate in Hz
 * @param one_frame_ms  Length of the audio chunk in ms; can be 10, 20 or 30 (VAD_FRAME_LENGTH_MS is 30).
 * @param min_speech_ms Minimum speech duration, unit is ms
 * @param min_noise_ms  Minimum noise duration, unit is ms
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of VAD
 */
vad_handle_t vad_create_with_param(
vad_mode_t vad_mode, int sample_rate, int one_frame_ms, int min_speech_ms, int min_noise_ms);
/**
 * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
 *
 * @param handle         The instance of VAD.
 * @param data           An array of 16-bit signed audio samples.
 * @param sample_rate_hz The sampling frequency (Hz); can be 32000, 16000 or 8000 (SAMPLE_RATE_HZ is 16000).
 * @param one_frame_ms   The length of the audio processing in ms; can be 10, 20 or 30 (VAD_FRAME_LENGTH_MS is 30).
 * @return
 *         - VAD_SILENCE if no voice
 *         - VAD_SPEECH  if voice is detected
 */
vad_state_t vad_process(vad_handle_t handle, int16_t *data, int sample_rate_hz, int one_frame_ms);
/**
 * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
 *
 * Unlike vad_process(), no sample rate or frame length is passed.
 * NOTE(review): presumably the values stored in the handle are used and the
 * result is filtered through the handle's trigger -- confirm.
 *
 * @param handle The instance of VAD.
 * @param data   An array of 16-bit signed audio samples.
 * @return
 *         - VAD_SILENCE if no voice
 *         - VAD_SPEECH  if voice is detected
 */
vad_state_t vad_process_with_trigger(vad_handle_t handle, int16_t *data);
/**
 * @brief Reset the trigger state to silence
 *
 * @param handle The instance of VAD.
 */
void vad_reset_trigger(vad_handle_t handle);
/**
 * @brief Free the VAD instance
 *
 * @param inst The instance of VAD.
 *
 * @return None
 */
void vad_destroy(vad_handle_t inst);
/*
 * Programming Guide:
 *
 * @code{c}
 * vad_handle_t vad_inst = vad_create(VAD_MODE_3); // Creates an instance of the VAD structure.
 *
 * while (1) {
 *     // Use buffer to receive the audio data from MIC.
 *     // Feed samples to the VAD process and get the result.
 *     vad_state_t vad_state = vad_process(vad_inst, buffer, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
 * }
 *
 * vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
 *
 * @endcode
 */
#ifdef __cplusplus
}
#endif
#endif //_ESP_VAD_H_

View File

@@ -0,0 +1,164 @@
#pragma once
#include "esp_vad.h"
#include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque model data container; the struct is only forward-declared here.
typedef struct model_iface_data_t model_iface_data_t;
// /**
// * @brief The state of vad
// */
// typedef enum {
// VAD_NOISE = -1, // Noise
// VADNET_STATE_SILENCE = 0, // Silence
// VAD_SPEECH = 1 // Speech
// } vad_state_t;
/**
 * @brief Easy function type to initialize a model instance with a detection mode
 *        and specified model name
 *
 * @param model_name    The specified model name
 * @param mode          The voice activity detection mode
 * @param channel_num   The number of input audio channels
 * @param min_speech_ms The minimum duration of speech in ms to trigger vad
 *                      speech
 * @param min_noise_ms  The minimum duration of noise in ms to trigger vad
 *                      noise
 * @returns Handle to the model data
 */
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
/**
 * @brief Get the amount of samples that need to be passed to the detect
 *        function
 *
 * Every speech recognition model processes a certain number of samples at the
 * same time. This function can be used to query that amount. Note that the
 * returned amount is in 16-bit samples, not in bytes.
 *
 * @param model The model object to query
 * @return The amount of samples to feed the detect function
 */
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
 * @brief Get the channel number of samples that need to be passed to the detect
 *        function
 *
 * @param model The model object to query
 * @return The number of channels to feed the detect function
 */
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
 * @brief Get the sample rate of the samples to feed to the detect function
 *
 * @param model The model object to query
 * @return The sample rate, in Hz
 */
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
 * @brief Set the detection threshold to manually adjust the probability
 *
 * @param model         The model object to modify
 * @param det_threshold The detection threshold; the range of
 *                      det_threshold is 0.5~0.9999
 * @return 0: setting failed, 1: setting success
 */
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
 * @brief Get the voice activity detection threshold
 *
 * @param model The model object to query
 * @returns the detection threshold
 */
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
/**
 * @brief Feed samples of an audio stream to the vad model and detect whether
 *        there is voice.
 *
 * @param model   The model object to use
 * @param samples An array of 16-bit signed audio samples. The array size used
 *                can be queried by the get_samp_chunksize function.
 * @return The VAD state (vad_state_t)
 */
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
 * @brief Feed MFCC of an audio stream to the vad model and detect whether
 *        there is voice.
 *
 * @param model The model object to use
 * @param cq    The MFCC data (dl_convq_queue_t)
 * @return The VAD state (vad_state_t)
 */
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
/**
 * @brief Get MFCC of an audio stream
 *
 * @param model The model object to query
 * @return MFCC data
 */
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/**
 * @brief Get the triggered channel index. Channel index starts from zero
 *
 * @param model The model object to query
 * @return The channel index
 */
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
 * @brief Clean all states of model
 *
 * @param model The model object to clean
 */
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
/**
 * @brief Destroy a model object
 *
 * @param model Model object to destroy
 */
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
/**
 * This structure contains the functions used to do operations on a voice
 * activity detection model. Each member is one of the esp_vadn_iface_op_*
 * function pointer types declared above.
 */
typedef struct {
esp_vadn_iface_op_create_t create;
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_vadn_iface_op_get_channel_num_t get_channel_num;
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_vadn_iface_op_detect_t detect;
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_vadn_iface_op_clean_t clean;
esp_vadn_iface_op_destroy_t destroy;
} esp_vadn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,22 @@
#pragma once
#include "esp_vadn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
// The prefix of vadnet model names, used to filter all vadnet models from the available models.
#define ESP_VADN_PREFIX "vadnet"
/**
 * @brief Get the vadnet handle from model name
 *
 * @param model_name The name of model
 * @returns The handle (operation table) of vadnet
 */
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,226 @@
#pragma once
#include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque model data container; the struct is only forward-declared here.
typedef struct model_iface_data_t model_iface_data_t;
/**
 * @brief The state of wakeup
 */
typedef enum
{
WAKENET_NO_DETECT = 0, // wake word is not detected
WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
WAKENET_DETECTED = 1 // wake word is detected
} wakenet_state_t;
// Wake word recognition operating mode.
// The probability of being recognized as a wake word increases with the mode;
// as a consequence the false alarm rate also goes up.
typedef enum {
DET_MODE_90 = 0, // Normal
DET_MODE_95 = 1, // Aggressive
DET_MODE_2CH_90 = 2,
DET_MODE_2CH_95 = 3,
DET_MODE_3CH_90 = 4,
DET_MODE_3CH_95 = 5,
DET_MODE_90_COPY_PARAMS = 6, // NOTE(review): was labelled "Aggressive" like DET_MODE_95; looks copy-pasted -- confirm meaning
} det_mode_t;
// List of the wake words known to a model.
typedef struct {
int wake_word_num; //The number of all wake words
char **wake_word_list; //The name list of wake words
} wake_word_info_t;
/**
 * @brief Easy function type to initialize a model instance with a detection mode and specified wake word coefficient
 *
 * @param model_name The specified wake word model coefficient
 * @param det_mode   The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
 * @returns Handle to the model data
 */
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
/**
 * @brief Get the amount of samples that need to be passed to the detect function
 *
 * Every speech recognition model processes a certain number of samples at the same time. This function
 * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
 *
 * @param model The model object to query
 * @return The amount of samples to feed the detect function
 */
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
 * @brief Get the channel number of samples that need to be passed to the detect function
 *
 * @param model The model object to query
 * @return The number of channels to feed the detect function
 */
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
 * @brief Get the start point of wake word when one wake word is detected.
 *
 * @warning This function should be called when the channel index is verified.
 *          The returned value is the number of samples from start point of wake word to detected point.
 *
 * @param model The model object to query
 * @return The number of samples from start point to detected point (end point)
 */
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
/**
 * @brief Get the sample rate of the samples to feed to the detect function
 *
 * @param model The model object to query
 * @return The sample rate, in Hz
 */
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
 * @brief Get the number of wake words
 *
 * @param model The model object to query
 * @returns the number of wake words
 */
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
/**
 * @brief Get the name of wake word by index
 *
 * @warning The index of wake word starts with 1
 * @param model      The model object to query
 * @param word_index The index of wake word
 * @returns the name of the wake word
 */
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
/**
 * @brief Set the detection threshold to manually adjust the probability
 *
 * @param model         The model object to modify
 * @param det_threshold The threshold to trigger wake words, the range of det_threshold is 0.4~0.9999
 * @param word_index    The index of wake word
 * @return 0: setting failed, 1: setting success
 */
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
/**
 * @brief Reset the threshold to its initial state
 *
 * @param model The model object to modify
 * @return 0: setting failed, 1: setting success
 */
typedef int (*esp_wn_iface_op_reset_det_threshold_t)(model_iface_data_t *model);
/**
 * @brief Get the wake word detection threshold of different modes
 *
 * @param model      The model object to query
 * @param word_index The index of wake word
 * @returns the detection threshold
 */
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
/**
 * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
 *
 * @warning The index of wake word starts with 1, 0 means no wake word is detected.
 *
 * @param model   The model object to use
 * @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
 *                get_samp_chunksize function.
 * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
 *         NOTE(review): the declared return type is wakenet_state_t, whose named values are
 *         only -1, 0 and 1 -- confirm how wake word indexes above 1 are reported.
 */
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
 * @brief Get the volume gain
 *
 * @param model     The model object to query
 * @param target_db The target dB to calculate volume gain
 * @returns the volume gain
 */
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
/**
 * @brief Get the triggered channel index. Channel index starts from zero
 *
 * @param model The model object to query
 * @return The channel index
 */
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
 * @brief Clean all states of model
 *
 * @param model The model object to clean
 */
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
/**
 * @brief Destroy a speech recognition model
 *
 * @param model Model object to destroy
 */
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
/**
 * @brief Feed MFCC of an audio stream to the wake word model and detect if a wake word is found.
 *
 * @param model   The model object to use
 * @param samples An array of 16-bit signed audio samples.
 *                NOTE(review): how `samples` is used alongside `cq` is not documented -- confirm.
 * @param cq      The MFCC data (dl_convq_queue_t)
 * @return The index of wake words, return 0 if no wake word is detected, else
 *         the index of the wake words.
 */
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
/**
 * @brief Get MFCC of an audio stream
 *
 * @param model The model object to query
 * @return MFCC data
 */
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/**
 * This structure contains the functions used to do operations on a wake word detection model.
 * Each member is one of the esp_wn_iface_op_* function pointer types declared above.
 */
typedef struct {
esp_wn_iface_op_create_t create;
esp_wn_iface_op_get_start_point_t get_start_point;
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_wn_iface_op_get_channel_num_t get_channel_num;
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
esp_wn_iface_op_get_word_num_t get_word_num;
esp_wn_iface_op_get_word_name_t get_word_name;
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
esp_wn_iface_op_reset_det_threshold_t reset_det_threshold;
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
esp_wn_iface_op_detect_t detect;
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_wn_iface_op_clean_t clean;
esp_wn_iface_op_destroy_t destroy;
} esp_wn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,52 @@
#pragma once
#include "esp_wn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
// The prefix of wakenet model names, used to filter all wakenet models from the available models.
#define ESP_WN_PREFIX "wn"
/**
 * @brief Get the wakenet handle from model name
 *
 * @param model_name The name of model
 * @returns The handle (operation table) of wakenet
 */
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
/**
 * @brief Get the wake word name from model name
 *
 * @param model_name The name of model
 * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue"
 *          NOTE(review): ownership of the returned string is not stated here --
 *          confirm whether the caller must free it.
 */
char *esp_wn_wakeword_from_name(const char *model_name);
#ifdef __cplusplus
}
#endif
/*
Usage example:

static const esp_wn_iface_t *model = esp_wn_handle_from_name(model_name);
//Initialize wakeNet model data
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
//Set parameters of buffer
int audio_chunksize=model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
int r=model->detect(model_data, buffer);
if (r>0) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
model->destroy(model_data);
*/

View File

@@ -0,0 +1,20 @@
#ifndef __FLITE_G2P_H__
#define __FLITE_G2P_H__

/* C linkage so that C++ translation units can link against the C implementation. */
#ifdef __cplusplus
extern "C" {
#endif

/**
 * Result of a grapheme-to-phoneme (g2p) conversion.
 */
typedef struct {
    int num_phonemes; /* presumably the number of entries in `phonemes` -- confirm */
    int phoneme_size; /* NOTE(review): meaning is not visible from this header */
    char **phonemes;  /* array of phoneme strings */
} flite_g2p_result;

/**
 * Free a g2p result.
 * NOTE(review): presumably releases a result obtained from
 * flite_g2p_get_result() -- confirm.
 */
void flite_g2p_result_free(flite_g2p_result *result);

/**
 * Run g2p on a grapheme string and return the result structure.
 * NOTE(review): presumably the caller releases it with
 * flite_g2p_result_free() -- confirm.
 */
flite_g2p_result *flite_g2p_get_result(const char *grapheme);

/**
 * Print a g2p result as a string.
 * NOTE(review): the effect of `map_phonemes` is not visible from this header.
 */
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);

/**
 * Return a g2p result as a string.
 * NOTE(review): ownership of the returned string is not stated here -- confirm.
 */
char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);

/**
 * Convert a grapheme string to a phoneme string.
 * NOTE(review): ownership of the returned string is not stated here -- confirm.
 */
char *flite_g2p(const char *graphemes, int map_phonemes);

#ifdef __cplusplus
}
#endif

#endif

View File

@@ -0,0 +1,29 @@
#pragma once
#include <float.h>
#include <math.h>
/*
 * Floating-point configuration: csf_float and the csf_* math aliases map to
 * the double-precision functions when ENABLE_DOUBLE is defined, and to the
 * single-precision (float) functions otherwise.
 * ENABLE_DOUBLE is not defined by this header (see the commented-out #undef below).
 */
/* #undef ENABLE_DOUBLE */
#ifdef ENABLE_DOUBLE
# define csf_float double
# define csf_ceil ceil
# define csf_floor floor
# define csf_sin sin
# define csf_log log
# define csf_log10 log10
# define csf_pow pow
# define csf_sqrt sqrt
# define csf_abs fabs
# define csf_float_min DBL_MIN
#else
# define csf_float float
# define csf_ceil ceilf
# define csf_floor floorf
# define csf_sin sinf
# define csf_log logf
# define csf_log10 log10f
# define csf_pow powf
# define csf_sqrt sqrtf
# define csf_abs fabsf
# define csf_float_min FLT_MIN
#endif

View File

@@ -0,0 +1,418 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_H
#define DL_LIB_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#include "esp_heap_caps.h"
#include "sdkconfig.h"
#define DL_SPIRAM_SUPPORT 1
#endif
#ifdef CONFIG_IDF_TARGET_ESP32S3
#include "esp32s3/rom/cache.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Integer type describing a padding state.
// NOTE(review): the valid values are not defined in the visible part of this header.
typedef int padding_state;
// /**
// * @brief Allocate a chunk of memory which has the given capabilities.
// * Equivalent semantics to libc malloc(), for capability-aware memory.
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
// *
// * @param size In bytes, of the amount of memory to allocate
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
// * @return Pointer to currently allocated heap memory
// **/
// void *heap_caps_malloc(size_t size, uint32_t caps);
/**
 * @brief Allocate aligned memory from internal memory or external memory.
 *        if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
 *        else, allocate memory from PSRAM
 *
 *        NOTE(review): in ESP-IDF the ALWAYSINTERNAL threshold normally sends
 *        allocations *smaller* than the threshold to internal RAM, the opposite
 *        of what is written above -- confirm against the implementation.
 *
 * @param cnt   Number of continuing chunks of memory to allocate
 * @param size  Size, in bytes, of a chunk of memory to allocate
 * @param align Aligned size, in bits
 * @return Pointer to currently allocated heap memory
 */
void *dl_lib_calloc(int cnt, int size, int align);
/**
 * @brief Always allocate aligned memory from external memory.
 *
 * @param cnt   Number of continuing chunks of memory to allocate
 * @param size  Size, in bytes, of a chunk of memory to allocate
 * @param align Aligned size, in bits
 * @return Pointer to currently aligned heap memory
 */
void *dl_lib_calloc_psram(int cnt, int size, int align);
/**
 * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
 *
 * @param ptr Pointer to free
 */
void dl_lib_free(void *ptr);
/**
 * @brief Does a fast version of the exp() operation on a floating point number.
 *
 * As described in https://codingforspeed.com/using-faster-exponential-approximation/
 * Should be good til an input of 5 or so with a steps factor of 8.
 *
 * @param x     Floating point input
 * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
 * @return Exp()'ed output
 */
fptp_t fast_exp(double x, int steps);
/**
 * @brief Does a fast version of the exp() operation on a floating point number.
 *
 * @param x Floating point input
 * @return Exp()'ed output
 */
double fast_exp_pro(double x);
/**
 * @brief Does a softmax operation on a matrix.
 *
 * @param in  Input matrix
 * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
 */
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
 * @brief Does a softmax operation on a quantized matrix.
 *
 * @param in  Input matrix (quantized, dl_matrix2dq_t)
 * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
 */
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
 * @brief Does a sigmoid operation on a floating point number
 *
 * @param in Floating point input
 * @return Sigmoid output
 */
fptp_t dl_sigmoid_op(fptp_t in);
/**
 * @brief Does a sigmoid operation on a matrix.
 *
 * @param in  Input matrix
 * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
 */
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
 * @brief Does a tanh operation on a floating point number
 *
 * @param v Floating point input number
 * @return Tanh value
 */
fptp_t dl_tanh_op(fptp_t v);
/**
 * @brief Does a tanh operation on a matrix.
 *
 * @param in  Input matrix
 * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
 */
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
 * @brief Does a relu (Rectified Linear Unit) operation on a floating point number
 *
 * @param in   Floating point input
 * @param clip If value is higher than this, it will be clipped to this value
 * @return Relu output
 */
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
/**
 * @brief Does a ReLU operation on a matrix.
 *
 * @param in   Input matrix
 * @param clip If values are higher than this, they will be clipped to this value
 * @param out  Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
 */
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Fully connected layer operation
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
*/
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
/**
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
* this matrix only needs to be calculated once. This function does that.
*
* @param variance Variance matrix
* @param epsilon Epsilon value combined with the variance (the exact formula is not visible from this header)
* @param out Output matrix that receives the precalculated sqrtvari values, to be passed to
*            dl_batch_normalize as its sqrtvari argument
*/
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
/**
* @brief Batch-normalize a matrix
*
* @param m The matrix to normalize. NOTE(review): this is the only non-const argument and the function
*          returns nothing, so the result is presumably written back into m - confirm against the implementation.
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
*/
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
/**
* @brief Do a basic LSTM layer pass.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a basic LSTM layer pass, partial quantized version.
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
*
* @warning Returns state_h pointer, so do not free result.
*          NOTE(review): the declared return type is dl_matrix2dq_t* while state_h is a dl_matrix2d_t*;
*          the two cannot both be accurate - confirm against the implementation before relying on the
*          returned pointer's type.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons, need to be quantised
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a fully-connected layer pass, fully-quantized version.
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
* @param out Output matrix. The output values of the neurons are placed here; the function itself returns nothing.
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
*/
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
/**
* @brief Do a basic LSTM layer pass, fully-quantized version
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weight Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
/**
* @brief Batch-normalize a matrix, fully-quantized version
*
* @param m The matrix to normalize. NOTE(review): this is the only non-const argument and the function
*          returns nothing, so the result is presumably written back into m - confirm against the implementation.
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
*/
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
/**
* @brief Does a ReLu operation on a matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return Sigmoid output
*/
int dl_sigmoid_op_q(const int in);
/* NOTE(review): dl_sigmoid_op_q8 carries no documentation of its own. It takes and returns an int16_t;
   presumably it is the sigmoid used by the 8-bit quantized path - confirm the expected fixed-point
   format with the implementation. */
int16_t dl_sigmoid_op_q8(const int16_t in);
/**
* @brief Does a sigmoid operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param v Fixed-point input
* @return tanh output
*/
int dl_tanh_op_q(int v);
/* NOTE(review): dl_tanh_op_q8 carries no documentation of its own. It takes and returns an int16_t;
   presumably it is the tanh used by the 8-bit quantized path - confirm the expected fixed-point
   format with the implementation. */
int16_t dl_tanh_op_q8(int16_t v);
void load_mat_psram_mn4(void);
void load_mat_psram_mn3(void);
void free_mat_psram_mn4(void);
void free_mat_psram_mn3(void);
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
int16_t dl_table_tanh_op(int16_t in, int exponent);
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Clip the values in the matrix that are greater than clip, float version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Clip the values in the matrix that are greater than clip, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Do a basic CNN layer pass.
*
* @warning This only supports a single-channel input image, and the output is a single-row matrix.
*          That is to say, the height of the output is 1, and the width of the output is
*          out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias for the CNN layer.
* @param filter_width The width of convolution kernel
* @param filter_height The height of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1.
*            NOTE(review): declared const although it is described as the result - confirm how the
*            implementation uses it.
* @return The result of CNN layer.
*/
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
/**
* @brief Do a basic CNN layer pass, quantised version.
*
* @warning This only supports a single-channel input image, and the output is a single-row matrix.
*          That is to say, the height of the output is 1, and the width of the output is
*          out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Quantised weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias of the neurons.
* @param filter_width The width of convolution kernel
* @param filter_height The height of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1
*            NOTE(review): declared const although it is described as the result - confirm how the
*            implementation uses it.
* @return The result of CNN layer
*/
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_COEFGETTER_IF_H
#define DL_LIB_COEFGETTER_IF_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "cJSON.h"
#ifdef __cplusplus
extern "C" {
#endif
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
//dl_batch_normalize_get_sqrtvar first.
#define COEF_GETTER_HINT_BNVAR (1<<0)
/*
This struct describes the basic information of model data:
word_num: the number of wake words or speech commands
word_list: the name list of wake words or speech commands
win_list: a list of integers (NOTE(review): not described in the original comment; presumably one
          window value per word - confirm with the code that fills this struct)
thresh_list: the threshold list of wake words or speech commands
info_str: the string used to reflect the version and information of model data,
          which consists of the architecture of network, the version of model data, wake words and their thresholds
*/
typedef struct {
int word_num;
char **word_list;
int *win_list;
float *thresh_list;
char *info_str;
} model_info_t;
/*
Alphabet struct describes the basic graphemes or phonemes.
item_num: the number of basic items (graphemes or phonemes)
items: the list of basic items
*/
typedef struct {
int item_num;
char **items;
}alphabet_t;
/*
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
For the three matrix getters (getter_f, getter_q, getter_q8), the name describes the name of the coefficient matrix,
usually the same as the Numpy filename the coefficient was originally stored in. The arg argument can be used to
optionally pass an additional user-defined argument to the getter (e.g. the directory to look for files in the case of
the Numpy file loader getter). The hint argument is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is
needed. Use the free_f/free_q/free_q8 functions to release the memory for the returned matrices, when applicable.
getter_info, getter_alphabet and getter_config take only the user-defined arg and return the model_info_t,
alphabet_t and cJSON objects respectively. (NOTE(review): ownership of those returned objects is not stated
here - confirm before freeing them.)
*/
typedef struct {
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
void (*free_f)(const dl_matrix2d_t *m);
void (*free_q)(const dl_matrix2dq_t *m);
void (*free_q8)(const dl_matrix2dq8_t *m);
const model_info_t* (*getter_info)(void *arg);
const alphabet_t* (*getter_alphabet)(void *arg);
const cJSON* (*getter_config)(void *arg);
} model_coeff_getter_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,180 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONV_QUEUE_H
#define DL_LIB_CONV_QUEUE_H
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef float fptp_t;
//Flags for matrices
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
// Float convolution FIFO queue.
typedef struct {
int n; /**< the length of the queue */
int c; /**< the channel number of each queue element */
int front; /**< the front (top) position of the queue */
int flag; /**< not used */
fptp_t *item; /**< Pointer to item array */
} dl_conv_queue_t;
/**
* @brief Allocate a convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
/**
* @brief Allocate a convolution queue from psram
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
/**
* @brief Free a convolution queue
*
* @param cq The convolution queue to free
*/
void dl_conv_queue_free(dl_conv_queue_t *cq);
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input convolution queue
* @return Pointer of oldest element
*/
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input convolution queue
* @param item The new element
*/
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a sigmoid operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @return The result of atrous convolution
*/
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @return The result of dilation layer
*/
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,303 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ8_QUEUE_H
#define DL_LIB_CONVQ8_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
// 8-bit fixed-point convolution FIFO queue.
// [nch, n, c] - NOTE(review): presumably the dimension order of the itemq array; confirm with the allocator.
typedef struct {
int n; /**< the length of queue */
int c; /**< the number of queue element */
int front; /**< the front (top) position of queue */
int nch; /**< the channel of queue */
int exponent; /**< The values in items should be multiplied by pow(2,exponent)
to get the real values */
q8tp_t *itemq; /**< Pointer to item array */
} dl_convq8_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point multi-channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate an 8-bit fixed-point multi-channel convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cqm The fixed-point convolution queue whose items are set to 0
*/
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queues and matrices should be allocated by the caller.
*          The function returns nothing. NOTE(review): the result is presumably delivered through the
*          output queue - confirm against the implementation.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel Kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Shift ratio used in dot operation between two fixed point vectors
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
*            |-> [gate(sigmoid)] -|
*   input - |                     |-> (*) - output
*            |-> [filter(tanh)] -|
*
* @warning All input and output convolution queues and matrices should be allocated by the caller.
*          The function returns nothing. NOTE(review): the result is presumably delivered through the
*          output queue - confirm against the implementation.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
int8_t dl_sigmoid_lutq8(int in);
/**
* @brief Allocate an 8-bit fixed-point Multi-Channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel number
* @return An array of convolution queue pointers, or NULL if out of memory
*/
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number
*/
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
/**
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
*
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
* @param offset Offset used to calculate the beginning of input conv queue
* @param nch The channel number
*/
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
/**
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* Usually, this layer is used as first layer for 8-bit network.
*
* @warning All input and output convolution queues and matrices should be allocated by the caller.
*          The input is an array of 16-bit queues, the output is an array of 8-bit queues.
*          The function returns nothing.
*
* @param in Input 16bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number (NOTE(review): presumably the number of queues in the in/out arrays - confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
/**
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queues and matrices should be allocated by the caller.
*          The function returns nothing. NOTE(review): the result is presumably delivered through the
*          output queue array - confirm against the implementation.
*
* @param in Input 8bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number (NOTE(review): presumably the number of queues in the in/out arrays - confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
int nch, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of 8-bit dilation layer as follows
*
*            |-> [gate(sigmoid)] -|
*   input - |                     |-> (*) - output
*            |-> [filter(tanh)] -|
*
* @warning All input and output convolution queues and matrices should be allocated by the caller.
*          The function returns nothing. NOTE(review): the result is presumably delivered through the
*          output queue array - confirm against the implementation.
*
* @param in Input 8-bit fixed-point convolution queue array
* @param out Output 8-bit fixed-point convolution queue array
* @param nch The channel number (NOTE(review): presumably the number of queues in the in/out arrays - confirm)
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
void print_convq8(dl_convq8_queue_t *cq, int offset);
void print_convq(dl_convq_queue_t *cq, int offset);
void dl_relu_convq8(dl_convq8_queue_t *cq);
void lstmq8_free(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,382 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ_QUEUE_H
#define DL_LIB_CONVQ_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib.h"
#ifdef __cplusplus
extern "C" {
#endif
//Fixed-point convolution FIFO queue.
//Dimension note from the original author: [nch, n, c]
//NOTE(review): field order is part of the public layout of this struct; do not reorder.
typedef struct {
int n; /**< The length of the queue */
int c; /**< The number of queue elements */
int front; /**< The front (top) position of the queue */
int nch; /**< The number of channels of the queue (see dl_convq_queue_alloc_mc) */
int exponent; /**< The values in itemq should be multiplied by pow(2,exponent)
to get the real values */
qtp_t *itemq; /**< Pointer to item array */
} dl_convq_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
/**
* @brief Allocate a fixed-point multi-channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief NOTE(review): undocumented in the original header and the implementation is not
* visible here. Presumably transfers data from the convolution queue into the
* matrix -- confirm against the source.
*
* @param cq Input fixed-point convolution queue
* @param out Fixed-point matrix passed as the output argument
* @param row NOTE(review): meaning not visible from this header (row index or row count?) -- confirm
*/
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq_queue_free(dl_convq_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq Pointer to the fixed-point convolution queue
*/
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
/**
* @brief Move the front pointer of the queue forward, so that the first (oldest) element
* becomes the last (newest) element.
*
* @param cq Input fixed-point convolution queue
* @return Pointer to the oldest element
*/
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
/**
* @brief NOTE(review): undocumented in the original header; presumably the multi-element
* variant of dl_convq_queue_pop -- confirm against the implementation.
*
* @param cq Input fixed-point convolution queue
* @param n NOTE(review): presumably the number of elements to advance by -- confirm
* @return Pointer to a queue element; which one is not visible from this header
*/
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
/**
* @brief Remove the oldest element, then insert the input element at the end of the queue
*
* @param cq Input fixed-point convolution queue
* @param a The new element, given as a fixed-point matrix
* @param shift NOTE(review): not described in the original documentation; presumably a shift
* applied to the data of a when it is stored in the queue -- confirm
*/
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
/**
* @brief Insert the floating-point element at the end of the queue.
* The precision of the fixed-point numbers is described by the Qm.f notation.
*
* @param cq Input fixed-point convolution queue
* @param item The floating-point element
* @param m_bit The number of integer bits including the sign bit
* @param f_bit The number of fractional bits
*/
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief NOTE(review): undocumented in the original header. Same parameter list as
* dl_convq_queue_push_by_qmf; how the "16" variant differs is not visible here -- confirm.
*
* @param cq Input fixed-point convolution queue
* @param item The floating-point element
* @param m_bit Presumably the number of integer bits including the sign bit -- confirm
* @param f_bit Presumably the number of fractional bits -- confirm
*/
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief NOTE(review): undocumented in the original header. Takes a fixed-point queue and
* returns a dl_conv_queue_t pointer; presumably a floating-point conversion of cq1, and
* presumably newly allocated -- confirm ownership against the implementation.
*
* @param cq1 Input fixed-point convolution queue
* @return Pointer to a dl_conv_queue_t
*/
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param last_num Offset from the front of the queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of convolution queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
/**
* @brief Does a tanh operation on one of the elements in the convolution queue.
* Gets the pointer of the element in the convolution queue by offset and does the
* tanh operation through this pointer.
*
* The function returns nothing (the original text mentioned a returned pointer, but
* the return type is void).
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
*/
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on one of the elements in a multi channel convolution queue.
* Gets the pointer of the element in the convolution queue by offset and does the
* tanh operation through this pointer.
*
* The function returns nothing (the original text mentioned a returned pointer, but
* the return type is void).
*
* @param cqm Input fixed-point multi channel convolution queue
* @param offset Offset from the front of the queue
* @param nch The channel number of cqm
*/
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
/**
* @brief Does a relu operation on one of the elements in the convolution queue.
* Gets the pointer of the element in the convolution queue by offset and does the
* relu operation through this pointer.
*
* The function returns nothing (the original text mentioned a returned pointer, but
* the return type is void).
*
* @param cq Input fixed-point convolution queue
* @param clip NOTE(review): not described in the original documentation; presumably the
* clipping value used by the relu -- confirm
* @param last_num Offset from the front of the queue (described this way for
* dl_get_queue_itemq; the original text here called it "offset")
*/
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, input data
stay as it is. Results are saved into the *out* array.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param out Old array to re-use. Passing NULL will allocate a new array.
* @return softmax results
*/
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
/**
* @brief Fast and quantised implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queues and matrices should be allocated. The returned pointer
* is the last element of the output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vectors
* @param prenum Not described in the original text; dl_atrous_conv1dq_mc_steps documents its
* parameter of the same name as "the preload size, 0: do not use preload function"
* (presumably the same here -- confirm)
* @return The result of atrous convolution
*/
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
/**
* @brief Fast implementation of the dilation layer as follows
*
*           |-> [gate(sigmoid)] -|
* input - |                    |-> (*) - output
*           |-> [filter(tanh)]  -|
*
* @warning All input and output convolution queues and matrices should be allocated. The returned pointer
* is the last element of the output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vectors
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vectors
* @param offset Not described in the original text; dl_dilation_layerq_mc_steps documents its
* parameter of the same name as "the offset to calculate input convq"
* (presumably the same here -- confirm)
* @param prenum Not described in the original text; dl_dilation_layerq_mc_steps documents its
* parameter of the same name as "the preload size, 0: do not use preload function"
* (presumably the same here -- confirm)
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int offset, int prenum);
/**
* @brief NOTE(review): undocumented in the original header. Same parameter list as
* dl_dilation_layerq_steps without the offset argument; presumably the same dilation
* layer -- confirm against the implementation.
*/
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int prenum);
/**
* @brief NOTE(review): undocumented in the original header. Same parameter list as
* dl_dilation_layerq without the filter_shift/gate_shift arguments; how the shifts
* are chosen is not visible here -- confirm against the implementation.
*/
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
/**
* @brief NOTE(review): undocumented in the original header. Same parameter list as
* dl_atrous_conv1dq plus an offset argument; presumably the same convolution computed
* at the given offset of the input queue -- confirm against the implementation.
*/
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
/**
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
*
* @param cq1 First fixed-point convolution queue
* @param cq2 Second fixed-point convolution queue
* @return The result of float-point convolution queue
*/
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
/**
* @brief Fast implementation of the LSTM layer by the dl_atrous_conv1dq function
*
* @warning The LSTM kernel is split into two parts. The input of the first part is the output of
* the last layer, and its kernel is the parameter *in_weight*. The input of the second part is the
* LSTM output of the last frame, and its kernel is the parameter *h_weight*.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param in_weight The LSTM kernel needed by the first part
* @param h_weight The LSTM kernel needed by the second part
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
* @param in_shift Shift ratio used in the first part
* @param h_shift Shift ratio used in the second part
* @param prenum Not described in the original text; dl_atrous_conv1dq_mc_steps documents its
* parameter of the same name as "the preload size, 0: do not use preload function"
* (presumably the same here -- confirm)
* @return The result of LSTM layer
*/
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
/**
* @brief NOTE(review): undocumented in the original header. Takes a single weight matrix
* (no in_weight/h_weight split) and no output queue; the meaning of step and shift is
* not visible here -- confirm against the implementation.
*/
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
/**
* @brief NOTE(review): undocumented in the original header. Same parameters as
* dl_convq_lstm_layer without in_shift/h_shift and without const qualifiers; whether
* the inputs are modified is not visible here -- confirm against the implementation.
*/
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
dl_matrix2dq_t *bias, int prenum);
/**
* @brief Allocate a fixed-point multi channel convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @param nch The channel number of the convolution queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a fixed-point multi channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number of cqm
*/
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @warning All input and output convolution queues and matrices should be allocated. The returned pointer
* is the last element of the output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset the offset to calculate input convq
* @param prenum the preload size, 0: do not use preload function
* @return The result of atrous convolution
*/
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* kernel,
dl_matrix2dq_t* bias,
int shift,
int offset,
int prenum);
/**
* @brief Fast implement of dilation layer as follows for multi channel input
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @warning All input and output convolution queues and matrices should be allocated. The returned pointer
* is the last element of the output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @param offset The offset to calculate input convq
* @param prenum The preload size, 0: do not use preload function
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* filter_kernel,
dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel,
dl_matrix2dq_t* gate_bias,
int filter_shift,
int gate_shift,
int offset,
int prenum);
/*
* NOTE(review): the five declarations below are undocumented in the original header and
* their implementations are not visible here; the notes only restate the signatures.
*/
/* Presumably self-test entry points for the atrous convolution and the LSTM layer -- confirm. */
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
void test_lstm_convq(int size, int in_dim, int lstm_cell);
/* Same parameter list as dl_tanh_convq_mc; how it differs is not visible here -- confirm. */
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
/* Takes a float item plus Qm.f bit counts like dl_convq_queue_push_by_qmf, with an additional offset and channel index. */
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
/* Presumably the multi channel counterpart of dl_convq_queue_bzero for nch queues -- confirm. */
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,257 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIX_H
#define DL_LIB_MATRIX_H
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
//Floating-point item type used by the non-quantized matrices.
typedef float fptp_t;
//Heap handle declared only when CONFIG_BT_SHARE_MEM_REUSE is set; it is defined elsewhere.
//NOTE(review): multi_heap_handle_t is not declared by this header itself; presumably it comes
//from an ESP-IDF heap header pulled in by the includes above -- confirm.
#if CONFIG_BT_SHARE_MEM_REUSE
extern multi_heap_handle_t gst_heap;
#endif
//Flags for matrices (OR-ed together in the flags field of a matrix)
#define DL_MF_FOREIGNDATA 1 /**< Matrix pointer and item data actually point to another matrix and should not be freed */
#define DL_MF_FOREIGNITEM 2 /**< Only the item data actually points to another matrix and should not be freed */
//'Normal' float matrix. Items are addressed as item[x + y*stride] (see DL_ITM).
//NOTE(review): field order is part of the public layout of this struct; do not reorder.
typedef struct {
int w; /**< Width */
int h; /**< Height */
int stride; /**< Row stride, essentially how many items to skip to get to the same position in the next row */
int flags; /**< Flags. OR of DL_MF_* values */
fptp_t *item; /**< Pointer to item array */
} dl_matrix2d_t;
//Macro to quickly access the raw items in a matrix. The expansion is an lvalue, so it can be
//read from and assigned to.
//  m: pointer to a matrix with item and stride fields (dl_matrix2d_t *)
//  x: column index, y: row index; the item lives at item[x + y*stride]
//Every argument, including m, is parenthesized so that pointer expressions such as
//DL_ITM(&mat, x, y) or DL_ITM(p + 1, x, y) expand correctly. m is evaluated twice, so do not
//pass an expression with side effects.
#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
/**
* @brief Free a matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->item space as well.
*
* @param m Matrix to free
*/
void dl_matrix_free(dl_matrix2d_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrix_zero(dl_matrix2d_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
* @return Pointer to the copy (NOTE(review): presumably NULL if out of memory, like
* dl_matrix_alloc -- confirm)
*/
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @param src Existing matrix to slice; the returned matrix re-uses its data
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief select a range of items from an existing matrix and flatten them into one dimension.
*
* @warning The results are flattened in row-major order.
*
* @param src Existing matrix to select the items from
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief Generate a matrix from existing floating-point data
*
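* @param w Width of resulting matrix
* @param h Height of resulting matrix
* @param stride Row stride of the data, in items (NOTE(review): presumably as in the stride field of dl_matrix2d_t -- confirm)
* @param data Data to populate matrix with
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.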
*/
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
/**
* @brief Multiply a pair of matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two matrices : res=a.b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Add a pair of matrices item-by-item: out=a+b
*
* @param a First matrix
* @param b Second matrix
* @param out Added data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Divide a pair of matrices item-by-item: out=a/b
*
* @param a First matrix
* @param b Second matrix
* @param out Divided data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Subtract a matrix from another, item-by-item: out=a-b
*
* @param a First matrix
* @param b Second matrix
* @param out Subtracted data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Add a constant to every item of the matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
/**
* @brief Concatenate the rows of two matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated matrix with as values a|b
*/
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief NOTE(review): undocumented in the original header. Presumably concatenates along the
* other dimension than dl_matrix_concat. Note that a is not const here, so it may be
* modified or re-used by the implementation -- confirm before relying on a afterwards.
*
* @param a First matrix (non-const)
* @param b Second matrix
* @return Pointer to the resulting matrix
*/
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Print the contents of a matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrix(const dl_matrix2d_t *a);
/**
* @brief Return the average square error given a correct and a test matrix.
*
* ...Well, more or less. If anything, it gives an indication of the error between
* the two. Check the code for the exact implementation.
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return value indicating the relative difference between matrices
*/
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Read a single item of a float matrix
*
* Accessor intended for code outside the library; prefer it over using DL_ITM directly.
*
* @param m Matrix to read from
* @param x Column index of the item
* @param y Row index of the item
* @return The item stored at column x, row y
*/
static inline fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y)
{
    const fptp_t element = DL_ITM(m, x, y);
    return element;
}
/**
* @brief Write a single item of a float matrix
*
* Accessor intended for code outside the library; prefer it over using DL_ITM directly.
*
* @param m Matrix to write to
* @param x Column index of the item
* @param y Row index of the item
* @param val Value to store at column x, row y
*/
static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val)
{
    fptp_t *slot = &DL_ITM(m, x, y);
    *slot = val;
}
/**
* @brief NOTE(review): undocumented in the original header and the implementation is not
* visible here. Presumably reports the smallest and largest item of the matrix through
* the two output pointers -- confirm.
*
* @param m Matrix to inspect (not modified, it is const)
* @param rmin Output pointer, presumably receives the minimum item value
* @param rmax Output pointer, presumably receives the maximum item value
*/
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,387 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ_H
#define DL_LIB_MATRIXQ_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
//Item type of the quantized matrices: a signed 16-bit fixed-point mantissa.
typedef int16_t qtp_t;
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
//for easy use as a multiplicand without stressing out the flash cache too much.
//Items are addressed as itemq[y + x*stride] (see DL_ITMQ).
//NOTE(review): field order is part of the public layout of this struct; do not reorder.
typedef struct {
int w; //Width
int h; //Height
int stride; //Normally equals h, not w!
int flags; //Presumably an OR of DL_MF_* values, as for dl_matrix2d_t (dl_matrixq_free refers to DL_MF_FOREIGNDATA)
int exponent; //The values in itemq should be multiplied by pow(2,exponent) to get the real values.
qtp_t *itemq; //Pointer to the item array
} dl_matrix2dq_t;
//Number of bits used in the shift that defines DL_QTP_RANGE.
#define DL_QTP_SHIFT 15
//Evaluates to 32767, the largest value an int16_t-based qtp_t can hold.
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
//Macro to access the raw items in a quantized matrix. The expansion is an lvalue.
//  m: pointer to a matrix with itemq and stride fields (dl_matrix2dq_t *)
//  x: column index, y: row index; the item lives at itemq[y + x*stride]
//Every argument, including m, is parenthesized so that pointer expressions such as
//DL_ITMQ(&mat, x, y) or DL_ITMQ(p + 1, x, y) expand correctly. m is evaluated twice, so do
//not pass an expression with side effects.
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
//NOTE(review): presumably a special shift argument asking the routine to choose the shift itself -- confirm.
#define DL_SHIFT_AUTO 32
/**
* @info About quantized matrices and shift values
*
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
* time. Quantized matrices can speed up your operations, but come with some quirks, and
* it's good to understand how they work before using them.
*
* The data in the quantized matrix type is stored similarly to floating-point types:
* when storing a real value, the value is stored as a mantissa (base number) and an
* exponent. The 'real' value that can be re-derived from those two numbers is something
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
* the standard floating point implementations like e.g. IEEE-754.
*
* The difference with respect to quantized matrices is that for a quantized matrix, it is
* assumed all values stored have more-or-less the same order of magnitude. This allows the
* matrix to only store all the mantissas, while the exponents are shared; there is only one
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
* integer arithmetic. It also nets us some memory savings - while normally a floating point
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
* memory requirements.
*
* While most of the details of handling the intricacies of the quantized matrices are done
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
* specifically in places where addition/subtraction/division happens.
*
* The problem is that the routines do not know what the size of the resulting operation is. For
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
* assume the mantissas need to be scaled back, we may lose precision.
*
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
* the argument is zero, the routine will be conservative, that is, increase the exponent of
* the result to such an extent it's mathematically impossible a value in the result will exceed
* the maximum value that can be stored. However, when this argument is larger than zero, the
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
* value possible. (Neural networks usually are okay with this happening for a limited amount
* of matrix indices).
*
* For deciding on these shift values, it is recommended to start with a shift value of one, then
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
* else.
*
* For neural networks and other noise-tolerant applications, note that even when
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
* to slightly improved precision. Feel free to experiment.
**/
/**
* @brief Allocate a quantized matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
/**
* @brief NOTE(review): undocumented in the original header; presumably the same as
* dl_matrixq_alloc but allocating from PSRAM -- confirm against the implementation.
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix (presumably NULL if out of memory -- confirm)
*/
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
/**
* @brief Convert a floating-point matrix to a quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @return The quantized version of the floating-point matrix
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
/**
* @brief TODO: describe this function (the original header has no description).
*
* NOTE(review): the signature matches dl_matrixq_from_matrix2d with two extra bit counts, so
* this presumably converts a floating-point matrix to a quantized one using a fixed Qm.f
* format instead of a derived exponent -- confirm against the implementation.
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix passed for the result (presumably re-used, or newly allocated when NULL -- confirm)
* @param m_bit Presumably the number of integer bits including the sign bit -- confirm
* @param f_bit Presumably the number of fractional bits -- confirm
* @return Pointer to a quantized matrix
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
/**
* @brief Convert a quantized matrix to a floating-point one.
*
* @param m Quantized matrix to convert
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
* @return The floating-point version of the quantized matrix
**/
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
*
* @param m Matrix to free
*/
void dl_matrixq_free(dl_matrix2dq_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrixq_zero(dl_matrix2dq_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
* @return Pointer to the copy (NOTE(review): presumably NULL if out of memory, like
* dl_matrixq_alloc -- confirm)
*/
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a fixed-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
* much slower than dl_matrixq_dot .
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a floating-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
* much slower than dl_matrixq_dot_matrix_out.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
*
* @param a First multiplicand; float matrix
* @param b Second multiplicand; quantized matrix
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrixq(const dl_matrix2dq_t *a);
/**
* @brief Add a pair of quantized matrices item-by-item: res=a+b
*
* @param a First matrix
* @param b Second matrix
* @param res Added data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @warning In contrast to the floating point equivalent of this function, the fixed-point version
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
* garbage after modifying the data in one of the slices.
*
* @param src The existing matrix to slice
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Select a range of items from an existing matrix and flatten them into one dimension.
*
* @warning The results are flattened in row-major order.
*
* @param src The existing matrix to take the items from
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flattened matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
*
* @param a First matrix (the one subtracted from)
* @param b Second matrix (the one being subtracted)
* @param res Subtracted data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
*
* NOTE(review): unlike the sibling add/sub/div declarations, a and b are not const-qualified here;
* confirm against the implementation whether the inputs may be modified.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplied data. Can be equal to a or b to overwrite that matrix.
*/
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
/**
* @brief Divide a pair of quantized matrices item-by-item: out=a/b
*
* @param a First matrix (dividend)
* @param b Second matrix (divisor)
* @param out Divided data. Can be equal to a or b to overwrite that.
* @param shift Shift value. NOTE(review): not described in this header; presumably has the same meaning as
* the shift argument of dl_matrixq_add/dl_matrixq_sub — confirm against the implementation.
*/
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
/**
* @brief Check if two quantized matrices have the same shape, that is, the same number of
* rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise (returned as an int).
*/
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Concatenate the rows of two quantized matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated quantized matrix with as values a|b
* NOTE(review): the behaviour on allocation failure is not stated here; presumably NULL,
* as for the other allocating functions in this header — confirm.
*/
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Add a constant to every item of the quantized matrix
*
* @param subj Matrix to add the constant to; it is modified in place
* @param add The constant, given as a floating-point (fptp_t) value
* @param shift Shift value. NOTE(review): not described in this header; presumably has the same meaning as
* the shift argument of dl_matrixq_add — confirm against the implementation.
*/
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
/**
* @brief Check the sanity of a quantized matrix
*
* Due to the nature of quantized matrices, depending on the calculations a quantized
* matrix is the result of and the shift values chosen in those calculations, a quantized
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
* the most significant mantissa bits are unused, or because a fair number of mantissas are
* clipped. This function checks if this is the case and will report a message to stdout
* if significant loss of precision is detected.
*
* @param m The quantized matrix to check
* @param name A string to be displayed in the message if the sanity check fails
* @return True if matrix is sane, false otherwise (returned as an int)
**/
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
/**
* @brief Re-adjust the exponent of the matrix to fit the mantissa better
*
* This function will shift up all the data in the mantissas so there are no
* most-significant bits that are unused in all mantissas. It will also adjust
* the exponent to keep the actual values in the matrix the same.
*
* Some operations done on a matrix, especially operations that re-use the
* result of earlier operations done in the same way, can lead to the loss of
* data because the exponent of the quantized matrix is never re-adjusted. You
* can do that explicitly by calling this function.
*
* @param m The matrix to re-adjust
**/
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
/**
* @brief Get the floating-point value of a specific item from the quantized matrix
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Floating-point (fptp_t) value in that position
*/
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
/**
* @brief Set a specific item in the quantized matrix to the given
* floating-point value
*
* @warning If the given value is more than the exponent in the quantized matrix
* allows for, all mantissas in the matrix will be shifted down to make the value
* 'fit'. If, however, the exponent is such that the value would result in a
* quantized mantissa of 0, nothing is done.
*
* @param m Matrix to access; may be modified as a whole, see the warning above
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ8_H
#define DL_LIB_MATRIXQ8_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#include "dl_lib.h"
#include "dl_lib_matrixq.h"
#ifdef __cplusplus
extern "C" {
#endif
//Storage type of a single 8-bit quantized item
typedef int8_t q8tp_t;
/**
* @brief 2D matrix of 8-bit quantized items that share a single exponent
*/
typedef struct {
int w; //Width of the matrix
int h; //Height of the matrix
int stride; //Normally equals h, not w! DL_ITMQ8 addresses item (x,y) as itemq[y + x*stride].
int flags; //Matrix flags; presumably the DL_MF_* values such as DL_MF_FOREIGNDATA - TODO confirm
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
q8tp_t *itemq; //Pointer to the quantized item data
} dl_matrix2dq8_t;
//Shift amount used to derive DL_Q8TP_RANGE; 7 matches the number of non-sign bits in an int8_t
#define DL_Q8TP_SHIFT 7
//Evaluates to (1<<7)-1 = 127
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
/**
* @brief Access item (x, y) of an 8-bit quantized matrix. The result is an lvalue, so it can be read or assigned.
*
* The item is located at itemq[y + x*stride]. The matrix argument and the whole expansion are
* parenthesized so that expressions such as `&mat` or `p + 1` can be passed safely.
*
* @warning m is evaluated twice; do not pass an expression with side effects.
*
* @param m Pointer to the matrix (any struct pointer with itemq and stride members)
* @param x Column address
* @param y Row address
*/
#define DL_ITMQ8(m, x, y) ((m)->itemq[(y) + (x) * (m)->stride])
/**
* @brief Allocate an 8-bit quantized matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
*
* @param m Matrix to free
*/
void dl_matrixq8_free(dl_matrix2dq8_t *m);
/**
* @brief Copy a quantized matrix
* Copy a quantized matrix from flash or iram/psram
*
* @param m Matrix to copy
* @return The copied matrix. NOTE(review): judging by the function name the copy presumably lives in PSRAM;
* the behaviour on allocation failure is not stated here — confirm against the implementation.
*/
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
/**
* @brief Convert a floating-point matrix to an 8-bit quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @return The quantized version of the floating-point matrix
*/
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,105 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AEC_H_
#define _ESP_AEC_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define USE_AEC_FFT // Not kiss_fft
#define AEC_SAMPLE_RATE 16000 // Only 16000 Hz is supported
#define AEC_FRAME_LENGTH_MS 32 // Presumably the AEC frame length in milliseconds - TODO confirm
// Opaque AEC instance; the struct is only forward-declared in this header.
typedef struct aec_handle_t aec_handle_t;
// AEC operating mode. Note that the value 2 is not assigned.
typedef enum {
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
} aec_mode_t;
/**
* @brief Creates an instance of the AEC structure.
* Please get the frame size with the aec_get_chunksize() function.
*
* @param sample_rate The sampling frequency (Hz), must be 16000.
* @param filter_length Number of filters, recommended value is 4. The larger the filter_length, the more resource consumption.
* @param channel_num The input microphone channel number
* @param mode The mode of AEC, recommended value is AEC_MODE_SR_LOW_COST
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
/**
* @brief Creates an instance of the AEC structure, same as aec_create() but without the sample_rate parameter.
*
* @param filter_length Number of filters, recommended value is 4. The larger the filter_length, the more resource consumption.
* @param channel_num The input microphone channel number
* @param mode The mode of AEC, recommended value is AEC_MODE_SR_LOW_COST
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
/**
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
*
* @warning The indata, refdata and outdata must be 16-bit signed. Please allocate memory by heap_caps_aligned_alloc().
*
* @param handel The instance of AEC.
* @param indata An array of 16-bit signed audio samples from mic. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
* @return None
*
*/
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
/**
* @brief Get the frame size of AEC (the number of samples in one frame)
* @param handle The instance of AEC.
* @return Frame size, in samples
*/
int aec_get_chunksize(const aec_handle_t *handle);
/**
* @brief Get the AEC mode as a string
*
* @param aec_mode The mode of AEC.
*
* @return AEC mode string. NOTE(review): ownership of the returned string is not stated here;
* presumably a static string that must not be modified or freed — confirm.
*/
char * aec_get_mode_string(aec_mode_t aec_mode);
/**
* @brief Free the AEC instance
*
* @param handel The instance of AEC.
*
* @return None
*
*/
void aec_destroy(aec_handle_t *handel);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AEC_H_

View File

@@ -0,0 +1,81 @@
#ifndef _ESP_AFE_AEC_H_
#define _ESP_AFE_AEC_H_
#include "esp_aec.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// AEC wrapper instance used by the afe_aec_* functions declared in this header
typedef struct {
aec_handle_t *handle; // Underlying AEC instance
aec_mode_t mode; // AEC mode
afe_pcm_config_t pcm_config; // Channel layout of the input data
int frame_size; // Frame size; presumably the value reported by afe_aec_get_chunksize() - TODO confirm
int16_t *data; // Data buffer; NOTE(review): purpose not visible in this header - confirm
} afe_aec_handle_t;
/**
* @brief Creates an instance of the AEC structure.
*
* @warning Currently only 1 microphone channel and 1 playback channel are supported.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
* channel will be selected.
*
* The input format, same as afe config:
* M to represent the microphone channel
* R to represent the playback reference channel
* N to represent an unknown or unused channel
*
* For example, input_format="MMNR" indicates that the input data consists of four channels,
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
*
* @param input_format The input format
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
* esp32c5.
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_aec_handle_t* The created AEC instance. NOTE(review): the failure behaviour is not stated
* here; presumably NULL — confirm.
*/
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
/**
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
*
* @param handel The instance of AEC.
* @param indata Input audio data, format is defined by input_format.
* @param outdata Near-end signal with echo removed. outdata must be aligned;
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory.
* NOTE(review): the requested alignment of 16 is presumably in bytes, not bits — confirm.
* @return The number of bytes written to outdata.
*/
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);
/**
* @brief Get the frame size of AEC (the number of samples in one frame)
* @param handle The instance of AEC.
* @return Frame size, in samples
*/
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
/**
* @brief Free the AEC instance
*
* @param handel The instance of AEC.
*
* @return None
*
*/
void afe_aec_destroy(afe_aec_handle_t *handel);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AFE_AEC_H_

View File

@@ -0,0 +1,288 @@
#pragma once
#include "esp_aec.h"
#include "esp_agc.h"
#include "esp_nsn_models.h"
#include "esp_vad.h"
#include "esp_vadn_models.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "model_path.h"
#include "stdbool.h"
#include "stdint.h"
#include "stdlib.h"
#ifdef __cplusplus
extern "C" {
#endif
// AFE: Audio Front-End
// SR: Speech Recognition
// VC: Voice Communication
// AFE_SR mode. Deprecated: every value below is superseded by afe_mode_t.
typedef enum {
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
} afe_sr_mode_t;
// AFE mode: trades resource cost against performance
typedef enum {
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
} afe_mode_t;
// AFE type: selects the target scenario
typedef enum {
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
AFE_TYPE_VC = 1, // Voice communication scenarios, 16 kHz input, including nonlinear noise suppression
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8 kHz input, note that the input data must be 8 kHz
} afe_type_t;
// Memory allocation policy for AFE: how allocations are split between internal RAM and PSRAM
typedef enum {
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // allocate more from internal ram
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // balance allocations between internal ram and psram
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // allocate more from psram
} afe_memory_alloc_mode_t;
// Peak amplitude target of the fetched audio; each enumerator's value is the peak level in dB
typedef enum {
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
} afe_mn_peak_agc_mode_t;
// Channel layout and sample rate of the PCM data fed to the AFE
typedef struct {
int total_ch_num; // total channel count, including microphone, playback and unknown channels
int mic_num; // number of microphone channels
uint8_t *mic_ids; // microphone channel indices
int ref_num; // number of playback reference channels
uint8_t *ref_ids; // playback reference channel indices
int sample_rate; // sample rate of audio
} afe_pcm_config_t;
// Noise suppression (NS) mode
typedef enum {
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
AFE_NS_MODE_NET = 1, // please use model name of NSNET
} afe_ns_mode_t;
// Automatic gain control (AGC) mode
typedef enum {
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
} afe_agc_mode_t;
/**
* @brief Function to get the debug audio data
*
* @param data The debug audio data, which must not be modified. It should be copied away as soon as possible
* to avoid blocking for too long.
* @param data_size The number of bytes of data.
* @return None
*/
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
// Identifies the point at which a debug hook taps the audio data
typedef enum {
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
AFE_DEBUG_HOOK_MAX = 2 // Number of hook types; not a hook itself
} afe_debug_hook_type_t;
// Pairs a debug hook type with the callback that receives its audio data
typedef struct {
afe_debug_hook_type_t hook_type; // debug type of hook
afe_debug_hook_callback_t hook_callback; // callback function which receives the debug audio data
} afe_debug_hook_t;
// Complete AFE configuration: one group of fields per algorithm, followed by general parameters
typedef struct {
/********** AEC(Acoustic Echo Cancellation) **********/
bool aec_init; // Whether to init aec
aec_mode_t aec_mode; // The mode of aec, one of the aec_mode_t values, e.g. AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
int aec_filter_length; // The filter length of aec
/********** SE(Speech Enhancement, microphone array processing) **********/
bool se_init; // Whether to init se
/********** NS(Noise Suppression) **********/
bool ns_init; // Whether to init ns
char *ns_model_name; // Model name of ns
afe_ns_mode_t afe_ns_mode; // Model mode of ns
/********** VAD(Voice Activity Detection) **********/
bool vad_init; // Whether to init vad
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
char *vad_model_name; // The model name of vad. If it is null, WebRTC VAD will be used.
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
// 1000 ms
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
// If you find the vad cache cannot cover all speech, please increase this value.
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
/********** WakeNet(Wake Word Engine) **********/
bool wakenet_init; // Whether to init wakenet
char *wakenet_model_name; // The model name of wakenet 1
char *wakenet_model_name_2; // The model name of wakenet 2, if a second wakenet is used
det_mode_t wakenet_mode; // The mode of wakenet
/********** AGC(Automatic Gain Control) **********/
bool agc_init; // Whether to init agc
afe_agc_mode_t
agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after afe linear gain.
int agc_compression_gain_db; // Compression gain in dB (default 9)
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
/********** General AFE(Audio Front End) parameter **********/
afe_pcm_config_t pcm_config; // Channel configuration of the original data which is fed to the afe feed function.
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
// directly on the output amplitude: out_linear_gain * amplitude.
bool debug_init; // Whether to init debug; NOTE(review): presumably enables the afe_debug_hook_t hooks - confirm
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet
} afe_config_t;
/**
* @brief Get AFE default configuration. The default configuration will enable as many algorithms as possible based
* on the chip target and input format. You can manually fine-tune it after creating the configuration.
*
* The input format:
* M to represent the microphone channel
* R to represent the playback reference channel
* N to represent an unknown or unused channel
*
* For example, input_format="MMNR" indicates that the input data consists of four channels,
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
*
* @param input_format The input format
* @param models Models from partition, which is configured by Kconfig
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_config_t* The default config of afe
*/
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
/**
* @brief Check AFE configuration and make sure it is correct.
*
* @warning If there is a configuration conflict, this function will modify some parameters.
* The guiding principle behind these modifications is to maintain the highest performance of the output audio and
* results, and to remove the conflict between different algorithms.
*
* For example, if input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
* If the SE(BSS) algorithm is deactivated, only the first microphone channel will be used.
*
* @param afe_config Input AFE config
*
* @return afe_config_t* The modified AFE config
*/
afe_config_t *afe_config_check(afe_config_t *afe_config);
/**
* @brief Parse input format
*
* @param input_format The input format, same as for the afe_config_init() function
* @param pcm_config The pcm config; taken as a non-const pointer, presumably filled in from input_format - TODO confirm
*
* @return true if the input format is parsed successfully, otherwise false
*/
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
/**
* @brief Parse I2S input data into microphone data and playback reference data
*
* @param data The input multi channel data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param mic_data The output microphone data
* @param ref_data The output playback reference data
* @param pcm_config The pcm config
*
*/
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
/**
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
*
* @param data The input multi channel data (interleaved)
* @param frame_size The frame size of input, it is also the size of single channel data
* @param channel_num The channel number of data
* @param out_data The output data (contiguous)
*
*/
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
/**
* @brief Format input data, from contiguous arrangement to interleaved arrangement
*
* @param data The input multi channel data (contiguous)
* @param frame_size The frame size of input, it is also the size of single channel data
* @param channel_num The channel number of data
* @param out_data The output data (interleaved)
*
*/
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
/**
* @brief Adjust the gain of input data
*
* @warning The input data will be modified in place.
*
* @param data The input audio data
* @param frame_size The frame size of input, it is also the size of single channel data
* @param factor The gain factor
*
* @return int16_t* The output audio data. NOTE(review): since the data is modified in place this is
* presumably the same pointer as data — confirm.
*/
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
/**
* @brief Concatenate audio data from in_data into out_data
*
* NOTE(review): the exact concatenation semantics (where in out_data the input frame lands) are not
* visible in this header — confirm against the implementation.
*
* @warning the input data will be modified inplace.
* NOTE(review): this warning is identical to the one on afe_adjust_gain(); confirm it applies here.
*
* @param in_data The input audio data
* @param in_frame_size Frame size of the input data
* @param channel_num The channel number of input data, which is same as output data
* @param out_data The output audio data
* @param out_frame_size Frame size of the output data
*
*/
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
/**
* @brief Copy the afe config from src_config to dst_config
*
* @param dst_config The destination afe config
* @param src_config The source afe config; taken as a pointer to const
*
* @return The destination afe config
*/
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
/**
* @brief Print the afe config
*
* @param afe_config The afe config to print; taken as a pointer to const
*/
void afe_config_print(const afe_config_t *afe_config);
/**
* @brief Allocate afe config
*
* Takes no arguments. The parameter list is spelled `(void)` so that C compilers treat this as a
* full prototype and reject calls that pass arguments; in C++ the meaning is unchanged.
*
* @return The afe config pointer. NOTE(review): the behaviour on allocation failure is not stated here;
* presumably NULL — confirm. Release the config with afe_config_free().
*/
afe_config_t *afe_config_alloc(void);
/**
* @brief Free afe config
*
* @param afe_config The afe config pointer to free
*/
void afe_config_free(afe_config_t *afe_config);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,48 @@
#ifndef _ESP_AFE_DOA_H_
#define _ESP_AFE_DOA_H_
#include "esp_doa.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// DOA (direction of arrival) wrapper instance used by the afe_doa_* functions declared in this header
typedef struct {
doa_handle_t *doa_handle; // Underlying DOA instance
afe_pcm_config_t pcm_config; // Channel layout of the input data
int16_t *leftdata; // Buffer for one channel; presumably the left/first microphone - TODO confirm
int16_t *rightdata; // Buffer for one channel; presumably the right/second microphone - TODO confirm
int frame_size; // Frame size; NOTE(review): unit not visible in this header - confirm
} afe_doa_handle_t;
/**
* @brief Initialize SRP-PHAT processor
*
* Using the example values below is recommended for better performance.
*
* @param input_format The input format
* @param fs Sampling rate (Hz), e.g., 16000
* @param resolution Angular search resolution (degrees), e.g., 20
* @param d_mics Microphone spacing (meters), e.g., 0.06
* @param input_timedate_samples Number of input samples, e.g., 1024.
* NOTE(review): "timedate" presumably means time-domain — confirm.
* @return Initialized afe_doa_handle_t object pointer
*/
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
/**
* @brief Process audio frame for direction estimation
* @param handle afe_doa_handle_t instance pointer
* @param indata Input audio data, format is defined by input_format.
* @return Estimated sound direction in degrees, e.g., 0-180
*/
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
/**
* @brief Release all allocated resources
* @param handle afe_doa_handle_t instance pointer to be freed
*/
void afe_doa_destroy(afe_doa_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_AFE_DOA_H_ */

View File

@@ -0,0 +1,237 @@
#pragma once
#include "esp_afe_config.h"
#include "stdbool.h"
#include "stdint.h"
#include "stdlib.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#ifdef __cplusplus
extern "C" {
#endif
// AFE: Audio Front-End
// SR: Speech Recognition
// afe_sr/AFE_SR: the audio front-end for speech recognition
// Opaque AFE_SR data container; the struct is only forward-declared here.
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
/**
* @brief The state of vad. Deprecated: both values are superseded by vad_state_t.
*/
typedef enum {
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
} afe_vad_state_t;
/**
* @brief The result of fetch function
*/
typedef struct afe_fetch_result_t {
int16_t *data; // the target channel data of audio.
int data_size; // the size of data. The unit is byte.
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
// audio that was truncated.
int vad_cache_size; // the size of vad_cache. The unit is byte.
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
// (note: invalid in vc). If wakenet is enabled, the window length is the receptive field of
// wakenet (about 1.5s), otherwise it is the frame length.
wakenet_state_t wakeup_state; // the wake-up state, a wakenet_state_t value
int wake_word_index; // if the wake word is detected, this stores the wake word index, which starts from 1.
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
// starts from 1.
vad_state_t vad_state; // the vad state, a vad_state_t value
int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. The unit is the number of samples.
int ret_value; // the return state of fetch function
int16_t *raw_data; // the multi-channel output data of audio.
int raw_data_channels; // the channel number of raw data
float ringbuff_free_pct; // the percent of ringbuff free size. if the value is larger than 0.5, it means the ringbuff is busy.
// NOTE(review): a large *free* percentage indicating a busy buffer looks inverted - confirm.
void *reserved; // reserved for future use
} afe_fetch_result_t;
/**
* @brief Function to initialize an AFE_SR instance
*
* @param afe_config The config of AFE_SR
* @returns Handle to the AFE_SR data
*/
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
/**
* @brief Get the number of samples per channel in one frame
*
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* This type is used for both the `get_feed_chunksize` and the `get_fetch_chunksize` members of
* esp_afe_sr_iface_t.
*
* @param afe The AFE_SR object to query
* @return The amount of samples per channel of one frame
*/
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the channel number
*
* This type is used for the `get_channel_num`, `get_feed_channel_num` and `get_fetch_channel_num`
* members of esp_afe_sr_iface_t.
*
* @param afe The AFE_SR object to query
* @return The amount of total channels
*/
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the sample rate of the samples to feed to the function
*
* @param afe The AFE_SR object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
/**
* @brief Feed samples of an audio stream to the AFE_SR
*
* @warning The input data should be arranged in the format of channel interleaving.
* The last channel is the reference signal if it has reference data.
*
* @param afe The AFE_SR object to feed
*
* @param in The input microphone signal, only signed 16-bit @ 16 kHz is supported. The frame size can be queried by
* `get_feed_chunksize`.
* @return The size of input
*/
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
/**
* @brief Fetch enhanced samples of an audio stream from the AFE_SR
*
* @warning The output is single channel data, no matter how many channels the input is.
* Timeout is 2000 ms. If you want to adjust the timeout, please refer to the definition of `fetch_with_delay`.
*
* @param afe The AFE_SR object to query
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
* audio can be queried by `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
/**
* @brief Fetch enhanced samples of an audio stream from the AFE_SR; same as `fetch`, but with a
* caller-supplied timeout
*
* @warning The output is single channel data, no matter how many channels the input is.
*
* @param afe The AFE_SR object to query
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
* audio can be queried by `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
/**
* @brief Reset the ring buffer of the AFE.
*
* @param afe The AFE_SR object to reset
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
/**
* @brief Set wakenet detection threshold
*
* @param afe The AFE_SR object to modify
* @param index The wakenet index, only 1 (wakenet1) or 2 (wakenet2) is supported
* @param threshold The wakenet detection threshold, the value is between 0.4 and 0.9999.
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
/**
* @brief Reset wakenet detection threshold to its initial state
*
* @param afe The AFE_SR object to modify
* @param index The wakenet index, only 1 (wakenet1) or 2 (wakenet2) is supported
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
/**
* @brief Reset one function/module/algorithm.
*
* In esp_afe_sr_iface_t this type is used for the `reset_vad` member.
*
* @param afe The AFE_SR object to modify
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
/**
* @brief Disable one function/module/algorithm.
*
* In esp_afe_sr_iface_t this type is used for the `disable_wakenet`, `disable_aec`, `disable_se`,
* `disable_vad`, `disable_ns` and `disable_agc` members.
*
* @param afe The AFE_SR object to modify
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
/**
* @brief Enable one function/module/algorithm.
*
* In esp_afe_sr_iface_t this type is used for the `enable_wakenet`, `enable_aec`, `enable_se`,
* `enable_vad`, `enable_ns` and `enable_agc` members.
*
* @param afe The AFE_SR object to modify
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
/**
* @brief Print the pipeline of all functions/modules/algorithms.
* The pipeline is the order of the functions/modules/algorithms.
* The format looks like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
*
* @param afe The AFE_SR object to query
*/
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
/**
* @brief Destroy an AFE_SR instance
*
* @param afe AFE_SR object to destroy
*/
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
/**
* This structure contains the functions used to do operations on an AFE_SR.
*
* Each member is a function pointer; see the corresponding `esp_afe_sr_iface_op_*_t`
* typedef for the parameters and return values.
*/
typedef struct {
// Creation and the feed/fetch data path
esp_afe_sr_iface_op_create_from_config_t create_from_config;
esp_afe_sr_iface_op_feed_t feed;
esp_afe_sr_iface_op_fetch_t fetch;
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
// Queries: frame sizes, channel counts and sample rate
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same as get_feed_channel_num
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
// WakeNet threshold control
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold;
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold;
// Per-module enable/disable switches (wakenet, aec, se, vad, ns, agc)
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
esp_afe_sr_iface_op_disable_func_t disable_aec;
esp_afe_sr_iface_op_enable_func_t enable_aec;
esp_afe_sr_iface_op_disable_func_t disable_se;
esp_afe_sr_iface_op_enable_func_t enable_se;
esp_afe_sr_iface_op_disable_func_t disable_vad;
esp_afe_sr_iface_op_enable_func_t enable_vad;
esp_afe_sr_iface_op_reset_op_t reset_vad;
esp_afe_sr_iface_op_disable_func_t disable_ns;
esp_afe_sr_iface_op_enable_func_t enable_ns;
esp_afe_sr_iface_op_disable_func_t disable_agc;
esp_afe_sr_iface_op_enable_func_t enable_agc;
// Diagnostics and teardown
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
esp_afe_sr_iface_op_destroy_t destroy;
} esp_afe_sr_iface_t;
// Bundles the AFE instance data, its operations table and the FreeRTOS task
// handles of the feed and fetch tasks, for use by the AFE task.
// NOTE(review): "into" in the type name looks like a typo for "info"; the name
// is public, so it is left unchanged here.
typedef struct {
esp_afe_sr_data_t *afe_data; // AFE_SR instance data
esp_afe_sr_iface_t *afe_handle; // operations table used with afe_data
TaskHandle_t feed_task; // handle of the feed task
TaskHandle_t fetch_task; // handle of the fetch task
} afe_task_into_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,13 @@
#pragma once
// Include dependencies before opening the linkage block, so that the headers
// pulled in (transitively) are not forced into C linkage for C++ users.
#include "esp_afe_sr_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Get the AFE_SR operations table matching a configuration
*
* @param config The config of AFE_SR
* @return Pointer to the esp_afe_sr_iface_t to use with this configuration.
*         NOTE(review): presumably NULL when no interface matches - confirm against the implementation.
*/
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,47 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AGC_H_
#define _ESP_AGC_H_
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Return codes of the AGC API: every non-negative value is valid, every
 * negative value is an error.
 */
typedef enum {
    /* success */
    ESP_AGC_SUCCESS = 0,
    /* agc fail */
    ESP_AGC_FAIL = -1,
    /* sample rate can be only 8khz, 16khz, 32khz */
    ESP_AGC_SAMPLE_RATE_ERROR = -2,
    /* the input frame must be exactly 10ms long, so the frame size in samples
       follows from the sample rate */
    ESP_AGC_FRAME_SIZE_ERROR = -3,
} ESP_AGE_ERR;
/* AGC operating modes accepted by esp_agc_open(). */
typedef enum {
    /* Bypass WEBRTC AGC */
    AGC_MODE_SR = -1,
    /* Only saturation protection */
    AGC_MODE_0 = 0,
    /* Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] */
    AGC_MODE_1 = 1,
    /* Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] */
    AGC_MODE_2 = 2,
    /* Fixed Digital Gain [compressionGaindB (default 8 dB)] */
    AGC_MODE_3 = 3,
} agc_mode_t;
// Create an AGC instance for the given mode and sample rate.
// Per ESP_AGC_SAMPLE_RATE_ERROR above, the sample rate can only be 8 kHz, 16 kHz or 32 kHz.
// NOTE(review): presumably returns NULL on failure - confirm against the implementation.
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
// Configure an AGC instance: gain in dB, limiter on/off and target level in dBFS.
// NOTE(review): valid ranges are not visible in this header - confirm before relying on them.
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
// Process one frame from in_pcm into out_pcm.
// Per ESP_AGC_FRAME_SIZE_ERROR above, the input frame must be 10 ms long at the given sample rate.
// NOTE(review): the int result is presumably one of the ESP_AGE_ERR codes - confirm.
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
// Release an AGC instance created by esp_agc_open().
void esp_agc_close(void *agc_handle);
#ifdef __cplusplus
}
#endif
#endif // _ESP_AGC_H_

View File

@@ -0,0 +1,41 @@
#ifndef _ESP_DOA_H_
#define _ESP_DOA_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque DOA handle; only the struct tag is declared in this header.
typedef struct doa_handle_t doa_handle_t;
/**
* @brief Initialize SRP-PHAT processor
* @param fs Sampling rate (Hz), e.g., 16000
* @param resolution Angular search resolution (degrees), e.g., 20
* @param d_mics Microphone spacing (meters), e.g., 0.06
* @param input_timedate_samples Number of input samples, e.g., 1024
*        (NOTE(review): "timedate" is presumably "time-domain" - confirm)
* @return Initialized doa_handle_t object pointer. The example values above are the recommended
*         configuration for better performance.
*/
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
/**
* @brief Release all allocated resources
* @param doa doa_handle_t instance pointer to be freed (as returned by esp_doa_create)
*/
void esp_doa_destroy(doa_handle_t *doa);
/**
* @brief Process audio frame for direction estimation
* @param doa doa_handle_t instance pointer
* @param left Left channel 16-bit PCM data
* @param right Right channel 16-bit PCM data
*        (NOTE(review): each buffer presumably holds the sample count given to esp_doa_create - confirm)
* @return Estimated sound direction in degrees, e.g., 0-180
*/
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_DOA_H_ */

View File

@@ -0,0 +1,93 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_MASE_H_
#define _ESP_MASE_H_
// int16_t is used by the prototypes below, so this header includes <stdint.h>
// itself instead of relying on the includer to have done so.
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Fixed processing parameters expected by mase_create().
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
#define MASE_FRAME_SIZE 16 // Supports 16ms only
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
/**
* @brief Mic-array type. Currently a 2-mic line array and a 3-mic circular
*        array are supported.
*/
typedef enum {
    /* 2-mic line array */
    TWO_MIC_LINE = 0,
    /* 3-mic circular array */
    THREE_MIC_CIRCLE = 1
} mase_mic_array_type_t;
/**
* @brief Operating mode: normal mode or wake-up enhancement mode
*/
typedef enum {
    /* normal mode */
    NORMAL_ENHANCEMENT_MODE = 0,
    /* wake-up enhancement mode */
    WAKE_UP_ENHANCEMENT_MODE = 1
} mase_op_mode_t;
// Untyped handle to a MASE instance, as returned by mase_create().
typedef void* mase_handle_t;
/**
* @brief Creates an instance of the MASE structure.
*
* @param fs The sampling frequency (Hz), must be 16000 (see MASE_SAMPLE_RATE).
*
* @param frame_size The length of the audio processing, must be 16ms (see MASE_FRAME_SIZE).
*
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array (see mase_mic_array_type_t).
*
* @param mic_distance The distance between neighboring microphones in mm (see MASE_MIC_DISTANCE).
*
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode (see mase_op_mode_t).
*
* @param filter_strength Strength of the mic-array speech enhancement, must be 0, 1, 2 or 3.
*
* @return
* - NULL: Create failed
* - Others: An instance of MASE
*/
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
/**
* @brief Performs mic array processing for one frame.
*
* @param st The instance of MASE.
*
* @param in An array of 16-bit signed audio samples from mic.
*
* @param dsp_out Returns enhanced signal.
*
* @return None
*
*/
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
/**
* @brief Free the MASE instance
*
* NOTE(review): the function name is spelled "destory"; it is a public symbol, so it is left unchanged here.
*
* @param st The instance of MASE.
*
* @return None
*
*/
void mase_destory(mase_handle_t st);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,86 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
// Opaque runner state; only the struct tag is declared in this header.
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
/*
 * Options for the mfcc algorithm itself. These more-or-less match the
 * parameters of csf_mfcc (from c_speech_features); please refer to its
 * documentation for details.
 */
typedef struct {
    int winstep_ms;         /* The step between successive windows in ms. (10) */
    int winlen_ms;          /* The length of the analysis window in ms. (25) */
    int nch;                /* The number of input channel */
    int numcep;             /* The number of cepstrum to return */
    int nfilter;            /* The number of filters in the filterbank */
    int nfft;               /* The FFT size */
    int samp_freq;          /* The sample-rate of the signal. */
    int low_freq;           /* The lowest band edge of mel filters, in hz. (e.g. 0) */
    int high_freq;          /* The highest band edge of mel filters, in hz. Must not be higher than samp_freq */
    float preemph;          /* Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) */
    char *win_type;         /* Analysis window type to apply to each frame:
                               "hanning","hamming","sine","rectangular","povey" */
    bool append_energy;     /* If true, the zeroth cepstral coefficient is replaced with the log of
                               the total frame energy */
    bool use_power;         /* If true, use power of fft spectrum, else use magnitude of fft spectrum */
    int use_log_fbank;      /* 0: return fbank, 1: return log(x+log_epsilon),
                               2: return log(max(x, log_epsilon)) */
    float log_epsilon;      /* log epsilon. (e.g. 1e-7) */
    bool psram_first;       /* Alloc memory from PSRAM first */
    bool remove_dc_offset;  /* Whether to subtract mean of wave before FFT */
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free a mfcc runner
*
* Function to free a previously allocated mfcc runner.
*
* @param r Runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Create a mfcc runner from a set of options.
*
* A mfcc runner needs to be created before it can be used; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process
* @return Pointer to the new runner data.
*         NOTE(review): presumably NULL on error - confirm against the implementation.
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run a mfcc iteration, frame by frame
*
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples.
*        NOTE(review): the original text gave the sample count as sampfreq/(winstep_ms/1000), which looks
*        like a slip for samp_freq * winstep_ms / 1000 (one window step of samples) - confirm.
* @param nch NOTE(review): presumably the number of channels contained in samp - confirm.
* @return A set of cepstral values, or NULL if no such values are available yet. The original text says to
*         free the buffer with a free_cepbuf function and that some implementations require it to be freed
*         before the next call. NOTE(review): esp_mfcc_iface_t has no free_cepbuf member - confirm who owns
*         the returned buffer.
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
/**
* @brief Clean all state of a mfcc handle
*
* @param r The mfcc runner
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on a mfcc runner
*
* Each member is a function pointer; see the matching esp_mfcc_op_*_t typedef for its contract.
*/
typedef struct {
esp_mfcc_op_destroy_t destroy; // free a runner
esp_mfcc_op_create_t create; // create a runner from esp_mfcc_opts_t
esp_mfcc_op_run_step_t run_step; // process one frame of samples
esp_mfcc_op_clean_t clean; // clean all state of a runner
} esp_mfcc_iface_t;

View File

@@ -0,0 +1,89 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
// Opaque runner state; only the struct tag is declared in this header.
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
/*
 * Options for the mfcc algorithm itself. These more-or-less match the
 * parameters of csf_mfcc (from c_speech_features); please refer to its
 * documentation for details.
 */
typedef struct {
    int winstep_ms;         /* The step between successive windows in ms. (10) */
    int winlen_ms;          /* The length of the analysis window in ms. (25) */
    int nch;                /* The number of input channel */
    int numcep;             /* The number of cepstrum to return */
    int nfilter;            /* The number of filters in the filterbank */
    int nfft;               /* The FFT size */
    int samp_freq;          /* The sample-rate of the signal. */
    int low_freq;           /* The lowest band edge of mel filters, in hz. (e.g. 0) */
    int high_freq;          /* The highest band edge of mel filters, in hz. Must not be higher than samp_freq */
    float preemph;          /* Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) */
    char *win_type;         /* Analysis window type to apply to each frame:
                               "hanning","hamming","sine","rectangular","povey" */
    bool append_energy;     /* If true, the zeroth cepstral coefficient is replaced with the log of
                               the total frame energy */
    bool use_power;         /* If true, use power of fft spectrum, else use magnitude of fft spectrum */
    int use_log_fbank;      /* 0: return fbank, 1: return log(x+log_epsilon),
                               2: return log(max(x, log_epsilon)) */
    float log_epsilon;      /* log epsilon. (e.g. 1e-7) */
    bool psram_first;       /* Alloc memory from PSRAM first */
    bool remove_dc_offset;  /* Whether to subtract mean of wave before FFT */
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free a mfcc runner
*
* Function to free a previously allocated mfcc runner.
*
* @param r Runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Create a mfcc runner from a set of options.
*
* A mfcc runner needs to be created before it can be used; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process
* @return Pointer to the new runner data.
*         NOTE(review): presumably NULL on error - confirm against the implementation.
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run a mfcc iteration, frame by frame
*
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples.
*        NOTE(review): the original text gave the sample count as sampfreq/(winstep_ms/1000), which looks
*        like a slip for samp_freq * winstep_ms / 1000 (one window step of samples) - confirm.
* @param nch NOTE(review): presumably the number of channels contained in samp - confirm.
* @return A set of cepstral values, or NULL if no such values are available yet. The original text says to
*         free the buffer with a free_cepbuf function and that some implementations require it to be freed
*         before the next call. NOTE(review): esp_mfcc_iface_t has no free_cepbuf member - confirm who owns
*         the returned buffer.
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
/**
* @brief int16 variant of the run_step operation
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples
* @param fbank NOTE(review): presumably the caller-provided output buffer that receives the int16
*        filterbank features - confirm size and ownership against the implementation.
*/
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
/**
* @brief Clean all state of a mfcc handle
*
* @param r The mfcc runner
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on a mfcc runner
*
* Each member is a function pointer; see the matching esp_mfcc_op_*_t typedef for its contract.
*/
typedef struct {
esp_mfcc_op_destroy_t destroy; // free a runner
esp_mfcc_op_create_t create; // create a runner from esp_mfcc_opts_t
esp_mfcc_op_run_step_t run_step; // process one frame of samples, float output
esp_mfcc_op_run_step_s16_t run_step_s16; // process one frame of samples, int16 output buffer
esp_mfcc_op_clean_t clean; // clean all state of a runner
} esp_mfcc_iface_t;

View File

@@ -0,0 +1,44 @@
#pragma once
#include "esp_mfcc_iface.h"
// Operations tables of the available fbank implementations (defined elsewhere).
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
/**
* @brief Return basic opts used in wakenet9 & multinet5
*
* Declared with an explicit `(void)` parameter list: in C an empty `()` leaves the
* arguments unspecified, so calls with stray arguments would not be diagnosed.
*
* @return Pointer to the options.
*         NOTE(review): ownership of the returned object is not visible in this header - confirm
*         whether the caller must free it.
**/
esp_mfcc_opts_t *get_mfcc_opts_wn9(void);
/**
* @brief Return basic opts used in wakenet9s
*
* @param win_type Analysis window type (see esp_mfcc_opts_t::win_type)
* @param use_power If true, use power of fft spectrum, else use magnitude of fft spectrum
* @param winstep_ms The step between successive windows in ms
* @param winlen_ms The length of the analysis window in ms
* @param nfilter The number of filters in the filterbank
* @return Pointer to the options.
*         NOTE(review): ownership of the returned object is not visible in this header - confirm.
**/
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
/**
* @brief Return basic opts for default kaldifeat
*
* The returned options are documented as being filled in like this:
*
opts->psram_first = true;
opts->use_power = true;
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
opts->win_type = "povey";
opts->low_freq = 20;
opts->high_freq = 7600;
opts->samp_freq = 16000;
opts->nch = 1;
opts->nfft = 512;
opts->nfilter = 80;
opts->numcep = 80;
opts->preemph = 0.97;
opts->append_energy = false;
opts->winlen_ms = 25;
opts->winstep_ms = 10;
opts->remove_dc_offset = true;
*
* Declared with an explicit `(void)` parameter list: in C an empty `()` leaves the
* arguments unspecified, so calls with stray arguments would not be diagnosed.
*
* @return Pointer to the options.
*         NOTE(review): ownership of the returned object is not visible in this header - confirm.
**/
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void);
/**
* @brief Print mfcc opts
*
* @param opts The options to print
**/
void print_mfcc_opts(esp_mfcc_opts_t *opts);

View File

@@ -0,0 +1,224 @@
#pragma once
#include "stdint.h"
#include "esp_wn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Capacity of the per-result arrays in esp_mn_results_t. */
#define ESP_MN_RESULT_MAX_NUM 5
/* Limits on command phrases (count and length). */
#define ESP_MN_MAX_PHRASE_NUM 400
#define ESP_MN_MAX_PHRASE_LEN 63
#define ESP_MN_MIN_PHRASE_LEN 2
/* Model-name prefix and language name strings. */
#define ESP_MN_PREFIX  "mn"
#define ESP_MN_ENGLISH "en"
#define ESP_MN_CHINESE "cn"
/* State reported by the multinet detect operation. */
typedef enum {
    /* detecting */
    ESP_MN_STATE_DETECTING = 0,
    /* detected */
    ESP_MN_STATE_DETECTED = 1,
    /* time out */
    ESP_MN_STATE_TIMEOUT = 2,
} esp_mn_state_t;
/*
 * Multinet loading mode.
 * The memory consumption decreases with increasing mode value; as a
 * consequence the CPU loading rate goes up.
 */
typedef enum {
    /* Load all weights from PSRAM. Fastest computation with maximum memory consumption */
    ESP_MN_LOAD_FROM_PSRAM = 0,
    /* Load some weights from PSRAM and load the rest from FLASH (default) */
    ESP_MN_LOAD_FROM_PSRAM_FLASH = 1,
    /* Load more weights from FLASH. Minimum memory consumption with slowest computation */
    ESP_MN_LOAD_FROM_FLASH = 2,
} esp_mn_loader_mode_t;
/* Decoding search methods. */
typedef enum {
    /* greedy search */
    ESP_MN_GREEDY_SEARCH = 0,
    /* beam search */
    ESP_MN_BEAM_SEARCH = 1,
    /* beam search with trie language model */
    ESP_MN_BEAM_SEARCH_WITH_FST = 2,
} esp_mn_search_method_t;
/* Language identifiers. */
typedef enum {
    /* Chinese language */
    CHINESE_ID = 1,
    /* English language */
    ENGLISH_ID = 2,
} language_id_t;
// Holds all possible recognition results; returned (by pointer) from the get_results operation.
typedef struct{
esp_mn_state_t state; // The multinet state these results belong to
int num; // The number of phrases in the lists, num <= ESP_MN_RESULT_MAX_NUM (5). When num=0, no phrase is recognized.
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
char string[256]; // recognized string with commands graph
char raw_string[256]; // recognized string without commands graph
} esp_mn_results_t;
/* One speech-command phrase. */
typedef struct {
    /* command string */
    char *string;
    /* command phonemes, if applicable */
    char *phonemes;
    /* the command id */
    int16_t command_id;
    /* trigger threshold, default: 0 */
    float threshold;
    /* prompt wave data of the phrase */
    int16_t *wave;
} esp_mn_phrase_t;
// Node of a singly linked list of command phrases (see the set_speech_commands operation,
// which takes the root of such a list).
typedef struct _mn_node_ {
esp_mn_phrase_t *phrase; // phrase stored in this node
struct _mn_node_ *next; // next node in the list
} esp_mn_node_t;
// Error report returned by the set_speech_commands operation.
typedef struct{
int16_t num; // The number of error phrases, which cannot be added into the model
esp_mn_phrase_t **phrases; // The array of error phrase pointers
} esp_mn_error_t;
/**
* @brief Initialize a model instance with the specified model name.
*
* @param model_name The multinet model name.
* @param duration The duration (ms) to trigger the timeout
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
/**
* @brief Switch multinet mode to change memory consumption and CPU loading
*
* @warning Only supported by multinet6 or later versions
*
* @param model The model object to modify
* @param mode The multinet loader mode (see esp_mn_loader_mode_t)
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
/**
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Callback function type to fetch the number of frames recognized by the command word
*
* @param model The model object to query
* @return The number of frames recognized by the command word
*/
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
/**
* @brief Set the detection threshold to manually adjust the probability
*
* @param model The model object to modify
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
* @return NOTE(review): the meaning of the int result is not documented in this header - confirm
*         against the implementation.
*/
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Get the language of model
*
* @param model The model object to query
* @return Language name string, e.g. ESP_MN_CHINESE, ESP_MN_ENGLISH
*/
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
/**
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
*
* @param model The model object to query.
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
* get_samp_chunksize function.
* @return The state of multinet (see esp_mn_state_t)
*/
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Destroy a speech commands recognition model
*
* @param model The model object to destroy
*/
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* @brief Get recognition results
*
* @param model The model object to query
*
* @return The current results (see esp_mn_results_t).
*/
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
/**
* @brief Turn on log printing
*
* @param model_data The model object to modify.
*
*/
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
/**
* @brief Clean all status of the model
*
* @param model_data The model object to clean.
*
*/
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
/**
* @brief Set the speech commands by mn_command_root
*
* NOTE(review): this typedef carries an "esp_wn_" prefix while every sibling uses "esp_mn_";
* the name is public, so it is left unchanged here.
*
* @param model_data The model object to modify.
* @param mn_command_root The root of the speech commands linked list (see esp_mn_node_t).
* @return The error phrase id info (see esp_mn_error_t).
*/
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
/**
* @brief Print out the current commands in the fst. Note that commands which were "added" but not
* yet "updated" will not be shown here.
*
* @param model_data The model object to query
*/
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
/**
* @brief Check if input string can be tokenized
*
* @param model_data The model object to query
* @param str The input string
* @return NOTE(review): the meaning of the int result is not documented in this header - confirm
*         against the implementation.
*/
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
// Operations table of a multinet (speech commands recognition) model.
// Each member is a function pointer; see the typedef of its type for parameters and return values.
typedef struct {
esp_mn_iface_op_create_t create; // create a model instance
esp_mn_iface_op_get_samp_rate_t get_samp_rate; // sample rate expected by detect
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize; // samples per detect call
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum; // number of frames recognized by the command word
esp_mn_iface_op_set_det_threshold_t set_det_threshold; // set the detection threshold
esp_mn_iface_op_get_language_t get_language; // language of the model
esp_mn_iface_op_detect_t detect; // feed samples and detect speech commands
esp_mn_iface_op_destroy_t destroy; // destroy the model
esp_mn_iface_op_get_results_t get_results; // get recognition results
esp_mn_iface_op_open_log_t open_log; // open the log print
esp_mn_iface_op_clean_t clean; // clean all status of the model
esp_wn_iface_op_set_speech_commands set_speech_commands; // set the speech commands
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode; // switch the loader mode
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands; // print current commands
esp_mn_iface_op_check_speech_command check_speech_command; // check if a string can be tokenized
} esp_mn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,66 @@
#pragma once
#include "esp_mn_iface.h"
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
//a specific phrase or word.
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Get the multinet handle from model name
*
* @param model_name The name of model
* @returns The handle of multinet (its operations table, see esp_mn_iface_t)
*/
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
/**
* @brief Get the multinet language from model name
*
* @param model_name The name of model
* @returns The language of multinet
*          (NOTE(review): presumably one of the ESP_MN_CHINESE / ESP_MN_ENGLISH strings - confirm)
*/
char *esp_mn_language_from_name(char *model_name);
/*
Configure the multinet model to use based on what's selected in menuconfig.
MULTINET_COEFF and MULTINET_MODEL_NAME are always defined; when no model is
selected they fall back to placeholder strings.
*/
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
#include "multinet2_ch.h"
#define MULTINET_COEFF get_coeff_multinet2_ch
#define MULTINET_MODEL_NAME "mn2_cn"
#else
// NOTE(review): in this branch MULTINET_COEFF is a string literal, while above it is an identifier,
// so code that uses it as a coefficient getter only compiles when a model is selected.
#define MULTINET_COEFF "COEFF_NULL"
#define MULTINET_MODEL_NAME "NULL"
#endif
/* example
esp_mn_iface_t *multinet = esp_mn_handle_from_name(MULTINET_MODEL_NAME);
//Initialize MultiNet model data (the second argument is the timeout duration in ms)
model_iface_data_t *model_data = multinet->create(MULTINET_MODEL_NAME, 6000);
add_speech_commands(multinet, model_data);
//Set parameters of buffer
int audio_chunksize = multinet->get_samp_chunksize(model_data);
int frequency = multinet->get_samp_rate(model_data);
int16_t *buffer = malloc(audio_chunksize * sizeof(int16_t));
//Detect
esp_mn_state_t state = multinet->detect(model_data, buffer);
if (state == ESP_MN_STATE_DETECTED) {
esp_mn_results_t *results = multinet->get_results(model_data);
printf("Detection triggered, command id %d.\n", results->command_id[0]);
}
//Destroy model
multinet->destroy(model_data);
free(buffer);
*/
#ifdef __cplusplus
}
#endif

Some files were not shown because too many files have changed in this diff Show More