add some code
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/* #undef ENABLE_DOUBLE */
|
||||
|
||||
/* Precision selection: when ENABLE_DOUBLE is defined the csf_* names map onto
 * `double` and the double-precision <math.h> functions; otherwise they map
 * onto `float` and the single-precision `f`-suffixed variants. */
#ifdef ENABLE_DOUBLE
|
||||
# define csf_float double
|
||||
# define csf_ceil ceil
|
||||
# define csf_floor floor
|
||||
# define csf_sin sin
|
||||
# define csf_log log
|
||||
# define csf_log10 log10
|
||||
# define csf_pow pow
|
||||
# define csf_sqrt sqrt
|
||||
# define csf_abs fabs
|
||||
/* DBL_MIN is the smallest positive normalized double, not the most negative value. */
# define csf_float_min DBL_MIN
|
||||
#else /* !ENABLE_DOUBLE: single precision */
|
||||
# define csf_float float
|
||||
# define csf_ceil ceilf
|
||||
# define csf_floor floorf
|
||||
# define csf_sin sinf
|
||||
# define csf_log logf
|
||||
# define csf_log10 log10f
|
||||
# define csf_pow powf
|
||||
# define csf_sqrt sqrtf
|
||||
# define csf_abs fabsf
|
||||
/* FLT_MIN is the smallest positive normalized float, not the most negative value. */
# define csf_float_min FLT_MIN
|
||||
#endif /* ENABLE_DOUBLE */
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_customized_word_wn5;
|
||||
418
managed_components/espressif__esp-sr/include/esp32/dl_lib.h
Normal file
418
managed_components/espressif__esp-sr/include/esp32/dl_lib.h
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_H
|
||||
#define DL_LIB_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "sdkconfig.h"
|
||||
#define DL_SPIRAM_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
#include "esp32s3/rom/cache.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int padding_state;
|
||||
|
||||
// /**
|
||||
// * @brief Allocate a chunk of memory which has the given capabilities.
|
||||
// * Equivalent semantics to libc malloc(), for capability-aware memory.
|
||||
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
|
||||
// *
|
||||
// * @param size In bytes, of the amount of memory to allocate
|
||||
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
|
||||
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
|
||||
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
|
||||
// * MALLOC_CAP_DMA: Memory must be accessible by DMA
|
||||
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
|
||||
// * @return Pointer to currently allocated heap memory
|
||||
// **/
|
||||
// void *heap_caps_malloc(size_t size, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Allocate aligned memory from internal memory or external memory.
|
||||
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
|
||||
* else, allocate memory from PSRAM
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently allocated heap memory
|
||||
*/
|
||||
void *dl_lib_calloc(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Always allocate aligned memory from external memory.
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently aligned heap memory
|
||||
*/
|
||||
void *dl_lib_calloc_psram(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
||||
*
|
||||
* @param ptr Pointer to free
|
||||
*/
|
||||
void dl_lib_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
|
||||
* Should be good til an input of 5 or so with a steps factor of 8.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
fptp_t fast_exp(double x, int steps);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
double fast_exp_pro(double x);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a quantized matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
|
||||
fptp_t dl_sigmoid_op(fptp_t in);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a floating point number
|
||||
*
|
||||
* @param v Floating point input number
|
||||
* @return Tanh value
|
||||
*/
|
||||
fptp_t dl_tanh_op(fptp_t v);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fully connected layer operation
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
|
||||
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
|
||||
*/
|
||||
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
|
||||
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
|
||||
* this matrix only needs to be calculated once. This function does that.
|
||||
*
|
||||
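* @param variance Variance matrix
* @param epsilon Epsilon value used together with the variance
* @param out Output matrix that receives the precalculated sqrtvari values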
|
||||
*/
|
||||
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
|
||||
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, partial quantized version.
|
||||
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons, need to be quantised
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a fully-connected layer pass, fully-quantized version.
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @param out Output matrix. The output values of the neurons are placed here.
|
||||
*/
|
||||
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, fully-quantized version
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix, fully-quantized version
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
|
||||
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
|
||||
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
int dl_sigmoid_op_q(const int in);
|
||||
int16_t dl_sigmoid_op_q8(const int16_t in);
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param v Fixed-point input
|
||||
* @return tanh output
|
||||
*/
|
||||
int dl_tanh_op_q(int v);
|
||||
int16_t dl_tanh_op_q8(int16_t v);
|
||||
|
||||
void load_mat_psram_mn4(void);
|
||||
void load_mat_psram_mn3(void);
|
||||
void free_mat_psram_mn4(void);
|
||||
void free_mat_psram_mn3(void);
|
||||
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
|
||||
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
|
||||
|
||||
int16_t dl_table_tanh_op(int16_t in, int exponent);
|
||||
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
|
||||
|
||||
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, float version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass.
|
||||
*
|
||||
* @warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
|
||||
* @param bias Bias for the CNN layer.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1.
|
||||
* @return The result of CNN layer.
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass, quantised version.
|
||||
*
|
||||
* @warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
|
||||
* @param bias Bias of the neurons.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1
|
||||
* @return The result of CNN layer
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_COEFGETTER_IF_H
|
||||
#define DL_LIB_COEFGETTER_IF_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "cJSON.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
|
||||
//dl_batch_normalize_get_sqrtvar first.
|
||||
#define COEF_GETTER_HINT_BNVAR (1<<0)
|
||||
|
||||
/*
|
||||
This struct describes the basic information of model data:
|
||||
word_num: the number of wake words or speech commands
|
||||
word_list: the name list of wake words or speech commands
|
||||
thresh_list: the threshold list of wake words or speech commands
|
||||
info_str: the string used to reflect the version and information of model data
|
||||
which consist of the architecture of network, the version of model data, wake words and their threshold
|
||||
*/
|
||||
typedef struct {
|
||||
int word_num; /* number of wake words or speech commands */
|
||||
char **word_list; /* name list of the wake words or speech commands */
|
||||
int *win_list; /* NOTE(review): not described in this header; presumably one entry per word - confirm */
|
||||
float *thresh_list; /* threshold list of the wake words or speech commands */
|
||||
char *info_str; /* string reflecting the version and information of the model data */
|
||||
} model_info_t;
|
||||
|
||||
/*
|
||||
Alphabet struct describes the basic grapheme or phoneme.
|
||||
item_num: the number of basic items (graphemes or phonemes)
|
||||
items: the list of basic item
|
||||
*/
|
||||
typedef struct {
|
||||
int item_num; /* number of basic items (graphemes or phonemes) */
|
||||
char **items; /* list of the basic items */
|
||||
}alphabet_t;
|
||||
|
||||
/*
|
||||
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
|
||||
For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
|
||||
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
|
||||
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
|
||||
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the
|
||||
memory for the returned matrices, when applicable.
|
||||
*/
|
||||
typedef struct {
|
||||
/* Look up the dl_matrix2d_t coefficient matrix called `name`; `arg` is a user-defined argument, `hint` a bitwise OR of COEF_GETTER_HINT_* flags or 0. */
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
|
||||
/* Same lookup, returning a dl_matrix2dq_t matrix. */
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
|
||||
/* Same lookup, returning a dl_matrix2dq8_t matrix. */
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
|
||||
/* Release a matrix returned by getter_f, when applicable. */
void (*free_f)(const dl_matrix2d_t *m);
|
||||
/* Release a matrix returned by getter_q, when applicable. */
void (*free_q)(const dl_matrix2dq_t *m);
|
||||
/* NOTE(review): presumably releases a matrix returned by getter_q8 - confirm. */
void (*free_q8)(const dl_matrix2dq8_t *m);
|
||||
/* Return the model_info_t description of the model data. */
const model_info_t* (*getter_info)(void *arg);
|
||||
/* Return the alphabet_t (basic graphemes or phonemes). */
const alphabet_t* (*getter_alphabet)(void *arg);
|
||||
/* Return a cJSON object; NOTE(review): presumably the model configuration - confirm. */
const cJSON* (*getter_config)(void *arg);
|
||||
} model_coeff_getter_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,180 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONV_QUEUE_H
|
||||
#define DL_LIB_CONV_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
//Flags for matrices
|
||||
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
//Float convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /**< Length of the queue */
|
||||
int c; /**< Channel number of each queue element */
|
||||
int front; /**< Front (top) position of the queue */
|
||||
int flag; /**< Not used */
|
||||
fptp_t *item; /**< Pointer to the item array */
|
||||
} dl_conv_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue from psram
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Free a convolution queue
|
||||
*
|
||||
* @param cq The convolution queue to free
|
||||
*/
|
||||
void dl_conv_queue_free(dl_conv_queue_t *cq);
|
||||
|
||||
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
so that the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param item The new element
|
||||
*/
|
||||
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on one element of the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on one element of the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on one element of the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
|
||||
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
|
||||
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
|
||||
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
/**
|
||||
* @brief Fast implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
|
||||
/**
|
||||
* @brief Fast implementation of a dilation layer, as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
|
||||
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
|
||||
|
||||
|
||||
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,303 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ8_QUEUE_H
|
||||
#define DL_LIB_CONVQ8_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//[nch, n, c]
|
||||
/* 8-bit fixed-point convolution queue; the layout note above this struct gives [nch, n, c]. */
typedef struct {
|
||||
int n; /*< Length of the queue (the n argument of the dl_convq8_queue_alloc* functions) */
|
||||
int c; /*< Number of elements (the c argument of the dl_convq8_queue_alloc* functions) */
|
||||
int front; /*< Front (top) position of the queue */
|
||||
int nch; /*< Number of channels of the queue (the nch argument of dl_convq8_queue_alloc_mc) */
|
||||
int exponent; /*< The values in itemq should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
q8tp_t *itemq; /*< Pointer to the item array */
|
||||
} dl_convq8_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate an 8-bit fixed-point multi-channel convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to zero
|
||||
*/
|
||||
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
|
||||
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
|
||||
|
||||
/**
|
||||
* @brief Insert the floating-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel Kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None; the function is declared void (the result is presumably written to the out queue)
|
||||
*/
|
||||
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None; the function is declared void (the result is presumably written to the out queue)
|
||||
*/
|
||||
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
|
||||
|
||||
|
||||
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
|
||||
|
||||
int8_t dl_sigmoid_lutq8(int in);
|
||||
/**
|
||||
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel number
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
|
||||
*
|
||||
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implementation of multi-channel 1D atrous convolution with 16-bit input and 8-bit output (a.k.a. convolution with holes or dilated convolution)
|
||||
* Usually, this layer is used as first layer for 8-bit network.
|
||||
*
|
||||
* @Warning All input and output convolution queues and matrices should be allocated.
|
||||
* Input is a 16-bit queue point, Output is an 8-bit queue point.
|
||||
*
|
||||
* @param in Input 16bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
|
||||
int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of 8-bit dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8-bit fixed-point convolution queue
|
||||
* @param out Output 8-bit fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
|
||||
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
|
||||
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
|
||||
|
||||
void print_convq8(dl_convq8_queue_t *cq, int offset);
|
||||
void print_convq(dl_convq_queue_t *cq, int offset);
|
||||
void dl_relu_convq8(dl_convq8_queue_t *cq);
|
||||
|
||||
void lstmq8_free(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,382 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ_QUEUE_H
|
||||
#define DL_LIB_CONVQ_QUEUE_H
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//fixed-point convolution FIFO queue.
|
||||
//[nch, n, c]
|
||||
/* Fixed-point convolution FIFO queue; the layout note above this struct gives [nch, n, c]. */
typedef struct {
|
||||
int n; /*< Length of the queue (the n argument of the dl_convq_queue_alloc* functions) */
|
||||
int c; /*< Number of elements (the c argument of the dl_convq_queue_alloc* functions) */
|
||||
int front; /*< Front (top) position of the queue */
|
||||
int nch; /*< Number of channels of the queue (the nch argument of dl_convq_queue_alloc_mc) */
|
||||
int exponent; /*< The values in itemq should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
qtp_t *itemq; /*< Pointer to the item array */
|
||||
} dl_convq_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
|
||||
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq_queue_free(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cq The fixed-point convolution queue point
|
||||
*/
|
||||
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
|
||||
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param a The new element (matrix) to insert
|
||||
*/
|
||||
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
|
||||
|
||||
/**
|
||||
* @brief Insert the floating-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param last_num Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of convolution queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return None; the function is declared void
|
||||
*/
|
||||
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in multi channel convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cqm Input fixed-point multi-channel convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param nch The channel number of cqm
|
||||
* @return None; the function is declared void
|
||||
*/
|
||||
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Does a relu operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* relu operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param last_num Offset from the front of the queue
|
||||
* @return None; the function is declared void
|
||||
*/
|
||||
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, input data
|
||||
stay as it is. Results are saved into the *out* array.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return softmax results
|
||||
*/
|
||||
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int offset, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int prenum);
|
||||
|
||||
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
|
||||
*
|
||||
* @param cq1 First fixed-point convolution queue
|
||||
* @param cq2 Second fixed-point convolution queue
|
||||
* @return The result of float-point convolution queue
|
||||
*/
|
||||
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
|
||||
*
|
||||
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
|
||||
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
|
||||
* the kernel is parameters *h_weight*.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param in_weight the LSTM kernel needed by first part
|
||||
* @param h_weight the LSTM kernel needed by second part
|
||||
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
|
||||
* @param in_shift Shift ratio used in first part
|
||||
* @param h_shift Shift ratio used in second part
|
||||
* @return The result of LSTM layer
|
||||
*/
|
||||
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
|
||||
|
||||
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
|
||||
dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @param nch The channel number of convolution queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number of cqm
|
||||
*/
|
||||
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset the offset to calculate input convq
|
||||
* @param prenum the preload size, 0: do not use preload function
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* kernel,
|
||||
dl_matrix2dq_t* bias,
|
||||
int shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows for multi channel input
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @param offset The offset to calculate input convq
|
||||
* @param prenum The preload size, 0: do not use preload function
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* filter_kernel,
|
||||
dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel,
|
||||
dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift,
|
||||
int gate_shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
|
||||
void test_lstm_convq(int size, int in_dim, int lstm_cell);
|
||||
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
|
||||
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,257 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIX_H
|
||||
#define DL_LIB_MATRIX_H
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
extern multi_heap_handle_t gst_heap;
|
||||
#endif
|
||||
|
||||
//Flags for matrices
|
||||
#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */
|
||||
#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */
|
||||
|
||||
//'Normal' float matrix
|
||||
typedef struct {
|
||||
int w; /*< Width */
|
||||
int h; /*< Height */
|
||||
int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */
|
||||
int flags; /*< Flags. OR of DL_MF_* values */
|
||||
fptp_t *item; /*< Pointer to item array */
|
||||
} dl_matrix2d_t;
|
||||
|
||||
//Macro to quickly access the raw items in a matrix
|
||||
// Raw item access for a float matrix: yields the lvalue at column x, row y,
// i.e. item[x + y*stride]. `m` is a pointer expression and is evaluated twice,
// so it must be free of side effects. Every argument is parenthesized so that
// expressions such as `&mat` or `p + 1` expand correctly.
#define DL_ITM(m, x, y) (m)->item[(x)+(y)*(m)->stride]
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->item space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrix_free(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrix_zero(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix from existing floating-point data
|
||||
*
|
||||
* @param w Width of resulting matrix
|
||||
* @param h Height of resulting matrix
|
||||
* @param data Data to populate matrix with
|
||||
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two matrices : res=a.b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Added data (the "res" of the formula above). Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data (the "res" of the formula above). Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Subtract a matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Subtracted data (the "res" of the formula above). Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrix(const dl_matrix2d_t *a);
|
||||
|
||||
/**
|
||||
* @brief Return the average square error given a correct and a test matrix.
|
||||
*
|
||||
* ...Well, more or less. If anything, it gives an indication of the error between
|
||||
* the two. Check the code for the exact implementation.
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return value indicating the relative difference between matrices
|
||||
*/
|
||||
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get a specific item from the matrix
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
|
||||
return DL_ITM(m, x, y);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the matrix to the given value
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
    /* Row-major store: skip y rows of m->stride items each, then x columns. */
    const int idx = x + y * m->stride;
    m->item[idx] = val;
}
|
||||
|
||||
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,387 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ_H
|
||||
#define DL_LIB_MATRIXQ_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int16_t qtp_t;
|
||||
|
||||
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
|
||||
//for easy use as a multiplicand without stressing out the flash cache too much.
|
||||
typedef struct {
|
||||
int w;
|
||||
int h;
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags;
|
||||
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
|
||||
qtp_t *itemq;
|
||||
} dl_matrix2dq_t;
|
||||
|
||||
#define DL_QTP_SHIFT 15
|
||||
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
|
||||
// Raw item access for a quantized matrix: yields the lvalue at itemq[y + x*stride]
// (x selects the stride-sized run, y the position inside it). `m` is a pointer
// expression and is evaluated twice, so it must be free of side effects. Every
// argument is parenthesized so that expressions such as `&mat` expand correctly.
#define DL_ITMQ(m, x, y) (m)->itemq[(y)+(x)*(m)->stride]
|
||||
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
|
||||
|
||||
#define DL_SHIFT_AUTO 32
|
||||
|
||||
/**
|
||||
* @info About quantized matrices and shift values
|
||||
*
|
||||
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
|
||||
* time. Quantized matrices can speed up your operations, but come with some quirks, and
|
||||
* it's good to understand how they work before using them.
|
||||
*
|
||||
* The data in the quantized matrix type is stored similarly to floating-point types:
|
||||
* when storing a real value, the value is stored as a mantissa (base number) and an
|
||||
* exponent. The 'real' value that can be re-derived from those two numbers is something
|
||||
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
|
||||
* the standard floating point implementations like e.g. IEEE-754.
|
||||
*
|
||||
* The difference with respect to quantized matrices is that for a quantized matrix, it is
|
||||
* assumed all values stored have more-or-less the same order of magnitude. This allows the
|
||||
* matrix to only store all the mantissas, while the exponents are shared; there is only one
|
||||
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
|
||||
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
|
||||
* integer arithmetic. It also nets us some memory savings - while normally a floating point
|
||||
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
|
||||
* memory requirements.
|
||||
*
|
||||
* While most of the details of handling the intricacies of the quantized matrixes are done
|
||||
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
|
||||
* specifically in places where addition/subtraction/division happens.
|
||||
*
|
||||
* The problem is that the routines do not know what the size of the resulting operation is. For
|
||||
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
|
||||
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
|
||||
* assume the mantissas needs to be scaled back, we may lose precision.
|
||||
*
|
||||
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
|
||||
* the argument is zero, the routine will be conservative, that is, increase the exponent of
|
||||
* the result to such an extent it's mathematically impossible a value in the result will exceed
|
||||
* the maximum value that can be stored. However, when this argument is larger than zero, the
|
||||
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
|
||||
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
|
||||
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
|
||||
* value possible. (Neural networks usually are okay with this happening for a limited amount
|
||||
* of matrix indices).
|
||||
*
|
||||
* For deciding on these shift values, it is recommended to start with a shift value of one, then
|
||||
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
|
||||
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
|
||||
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
|
||||
* else.
|
||||
*
|
||||
* For neural networks and other noise-tolerant applications, note that even when
|
||||
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
|
||||
* to slightly improved precision. Feel free to experiment.
|
||||
**/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
|
||||
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* TODO: DESCRIBE THIS FUNCTION
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Convert a quantized matrix to a floating-point one.
|
||||
*
|
||||
* @param m Quantized matrix to convert
|
||||
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The floating-point version of the quantized matrix
|
||||
**/
|
||||
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq_free(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrixq_zero(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a fixed-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
|
||||
* much slower than dl_matrixq_dot .
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a floating-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
|
||||
* much slower than dl_matrixq_dot_matrix_out.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand; float matrix
|
||||
* @param b Second multiplicand; quantized matrix
|
||||
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrixq(const dl_matrix2dq_t *a);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Add a pair of quantized matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Added data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @warning In contrast to the floating point equivalent of this function, the fixed-point version
|
||||
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
|
||||
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
|
||||
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
|
||||
* garbage after modifying the data in one of the slices.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Subtracted data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
|
||||
*/
|
||||
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data (the "res" of the formula above). Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
|
||||
* rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two quantized matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated quantized matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the quantized matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check the sanity of a quantized matrix
|
||||
*
|
||||
* Due to the nature of quantized matrices, depending on the calculations a quantized
|
||||
* matrix is the result of and the shift values chosen in those calculations, a quantized
|
||||
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
|
||||
* most significant mantissa bits are unused, or because a fair amount of mantissas are
|
||||
* clipped. This function checks if this is the case and will report a message to stdout
|
||||
* if significant loss of precision is detected.
|
||||
*
|
||||
* @param m The quantized matrix to check
|
||||
* @param name A string to be displayed in the message if the sanity check fails
|
||||
* @return True if matrix is sane, false otherwise
|
||||
**/
|
||||
|
||||
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
|
||||
|
||||
/**
|
||||
* @brief re-adjust the exponent of the matrix to fit the mantissa better
|
||||
*
|
||||
* This function will shift up all the data in the mantissas so there are no
|
||||
* most-significant bits that are unused in all mantissas. It will also adjust
|
||||
* the exponent to keep the actual values in the matrix the same.
|
||||
*
|
||||
* Some operations done on a matrix, especially operations that re-use the
|
||||
* result of earlier operations done in the same way, can lead to the loss of
|
||||
* data because the exponent of the quantized matrix is never re-adjusted. You
|
||||
* can do that explicitly by calling this function.
|
||||
*
|
||||
* @param m The matrix to re-adjust
|
||||
**/
|
||||
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the floating-point value of a specific item from the quantized matrix
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the quantized matrix to the given
|
||||
* floating-point value
|
||||
*
|
||||
* @warning If the given value is more than the exponent in the quantized matrix
|
||||
* allows for, all mantissas in the matrix will be shifted down to make the value
|
||||
* 'fit'. If, however, the exponent is such that the value would result in a
|
||||
* quantized mantissa of 0, nothing is done.
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ8_H
|
||||
#define DL_LIB_MATRIXQ8_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int8_t q8tp_t;
|
||||
|
||||
typedef struct {
|
||||
int w;
|
||||
int h;
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags;
|
||||
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
|
||||
q8tp_t *itemq;
|
||||
} dl_matrix2dq8_t;
|
||||
|
||||
#define DL_Q8TP_SHIFT 7
|
||||
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
|
||||
// Raw item access for an 8-bit quantized matrix: yields the lvalue at
// itemq[y + x*stride]. `m` is a pointer expression and is evaluated twice, so it
// must be free of side effects. Every argument is parenthesized so that
// expressions such as `&mat` expand correctly.
#define DL_ITMQ8(m, x, y) (m)->itemq[(y)+(x)*(m)->stride]
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq8_free(dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy a quantized matrix
|
||||
* Copy a quantized matrix from flash or iram/psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
105
managed_components/espressif__esp-sr/include/esp32/esp_aec.h
Normal file
105
managed_components/espressif__esp-sr/include/esp32/esp_aec.h
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AEC_H_
|
||||
#define _ESP_AEC_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
#define AEC_FRAME_LENGTH_MS 32
|
||||
|
||||
typedef struct aec_handle_t aec_handle_t;
|
||||
// AEC operating modes, passed as the `mode` argument of aec_create() and
// aec_pro_create(). Note that the value 2 is not assigned to any mode.
typedef enum {
|
||||
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
|
||||
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
|
||||
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
|
||||
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
|
||||
} aec_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
* Please get frame size by aec_get_chunksize() function
|
||||
*
|
||||
* @param sample_rate The Sampling frequency (Hz) must be 16000.
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure, same with aec_create().
|
||||
*
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and frame from mic.
|
||||
*
|
||||
* @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc().
|
||||
*
|
||||
* @param handel The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
|
||||
* @param indata An array of 16-bit signed audio samples from mic.
|
||||
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
|
||||
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int aec_get_chunksize(const aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Get AEC mode string
|
||||
*
|
||||
* @param aec_mode The mode of AEC.
|
||||
*
|
||||
* @return AEC mode string
|
||||
*/
|
||||
char * aec_get_mode_string(aec_mode_t aec_mode);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_destroy(aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AEC_H_
|
||||
@@ -0,0 +1,81 @@
|
||||
|
||||
#ifndef _ESP_AFE_AEC_H_
|
||||
#define _ESP_AFE_AEC_H_
|
||||
|
||||
#include "esp_aec.h"
|
||||
#include "esp_afe_config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
aec_handle_t *handle;
|
||||
aec_mode_t mode;
|
||||
afe_pcm_config_t pcm_config;
|
||||
int frame_size;
|
||||
int16_t *data;
|
||||
} afe_aec_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @warning Currently only supports 1 microphone channel and 1 playback channel.
|
||||
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
|
||||
* channel will be selected.
|
||||
*
|
||||
* The input format, same as afe config:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
|
||||
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
|
||||
* esp32c5.
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_aec_handle_t* The created AEC instance
|
||||
*/
|
||||
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
* @param indata Input audio data, format is defined by input_format.
|
||||
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned.
|
||||
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory
|
||||
|
||||
* @return The bytes of outdata.
|
||||
*/
|
||||
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void afe_aec_destroy(afe_aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AFE_AEC_H_
|
||||
@@ -0,0 +1,288 @@
|
||||
#pragma once
|
||||
#include "esp_aec.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_nsn_models.h"
|
||||
#include "esp_vad.h"
|
||||
#include "esp_vadn_models.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "model_path.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// VC: Voice Communication
|
||||
|
||||
// Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
|
||||
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
|
||||
} afe_sr_mode_t;
|
||||
|
||||
// Set AFE mode
|
||||
typedef enum {
|
||||
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
|
||||
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
|
||||
} afe_mode_t;
|
||||
|
||||
// Set AFE type
|
||||
typedef enum {
|
||||
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
|
||||
AFE_TYPE_VC = 1, // Voice communication scenarios, 16KHz input, including nonlinear noise suppression
|
||||
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8KHz input, note that the input data must be 8KHz
|
||||
} afe_type_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
|
||||
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
|
||||
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
// Peak amplitude of the fetched audio; each enumerator's value is the peak level in dB (0 means no agc gain)
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
|
||||
int mic_num; // microphone channel number
|
||||
uint8_t *mic_ids; // microphone channel indices
|
||||
int ref_num; // playback reference channel number
|
||||
uint8_t *ref_ids; // playback reference channel indices
|
||||
int sample_rate; // sample rate of audio
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
|
||||
AFE_NS_MODE_NET = 1, // please use model name of NSNET
|
||||
} afe_ns_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
|
||||
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
|
||||
} afe_agc_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Function to get the debug audio data
|
||||
*
|
||||
* @param data The debug audio data, which must not be modified. It should be copied away as soon as possible
|
||||
* to avoid blocking for too long.
|
||||
* @param data_size The number of bytes of data.
|
||||
* @return None
|
||||
*/
|
||||
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
|
||||
|
||||
typedef enum {
|
||||
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
|
||||
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
|
||||
AFE_DEBUG_HOOK_MAX = 2
|
||||
} afe_debug_hook_type_t;
|
||||
|
||||
typedef struct {
|
||||
afe_debug_hook_type_t hook_type; // debug type of hook
|
||||
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
|
||||
} afe_debug_hook_t;
|
||||
|
||||
// AFE configuration: per-algorithm enable flags and parameters (AEC, SE, NS, VAD, WakeNet, AGC)
// followed by the general AFE settings.
typedef struct {
|
||||
/********** AEC(Acoustic Echo Cancellation) **********/
|
||||
bool aec_init; // Whether to init aec
|
||||
aec_mode_t aec_mode; // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
|
||||
int aec_filter_length; // The filter length of aec
|
||||
|
||||
/********** SE(Speech Enhancement, microphone array processing) **********/
|
||||
bool se_init; // Whether to init se
|
||||
|
||||
/********** NS(Noise Suppression) **********/
|
||||
bool ns_init; // Whether to init ns
|
||||
char *ns_model_name; // Model name of ns
|
||||
afe_ns_mode_t afe_ns_mode; // Model mode of ns
|
||||
|
||||
/********** VAD(Voice Activity Detection) **********/
|
||||
bool vad_init; // Whether to init vad
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
char *vad_model_name; // The model name of vad. If it is null, WebRTC VAD will be used.
|
||||
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
|
||||
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
|
||||
// 1000 ms
|
||||
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
|
||||
// If you find that the vad cache can not cover all speech, please increase this value.
|
||||
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
|
||||
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
|
||||
|
||||
/********** WakeNet(Wake Word Engine) **********/
|
||||
bool wakenet_init; // Whether to init wakenet
|
||||
char *wakenet_model_name; // The model name of wakenet 1
|
||||
char *wakenet_model_name_2; // The model name of wakenet 2 if has wakenet 2
|
||||
det_mode_t wakenet_mode; // The mode of wakenet
|
||||
|
||||
/********** AGC(Automatic Gain Control) **********/
|
||||
bool agc_init; // Whether to init agc
|
||||
afe_agc_mode_t
|
||||
agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after far linear gain.
|
||||
int agc_compression_gain_db; // Compression gain in dB (default 9)
|
||||
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
|
||||
|
||||
/********** General AFE(Audio Front End) parameter **********/
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
|
||||
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
|
||||
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
|
||||
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
|
||||
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
|
||||
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
|
||||
// directly on the output amplitude: afe_linear_gain * amplitude.
|
||||
bool debug_init; // NOTE(review): presumably whether to init the debug hooks - confirm
|
||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||
// otherwise, select channel number by wakenet
|
||||
} afe_config_t;
|
||||
|
||||
/**
|
||||
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
|
||||
* on the chip target and input format. You can manually fine-tune it after creating the configuration
|
||||
*
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param models Models from partition, which is configured by Kconfig
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_config_t* The default config of afe
|
||||
*/
|
||||
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Check AFE configuration and make sure it is correct.
|
||||
*
|
||||
* @warning If there is a configuration conflict, this function will modify some parameters.
|
||||
* The guiding principle behind these modifications is to maintain the highest performance of the output audio and results.
|
||||
* And remove the conflict between different algorithms.
|
||||
*
|
||||
* For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
|
||||
* If SE(BSS) algorithm is deactivated, will only use the first microphone channel.
|
||||
*
|
||||
* @param afe_config Input AFE config
|
||||
*
|
||||
* @return afe_config_t* The modified AFE config
|
||||
*/
|
||||
afe_config_t *afe_config_check(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input format
|
||||
*
|
||||
* @param input_format The input format, same with afe_config_init() function
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
* @return true if the input format is parsed successfully, otherwise false
|
||||
*/
|
||||
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse I2S input data
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param mic_data The output microphone data
|
||||
* @param ref_data The output playback reference data
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
*/
|
||||
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Format input data, from contiguous arrangement to interleaved arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param data The input audio data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param factor The gain factor
|
||||
*
|
||||
* @return int16_t* The output audio data
|
||||
*/
|
||||
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
|
||||
|
||||
/**
|
||||
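* @brief Concatenate audio data from in_data into out_data (NOTE(review): the previous brief duplicated afe_adjust_gain(); confirm exact semantics against the implementation)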
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param in_data The input audio data
|
||||
* @param in_frame_size The frame size of input data
|
||||
* @param channel_num The channel number of input data, which is same as output data
|
||||
* @param out_data The output audio data
|
||||
* @param out_frame_size The frame size of output data
|
||||
*
|
||||
*/
|
||||
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
|
||||
|
||||
/**
|
||||
* @brief Copy the afe config
|
||||
*
|
||||
* @param dst_config The destination afe config
|
||||
* @param src_config The source afe config
|
||||
*
|
||||
* @return The destination afe config
|
||||
*/
|
||||
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
|
||||
|
||||
/**
|
||||
* @brief Print the afe config
|
||||
*
|
||||
* @param afe_config The afe config
|
||||
*/
|
||||
void afe_config_print(const afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Allocate afe config
|
||||
*
|
||||
* Declared with (void) so that C compilers see a full prototype that takes no arguments.
|
||||
*
|
||||
* @return The afe config pointer. NOTE(review): presumably released with afe_config_free() - confirm.
|
||||
*/
|
||||
afe_config_t *afe_config_alloc(void);
|
||||
|
||||
/**
|
||||
* @brief Free afe config
|
||||
*
|
||||
* @param afe_config The afe config pointer
|
||||
*/
|
||||
void afe_config_free(afe_config_t *afe_config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _ESP_AFE_DOA_H_
|
||||
#define _ESP_AFE_DOA_H_
|
||||
|
||||
#include "esp_doa.h"
|
||||
#include "esp_afe_config.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
doa_handle_t *doa_handle;
|
||||
afe_pcm_config_t pcm_config;
|
||||
int16_t *leftdata;
|
||||
int16_t *rightdata;
|
||||
int frame_size;
|
||||
} afe_doa_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param input_format The input format
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized afe_doa_handle_t object pointer. Recommend using the above configuration for better performance
|
||||
*/
|
||||
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param handle afe_doa_handle_t instance pointer
|
||||
* @param indata Input audio data, format is defined by input_format.
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param handle afe_doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void afe_doa_destroy(afe_doa_handle_t *handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_AFE_DOA_H_ */
|
||||
@@ -0,0 +1,237 @@
|
||||
#pragma once
|
||||
#include "esp_afe_config.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// afe_sr/AFE_SR: the audio front-end for speech recognition
|
||||
|
||||
// Opaque AFE_SR data container
|
||||
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of vad
|
||||
*/
|
||||
typedef enum {
|
||||
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
|
||||
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
|
||||
} afe_vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief The result of fetch function
|
||||
*/
|
||||
typedef struct afe_fetch_result_t {
|
||||
int16_t *data; // the target channel data of audio.
|
||||
int data_size; // the size of data. The unit is byte.
|
||||
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
|
||||
// audio that was truncated.
|
||||
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
|
||||
// (note: invalid in vc). If wakenet is enabled, the window length is the receptive field of
|
||||
// wakenet (about 1.5s), otherwise it is the frame length.
|
||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||
int wake_word_index; // if the wake word is detected, it stores the wake word index, which starts from 1.
|
||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
|
||||
// starts from 1.
|
||||
vad_state_t vad_state; // the value is vad_state_t
|
||||
int trigger_channel_id; // the channel index of output
|
||||
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||
int ret_value; // the return state of fetch function
|
||||
int16_t *raw_data; // the multi-channel output data of audio.
|
||||
int raw_data_channels; // the channel number of raw data
|
||||
float ringbuff_free_pct; // the percent of ringbuff free size. if the value is larger than 0.5, it means the ringbuff is busy.
|
||||
void *reserved; // reserved for future use
|
||||
} afe_fetch_result_t;
|
||||
|
||||
/**
|
||||
* @brief Function to initialize an AFE_SR instance
|
||||
*
|
||||
* @param afe_config The config of AFE_SR
|
||||
* @returns Handle to the AFE_SR data
|
||||
*/
|
||||
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of each channel samples per frame that need to be passed to the function
|
||||
*
|
||||
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of samples to feed the fetch function
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of total channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the function
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the AFE_SR
|
||||
*
|
||||
* @warning The input data should be arranged in the format of channel interleaving.
|
||||
* The last channel is reference signal if it has reference data.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*
|
||||
* @param in The input microphone signal, only support signed 16-bit @ 16 KHZ. The frame size can be queried by the
|
||||
* `get_feed_chunksize`.
|
||||
* @return The size of input
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR
|
||||
*
|
||||
* @warning The output is single channel data, no matter how many channels the input is.
|
||||
* Timeout is 2000 ms. If you want to adjust timeout, please refer to the definition of `fetch_with_delay`.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR, same with the function `fetch`
|
||||
*
|
||||
* @warning The output is single channel data, no matter how many channels the input is.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
|
||||
|
||||
/**
|
||||
* @brief reset ringbuf of AFE.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Set wakenet detection threshold
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @param threshold The wakenet detection threshold, the value is between 0.4 and 0.9999.
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
|
||||
|
||||
/**
|
||||
* @brief Reset wakenet detection threshold to initial state
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
|
||||
|
||||
/**
|
||||
* @brief Reset one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Disable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Print all functions/modules/algorithms pipeline.
|
||||
* The pipeline is the order of the functions/modules/algorithms.
|
||||
* The format like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Destroy an AFE_SR instance
|
||||
*
|
||||
* @param afe AFE_SR object to destroy
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on an AFE_SR.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_afe_sr_iface_op_create_from_config_t create_from_config;
|
||||
esp_afe_sr_iface_op_feed_t feed;
|
||||
esp_afe_sr_iface_op_fetch_t fetch;
|
||||
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
|
||||
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_aec;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_aec;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_se;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_se;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_vad;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_vad;
|
||||
esp_afe_sr_iface_op_reset_op_t reset_vad;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_ns;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_ns;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_agc;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_agc;
|
||||
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
|
||||
esp_afe_sr_iface_op_destroy_t destroy;
|
||||
} esp_afe_sr_iface_t;
|
||||
|
||||
// struct is used to store the AFE handle and data for the AFE task
|
||||
typedef struct {
|
||||
esp_afe_sr_data_t *afe_data;
|
||||
esp_afe_sr_iface_t *afe_handle;
|
||||
TaskHandle_t feed_task;
|
||||
TaskHandle_t fetch_task;
|
||||
} afe_task_into_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "esp_afe_sr_iface.h"
|
||||
|
||||
/**
 * @brief Get the AFE_SR interface (the esp_afe_sr_iface_t table of operations) for an AFE config
 *
 * @param config The AFE config; it is taken as const and not modified through this pointer
 *
 * @return Pointer to the esp_afe_sr_iface_t interface.
 *         NOTE(review): behaviour for an invalid or NULL config is not visible here - confirm.
 */
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
47
managed_components/espressif__esp-sr/include/esp32/esp_agc.h
Normal file
47
managed_components/espressif__esp-sr/include/esp32/esp_agc.h
Normal file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AGC_H_
|
||||
#define _ESP_AGC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
////all positive values are valid, negative values are errors
|
||||
typedef enum {
|
||||
ESP_AGC_SUCCESS = 0, ////success
|
||||
ESP_AGC_FAIL = -1, ////agc fail
|
||||
ESP_AGC_SAMPLE_RATE_ERROR = -2, ///sample rate can be only 8khz, 16khz, 32khz
|
||||
ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size
|
||||
} ESP_AGE_ERR;
|
||||
|
||||
typedef enum {
|
||||
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
|
||||
AGC_MODE_0 = 0, // Only saturation protection
|
||||
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
|
||||
} agc_mode_t;
|
||||
|
||||
// Open an AGC instance for the given mode and sample rate.
// NOTE(review): the returned pointer is presumably the agc_handle expected by the functions below - confirm.
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
|
||||
// Configure an AGC instance: gain in dB, limiter enable flag and target level in dBFS.
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
|
||||
// Process one frame from in_pcm into out_pcm.
// NOTE(review): the return value is presumably an ESP_AGE_ERR code (0 on success, negative on error) - confirm.
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
|
||||
// Close an AGC instance.
void esp_agc_close(void *agc_handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _ESP_AGC_H_
|
||||
41
managed_components/espressif__esp-sr/include/esp32/esp_doa.h
Normal file
41
managed_components/espressif__esp-sr/include/esp32/esp_doa.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef _ESP_DOA_H_
|
||||
#define _ESP_DOA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct doa_handle_t doa_handle_t;
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized doa_handle_t object pointer. Using the example values above is recommended for better performance.
|
||||
*/
|
||||
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param doa doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void esp_doa_destroy(doa_handle_t *doa);
|
||||
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param doa doa_handle_t instance pointer
|
||||
* @param left Left channel 16-bit PCM data
|
||||
* @param right Right channel 16-bit PCM data
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_DOA_H_ */
|
||||
@@ -0,0 +1,93 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_MASE_H_
|
||||
#define _ESP_MASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
|
||||
#define MASE_FRAME_SIZE 16 // Supports 16ms only
|
||||
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
|
||||
|
||||
/**
|
||||
* @brief Sets mic-array type, currently 2-mic line array and 3-mic circular array
|
||||
* are supported.
|
||||
*/
|
||||
typedef enum {
|
||||
TWO_MIC_LINE = 0,
|
||||
THREE_MIC_CIRCLE = 1
|
||||
} mase_mic_array_type_t;
|
||||
|
||||
/**
|
||||
* @brief Sets operating mode, supporting normal mode and wake-up enhancement mode
|
||||
*/
|
||||
typedef enum {
|
||||
NORMAL_ENHANCEMENT_MODE = 0,
|
||||
WAKE_UP_ENHANCEMENT_MODE = 1
|
||||
} mase_op_mode_t;
|
||||
|
||||
typedef void* mase_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the MASE structure.
|
||||
*
|
||||
* @param fs The sampling frequency (Hz) must be 16000.
|
||||
*
|
||||
* @param frame_size The length of the audio processing must be 16ms.
|
||||
*
|
||||
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array.
|
||||
*
|
||||
* @param mic_distance The distance between neighboring microphones in mm.
|
||||
*
|
||||
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode.
|
||||
*
|
||||
* @param filter_strength Strength of the mic-array speech enhancement, must be 0, 1, 2 or 3.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: An instance of MASE
|
||||
*/
|
||||
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
|
||||
|
||||
/**
|
||||
* @brief Performs mic array processing for one frame.
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @param in An array of 16-bit signed audio samples from mic.
|
||||
*
|
||||
* @param dsp_out Returns enhanced signal.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
|
||||
|
||||
/**
|
||||
* @brief Free the MASE instance
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_destory(mase_handle_t st);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,89 @@
|
||||
#pragma once
|
||||
#include "esp_speech_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
|
||||
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
|
||||
multiple implementations can be used.
|
||||
*/
|
||||
|
||||
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
|
||||
|
||||
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
|
||||
// please refer to its documentation for details.
|
||||
typedef struct {
|
||||
int winstep_ms; // The step between successive windows in ms. (10)
|
||||
int winlen_ms; // The length of the analysis window in ms. (25)
|
||||
int nch; // The number of input channel
|
||||
int numcep; // The number of cepstrum to return
|
||||
int nfilter; // The number of filters in the filterbank
|
||||
int nfft; // The FFT size
|
||||
int samp_freq; // The sample-rate of the signal.
|
||||
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
|
||||
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
|
||||
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
|
||||
char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey"
|
||||
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
|
||||
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
|
||||
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
|
||||
float log_epsilon; // log epsilon. (e.g. 1e-7)
|
||||
bool psram_first; // Alloc memory from PSRAM first
|
||||
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
|
||||
} esp_mfcc_opts_t;
|
||||
|
||||
/**
|
||||
* @brief Un-initialize and free a mfcc runner
|
||||
*
|
||||
* Function to free a previously allocated mfcc runner.
|
||||
*
|
||||
* @param r Runner object to destroy
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Initialize parameters for a mfcc runner.
|
||||
*
|
||||
* After creation, a mfcc runner needs to be initialized first; this is usually done
|
||||
* in the initialization routine of a speech recognition algorithm. This provides
|
||||
* a pointer to do this for a specific mfcc runner.
|
||||
*
|
||||
* @param opt Options for the mfcc process
|
||||
* @return Pointer to the created mfcc runner data (presumably NULL on error; confirm against the implementation).
|
||||
*/
|
||||
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
|
||||
|
||||
/**
|
||||
* @brief Run a mfcc iteration on frame by frame
|
||||
*
|
||||
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
|
||||
* an initial call to this function may return NULL and subsequent calls may return the
|
||||
* cepstrum of previous calls.
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
* @param samp An array of signed 16-bit samples. The amount of samples should be samp_freq*winstep_ms/1000 (one window step).
|
||||
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
|
||||
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
|
||||
* to this function is done.
|
||||
*/
|
||||
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
|
||||
|
||||
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
|
||||
|
||||
/**
|
||||
* @brief Clean all state of mfcc handle
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Operations possible on a mfcc runner
|
||||
*/
|
||||
typedef struct {
|
||||
esp_mfcc_op_destroy_t destroy;
|
||||
esp_mfcc_op_create_t create;
|
||||
esp_mfcc_op_run_step_t run_step;
|
||||
esp_mfcc_op_run_step_s16_t run_step_s16;
|
||||
esp_mfcc_op_clean_t clean;
|
||||
} esp_mfcc_iface_t;
|
||||
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include "esp_mfcc_iface.h"
|
||||
|
||||
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
|
||||
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9 & multinet5
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_wn9(void); // (void): explicit empty parameter list, so C callers get argument checking
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9s
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
|
||||
|
||||
/**
|
||||
* @brief Return basic opts for default kaldifeat
|
||||
*
|
||||
opts->psram_first = true;
|
||||
opts->use_power = true;
|
||||
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
|
||||
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
|
||||
opts->win_type = "povey";
|
||||
opts->low_freq = 20;
|
||||
opts->high_freq = 7600;
|
||||
opts->samp_freq = 16000;
|
||||
opts->nch = 1;
|
||||
opts->nfft = 512;
|
||||
opts->nfilter = 80;
|
||||
opts->numcep = 80;
|
||||
opts->preemph = 0.97;
|
||||
opts->append_energy = false;
|
||||
opts->winlen_ms = 25;
|
||||
opts->winstep_ms = 10;
|
||||
opts->remove_dc_offset = true;
|
||||
*
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void); // (void): explicit empty parameter list, so C callers get argument checking
|
||||
|
||||
/**
|
||||
* @brief Print mfcc opts
|
||||
**/
|
||||
void print_mfcc_opts(esp_mfcc_opts_t *opts);
|
||||
@@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ESP_MN_RESULT_MAX_NUM 5
|
||||
#define ESP_MN_MAX_PHRASE_NUM 400
|
||||
#define ESP_MN_MAX_PHRASE_LEN 63
|
||||
#define ESP_MN_MIN_PHRASE_LEN 2
|
||||
|
||||
#define ESP_MN_PREFIX "mn"
|
||||
#define ESP_MN_ENGLISH "en"
|
||||
#define ESP_MN_CHINESE "cn"
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_STATE_DETECTING = 0, // detecting
|
||||
ESP_MN_STATE_DETECTED = 1, // detected
|
||||
ESP_MN_STATE_TIMEOUT = 2, // time out
|
||||
} esp_mn_state_t;
|
||||
|
||||
//Set multinet loading mode
|
||||
//The memory consumption is decreased with increasing mode,
|
||||
//As a consequence also the CPU loading rate goes up
|
||||
typedef enum {
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with maximum memory consumption
|
||||
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and load the rest from FLASH (default)
|
||||
ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation
|
||||
} esp_mn_loader_mode_t;
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_GREEDY_SEARCH = 0, // greedy search
|
||||
ESP_MN_BEAM_SEARCH = 1, // beam search
|
||||
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
|
||||
} esp_mn_search_method_t;
|
||||
|
||||
typedef enum {
|
||||
CHINESE_ID = 1, // Chinese language
|
||||
ENGLISH_ID = 2, // English language
|
||||
} language_id_t;
|
||||
|
||||
// Return all possible recognition results
|
||||
typedef struct{
|
||||
esp_mn_state_t state;
|
||||
int num; // The number of phrase in list, num<=5. When num=0, no phrase is recognized.
|
||||
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
|
||||
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
|
||||
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
|
||||
char string[256]; // recognized string with commands graph
|
||||
char raw_string[256]; // recognized string without commands graph
|
||||
} esp_mn_results_t;
|
||||
|
||||
typedef struct {
|
||||
char *string; // command string
|
||||
char *phonemes; // command phonemes, if applicable
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
} esp_mn_phrase_t;
|
||||
|
||||
typedef struct _mn_node_ {
|
||||
esp_mn_phrase_t *phrase;
|
||||
struct _mn_node_ *next;
|
||||
} esp_mn_node_t;
|
||||
|
||||
typedef struct{
|
||||
int16_t num; // The number of error phrases, i.e. phrases that could not be added to the model
|
||||
esp_mn_phrase_t **phrases; // The array of error phrase pointers
|
||||
} esp_mn_error_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a model instance with specified model name.
|
||||
*
|
||||
* @param model_name The multinet model name.
|
||||
* @param duration The duration (ms) to trigger the timeout
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
|
||||
|
||||
/**
|
||||
* @brief Switch multinet mode to change memory consumption and CPU loading
|
||||
*
|
||||
* @warning Just Support multinet6 or later versions
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param mode The multinet loader mode
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the number of frames recognized by the command word
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of the frames recognized by the command word
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the language of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
|
||||
*/
|
||||
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The state of multinet
|
||||
*/
|
||||
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech commands recognition model
|
||||
*
|
||||
* @param model The Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get recognition results
|
||||
*
|
||||
* @param model The Model object to query
|
||||
*
|
||||
* @return The current results.
|
||||
*/
|
||||
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Open the log print
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Clean all status of model
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Set the speech commands by mn_command_root
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
* @param mn_command_root The speech commands link.
|
||||
* @return The error phrase id info.
|
||||
*/
|
||||
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Check if input string can be tokenized
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
* @param str The input string
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
|
||||
|
||||
typedef struct {
|
||||
esp_mn_iface_op_create_t create;
|
||||
esp_mn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
|
||||
esp_mn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_mn_iface_op_get_language_t get_language;
|
||||
esp_mn_iface_op_detect_t detect;
|
||||
esp_mn_iface_op_destroy_t destroy;
|
||||
esp_mn_iface_op_get_results_t get_results;
|
||||
esp_mn_iface_op_open_log_t open_log;
|
||||
esp_mn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_set_speech_commands set_speech_commands;
|
||||
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode;
|
||||
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands;
|
||||
esp_mn_iface_op_check_speech_command check_speech_command;
|
||||
} esp_mn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
//a specific phrase or word.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
* @brief Get the multinet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of multinet
|
||||
*/
|
||||
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the multinet language from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The language of multinet
|
||||
*/
|
||||
char *esp_mn_language_from_name(char *model_name);
|
||||
|
||||
/*
|
||||
Configure wake word to use based on what's selected in menuconfig.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
|
||||
#include "multinet2_ch.h"
|
||||
#define MULTINET_COEFF get_coeff_multinet2_ch
|
||||
#define MULTINET_MODEL_NAME "mn2_cn"
|
||||
|
||||
#else
|
||||
#define MULTINET_COEFF "COEFF_NULL"
|
||||
#define MULTINET_MODEL_NAME "NULL"
|
||||
#endif
|
||||
|
||||
|
||||
/* example
|
||||
|
||||
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
|
||||
|
||||
//Initialize MultiNet model data
|
||||
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
|
||||
add_speech_commands(multinet, model_data);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=model->get_samp_chunksize(model_data);
|
||||
int frequency = model->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=model->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
model->destroy(model_data);
|
||||
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
86
managed_components/espressif__esp-sr/include/esp32/esp_ns.h
Normal file
86
managed_components/espressif__esp-sr/include/esp32/esp_ns.h
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_NS_H_
|
||||
#define _ESP_NS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NS_USE_SPIARM 0
|
||||
#define NS_FRAME_LENGTH_MS 10 //Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* The Sampling frequency (Hz) must be 16000Hz
|
||||
*/
|
||||
|
||||
typedef void* ns_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the NS structure.
|
||||
*
|
||||
* @param frame_length The length of the audio processing can be 10ms, 20ms, 30ms.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_create(int frame_length);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the more powerful noise suppression algorithm.
|
||||
*
|
||||
* @warning frame_length only supports 10 ms.
|
||||
*
|
||||
* @param frame_length The length of the audio processing can only be 10ms.
|
||||
* @param mode 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_pro_create(int frame_length, int mode, int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the NS and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @param outdata An array of 16-bit signed audio samples after noise suppression.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Free the NS instance
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_destroy(ns_handle_t inst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_NS_H_
|
||||
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct esp_nsn_data_t esp_nsn_data_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance
|
||||
*
|
||||
* @param model_name The name of the model instance
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the process function
|
||||
*
|
||||
* Every noise suppression model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the process function
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the noise suppression model and get data after process.
|
||||
*
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param in_data An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @param out_data An array of 16-bit signed audio samples after process.
|
||||
* @return The state of return.
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the process function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a noise suppression model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a noise suppression model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_nsn_iface_op_create_t create;
|
||||
esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_nsn_iface_op_process_t process;
|
||||
esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_nsn_iface_op_destroy_t destroy;
|
||||
} esp_nsn_iface_t;
|
||||
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include "esp_nsn_iface.h"
|
||||
|
||||
/*
|
||||
The prefix of nsnet
|
||||
Now there are nsnet1 and nsnet2
|
||||
*/
|
||||
#define ESP_NSNET_PREFIX "nsnet"
|
||||
|
||||
/**
|
||||
* @brief Get the nsnet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of nsnet
|
||||
*/
|
||||
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
|
||||
@@ -0,0 +1,62 @@
|
||||
#pragma once
|
||||
#include "c_speech_features_config.h"
|
||||
#include "stdlib.h"
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifndef M_2PI
|
||||
#define M_2PI 6.283185307179586476925286766559005
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
float *coeff;
|
||||
int *bank_pos;
|
||||
int nfilter;
|
||||
} esp_mel_filter_t;
|
||||
|
||||
float *esp_mfcc_malloc(size_t size, bool from_psram);
|
||||
|
||||
void esp_mfcc_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Initialize FFT table
|
||||
* @warning For ESP-PLATFORM, use esp-dsp fft
|
||||
* For Other platform, use kiss fft
|
||||
*
|
||||
* @param nfft The input samples number
|
||||
* @return fft-table
|
||||
**/
|
||||
void *esp_fft_init(int nfft);
|
||||
|
||||
/**
|
||||
* @brief Free FFT table
|
||||
* @warning For ESP-PLATFORM, use esp-dsp fft
|
||||
* For Other platform, use kiss fft
|
||||
*
|
||||
* @param fft_table The fft table initialized by esp_fft_init
|
||||
* @param nfft The input samples number
|
||||
* @return None
|
||||
**/
|
||||
void esp_fft_deinit(void *fft_table, int nfft);
|
||||
|
||||
/**
|
||||
* @brief Initialize window function
|
||||
* Currently support hanning, hamming, sine, povey, rectangular,
|
||||
* wn9(512-hanning to get wakenet9& multinet5 compatible)
|
||||
**/
|
||||
float *esp_win_func_init(char *win_type, float *window_data, int frame_length);
|
||||
|
||||
float *esp_fftr(float *x, int nfft, void *fft_table);
|
||||
|
||||
float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_handle);
|
||||
|
||||
void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc);
|
||||
|
||||
float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last);
|
||||
|
||||
esp_mel_filter_t *esp_mel_filter_init(
|
||||
int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram);
|
||||
|
||||
void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter);
|
||||
|
||||
float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon);
|
||||
@@ -0,0 +1,84 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_WEBRTC_H_
|
||||
#define _ESP_WEBRTC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "esp_agc.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_ns.h"
|
||||
#include "sr_ringbuf.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#include "esp_heap_caps.h"
|
||||
|
||||
typedef struct {
|
||||
void *ns_handle;
|
||||
void *agc_handle;
|
||||
int frame_size;
|
||||
int sample_rate;
|
||||
int16_t *buff;
|
||||
int16_t *out_data;
|
||||
sr_ringbuf_handle_t rb;
|
||||
} webrtc_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of webrtc.
|
||||
*
|
||||
* @warning frame_length_ms can be 10 ms, 20 ms, 30 ms or 32 ms.
|
||||
*
|
||||
* @param frame_length_ms The length of the audio processing
|
||||
* @param ns_mode The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param agc_mode The model of AGC
|
||||
* @param agc_gain The gain of AGC. default is 9
|
||||
* @param agc_target_level The target level of AGC. default is -3 dbfs
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of webrtc
|
||||
*/
|
||||
webrtc_handle_t *webrtc_create(
|
||||
int frame_length_ms, int ns_mode, agc_mode_t agc_mode, int agc_gain, int agc_target_level, int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the webrtc and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
* @param size The sample size of output data
|
||||
* @param enable_ns Enable noise suppression
|
||||
* @param enable_agc Enable automatic gain control
|
||||
*
|
||||
* @return data after noise suppression
|
||||
*/
|
||||
int16_t *webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
|
||||
|
||||
/**
|
||||
* @brief Free the webrtc instance
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void webrtc_destroy(webrtc_handle_t *handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_NS_H_
|
||||
178
managed_components/espressif__esp-sr/include/esp32/esp_vad.h
Normal file
178
managed_components/espressif__esp-sr/include/esp32/esp_vad.h
Normal file
@@ -0,0 +1,178 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_VAD_H_
|
||||
#define _ESP_VAD_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SAMPLE_RATE_HZ 16000 // Supports 32000, 16000, 8000
|
||||
#define VAD_FRAME_LENGTH_MS 30 // Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
||||
* restrictive in reporting speech. If you want speech to be reported more readily, select a lower mode.
|
||||
*/
|
||||
typedef enum {
|
||||
VAD_MODE_0 = 0, // Normal
|
||||
VAD_MODE_1, // Aggressive
|
||||
VAD_MODE_2, // Very Aggressive
|
||||
VAD_MODE_3, // Very Very Aggressive
|
||||
VAD_MODE_4 // Very Very Very Aggressive
|
||||
} vad_mode_t;
|
||||
|
||||
typedef enum {
|
||||
VAD_SILENCE = 0,
|
||||
VAD_SPEECH = 1,
|
||||
} vad_state_t;
|
||||
|
||||
typedef struct vad_trigger_tag {
|
||||
vad_state_t state;
|
||||
unsigned int min_speech_len;
|
||||
unsigned int noise_len;
|
||||
unsigned int min_noise_len;
|
||||
unsigned int speech_len;
|
||||
} vad_trigger_t;
|
||||
|
||||
// The expansion is parenthesized so it behaves as a single value inside larger
// expressions (e.g. `vad_MAX_LEN / 2` or `-vad_MAX_LEN`).
#define vad_MAX_LEN (INT32_MAX - 1)
|
||||
/**
|
||||
* @brief Allocate a VAD trigger
|
||||
*
|
||||
* @param min_speech_len Minimum frame number of speech duration
|
||||
* @param min_noise_len Minimum frame number of noise duration
|
||||
*
|
||||
* @return Trigger pointer
|
||||
**/
|
||||
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
|
||||
|
||||
/**
|
||||
* @brief Free a VAD trigger
|
||||
**/
|
||||
void vad_trigger_free(vad_trigger_t *trigger);
|
||||
|
||||
/**
|
||||
* @brief Reset a VAD trigger
|
||||
**/
|
||||
void vad_trigger_reset(vad_trigger_t *trigger);
|
||||
|
||||
/**
|
||||
* @brief Detect voice activity using the trigger
|
||||
**/
|
||||
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||
|
||||
typedef struct {
|
||||
vad_trigger_t *trigger;
|
||||
void *vad_inst;
|
||||
int sample_rate;
|
||||
int frame_size;
|
||||
} vad_handle_with_trigger_t;
|
||||
|
||||
typedef vad_handle_with_trigger_t *vad_handle_t;
|
||||
|
||||
// typedef vad_handle_tag * vad_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the VAD structure with the given parameters.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
* @param sample_rate Sample rate in Hz
|
||||
* @param one_frame_ms Length of the audio chunksize, can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @param min_speech_ms Minimum speech duration, unit is ms
|
||||
* @param min_noise_ms Minimum noise duration, unit is ms
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create_with_param(
|
||||
vad_mode_t vad_mode, int sample_rate, int one_frame_ms, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process(vad_handle_t handle, int16_t *data, int sample_rate_hz, int one_frame_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process_with_trigger(vad_handle_t handle, int16_t *data);
|
||||
|
||||
/**
|
||||
* @brief Reset trigger state as Silence
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
*/
|
||||
void vad_reset_trigger(vad_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Free the VAD instance
|
||||
*
|
||||
* @param inst The instance of VAD.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void vad_destroy(vad_handle_t inst);
|
||||
|
||||
/*
|
||||
* Programming Guide:
|
||||
*
|
||||
* @code{c}
|
||||
* vad_handle_t vad_inst = vad_create(VAD_MODE_3); // Creates an instance to
|
||||
* the VAD structure.
|
||||
*
|
||||
* while (1) {
|
||||
* //Use buffer to receive the audio data from MIC.
|
||||
* vad_state_t vad_state = vad_process(vad_inst, buffer, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Feed samples to the VAD process and get the result.
|
||||
* }
|
||||
*
|
||||
* vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
|
||||
*
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_VAD_H_
|
||||
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
#include "esp_vad.h"
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
// /**
|
||||
// * @brief The state of vad
|
||||
// */
|
||||
// typedef enum {
|
||||
// VAD_NOISE = -1, // Noise
|
||||
// VADNET_STATE_SILENCE = 0, // Silence
|
||||
// VAD_SPEECH = 1 // Speech
|
||||
// } vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance with a detection mode
|
||||
* and specified model name
|
||||
*
|
||||
* @param model_name The specified model name
|
||||
* @param mode The voice activity detection mode
|
||||
* @param channel_num The number of input audio channels
|
||||
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
|
||||
* speech
|
||||
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
|
||||
* noise
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
|
||||
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of channels of the samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The detection threshold, the range of
|
||||
* det_threshold is 0.5~0.9999
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the voice activity detection threshold
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used
|
||||
* can be queried by the get_samp_chunksize function.
|
||||
* @return The detected voice activity state, a vad_state_t value such as
|
||||
* VAD_SILENCE or VAD_SPEECH.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The detected voice activity state, a vad_state_t value such as
|
||||
* VAD_SILENCE or VAD_SPEECH.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a model object
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a voice
|
||||
* activity detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_vadn_iface_op_create_t create;
|
||||
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_vadn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_vadn_iface_op_detect_t detect;
|
||||
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_vadn_iface_op_clean_t clean;
|
||||
esp_vadn_iface_op_destroy_t destroy;
|
||||
} esp_vadn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
#include "esp_vadn_iface.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The prefix of vadnet model name is used to filter all vadnet from available models.
|
||||
#define ESP_VADN_PREFIX "vadnet"
|
||||
|
||||
/**
|
||||
* @brief Get the vadnet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of vadnet
|
||||
*/
|
||||
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_WEBRTC_H_
|
||||
#define _ESP_WEBRTC_H_
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include "sr_ringbuf.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_ns.h"
|
||||
|
||||
#include "esp_heap_caps.h"
|
||||
|
||||
typedef struct {
|
||||
void* ns_handle;
|
||||
void* agc_handle;
|
||||
int frame_size;
|
||||
int sample_rate;
|
||||
int16_t *buff;
|
||||
int16_t *out_data;
|
||||
sr_ringbuf_handle_t rb;
|
||||
}webrtc_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of webrtc.
|
||||
*
|
||||
* @warning Supported values of frame_length_ms are 10 ms, 20 ms, 30 ms and 32 ms.
|
||||
*
|
||||
* @param frame_length_ms The length of the audio processing
|
||||
* @param ns_mode The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param agc_mode The mode of AGC
|
||||
* @param agc_gain The gain of AGC. default is 9
|
||||
* @param agc_target_level The target level of AGC. default is -3 dbfs
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of webrtc
|
||||
*/
|
||||
webrtc_handle_t* webrtc_create(
|
||||
int frame_length_ms,
|
||||
int ns_mode,
|
||||
agc_mode_t agc_mode,
|
||||
int agc_gain,
|
||||
int agc_target_level,
|
||||
int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the webrtc and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
* @param size The sample size of output data
|
||||
* @param enable_ns Enable noise suppression
|
||||
* @param enable_agc Enable automatic gain control
|
||||
*
|
||||
* @return data after noise suppression
|
||||
*/
|
||||
int16_t* webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
|
||||
|
||||
/**
|
||||
* @brief Free the webrtc instance
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void webrtc_destroy(webrtc_handle_t *handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_WEBRTC_H_
|
||||
@@ -0,0 +1,226 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of wakeup
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
WAKENET_NO_DETECT = 0, // wake word is not detected
|
||||
WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
|
||||
WAKENET_DETECTED = 1 // wake word is detected
|
||||
} wakenet_state_t;
|
||||
|
||||
//Set wake words recognition operating mode
|
||||
//The probability of being wake words is increased with increasing mode,
|
||||
//As a consequence also the false alarm rate goes up
|
||||
typedef enum {
|
||||
DET_MODE_90 = 0, // Normal
|
||||
DET_MODE_95 = 1, // Aggressive
|
||||
DET_MODE_2CH_90 = 2,
|
||||
DET_MODE_2CH_95 = 3,
|
||||
DET_MODE_3CH_90 = 4,
|
||||
DET_MODE_3CH_95 = 5,
|
||||
DET_MODE_90_COPY_PARAMS = 6, // Aggressive
|
||||
} det_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int wake_word_num; //The number of all wake words
|
||||
char **wake_word_list; //The name list of wake words
|
||||
} wake_word_info_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance with a detection mode and specified wake word coefficient
|
||||
*
|
||||
* @param model_name The specified wake word model coefficient
|
||||
* @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of channels of the samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the start point of wake word when one wake word is detected.
|
||||
*
|
||||
* @Warning: This function should be called when the channel index is verified.
|
||||
* The returned value is the number of samples from start point of wake word to detected point.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of samples from start point to detected point (end point)
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the number of wake words
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the number of wake words
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the name of wake word by index
|
||||
*
|
||||
* @Warning The index of wake word start with 1
|
||||
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the name of the wake word
|
||||
*/
|
||||
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger wake words, the range of det_threshold is 0.4~0.9999
|
||||
* @param word_index The index of wake word
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Reset the threshold to its initial state
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_reset_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the wake word detection threshold of different modes
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
|
||||
*
|
||||
* @Warning The index of wake word start with 1, 0 means no wake words is detected.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Get the volume gain
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param target_db The target dB to calculate volume gain
|
||||
* @returns the volume gain
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech recognition model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the wake word model and detect if a
|
||||
* wake word is found.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a wake word detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_wn_iface_op_create_t create;
|
||||
esp_wn_iface_op_get_start_point_t get_start_point;
|
||||
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_wn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_wn_iface_op_get_word_num_t get_word_num;
|
||||
esp_wn_iface_op_get_word_name_t get_word_name;
|
||||
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_wn_iface_op_reset_det_threshold_t reset_det_threshold;
|
||||
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
|
||||
esp_wn_iface_op_detect_t detect;
|
||||
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_wn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_destroy_t destroy;
|
||||
} esp_wn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
#include "esp_wn_iface.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The prefix of wakenet model name is used to filter all wakenet from available models.
|
||||
#define ESP_WN_PREFIX "wn"
|
||||
|
||||
/**
|
||||
* @brief Get the wakenet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of wakenet
|
||||
*/
|
||||
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the wake word name from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The wake word name, like "alexa","hilexin","xiaoaitongxue"
|
||||
*/
|
||||
char *esp_wn_wakeword_from_name(const char *model_name);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
static const esp_wn_iface_t *model = esp_wn_handle_from_name(model_name);
|
||||
|
||||
//Initialize wakeNet model data
|
||||
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=model->get_samp_chunksize(model_data);
|
||||
int frequency = model->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=model->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
model->destroy(model_data);
|
||||
|
||||
*/
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef __FLITE_G2P_H__
|
||||
#define __FLITE_G2P_H__
|
||||
|
||||
typedef struct {
|
||||
int num_phonemes;
|
||||
int phoneme_size;
|
||||
char **phonemes;
|
||||
} flite_g2p_result;
|
||||
|
||||
void flite_g2p_result_free(flite_g2p_result *result);
|
||||
|
||||
flite_g2p_result *flite_g2p_get_result(const char *grapheme);
|
||||
|
||||
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p(const char *graphemes, int map_phonemes);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5X2;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5X3;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_multinet2_ch;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaoxin_wn5X3;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X2;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X3;
|
||||
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/* #undef ENABLE_DOUBLE */
|
||||
|
||||
#ifdef ENABLE_DOUBLE
|
||||
# define csf_float double
|
||||
# define csf_ceil ceil
|
||||
# define csf_floor floor
|
||||
# define csf_sin sin
|
||||
# define csf_log log
|
||||
# define csf_log10 log10
|
||||
# define csf_pow pow
|
||||
# define csf_sqrt sqrt
|
||||
# define csf_abs fabs
|
||||
# define csf_float_min DBL_MIN
|
||||
#else
|
||||
# define csf_float float
|
||||
# define csf_ceil ceilf
|
||||
# define csf_floor floorf
|
||||
# define csf_sin sinf
|
||||
# define csf_log logf
|
||||
# define csf_log10 log10f
|
||||
# define csf_pow powf
|
||||
# define csf_sqrt sqrtf
|
||||
# define csf_abs fabsf
|
||||
# define csf_float_min FLT_MIN
|
||||
#endif
|
||||
418
managed_components/espressif__esp-sr/include/esp32c3/dl_lib.h
Normal file
418
managed_components/espressif__esp-sr/include/esp32c3/dl_lib.h
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_H
|
||||
#define DL_LIB_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "sdkconfig.h"
|
||||
#define DL_SPIRAM_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
#include "esp32s3/rom/cache.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int padding_state;
|
||||
|
||||
// /**
|
||||
// * @brief Allocate a chunk of memory which has the given capabilities.
|
||||
// * Equivalent semantics to libc malloc(), for capability-aware memory.
|
||||
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
|
||||
// *
|
||||
// * @param size In bytes, of the amount of memory to allocate
|
||||
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
|
||||
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
|
||||
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
|
||||
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
|
||||
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
|
||||
// * @return Pointer to currently allocated heap memory
|
||||
// **/
|
||||
// void *heap_caps_malloc(size_t size, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Allocate aligned memory from internal memory or external memory.
|
||||
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
|
||||
* else, allocate memory from PSRAM
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently allocated heap memory
|
||||
*/
|
||||
void *dl_lib_calloc(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Always allocate aligned memory from external memory.
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently aligned heap memory
|
||||
*/
|
||||
void *dl_lib_calloc_psram(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
||||
*
|
||||
* @param ptr Pointer to free
|
||||
*/
|
||||
void dl_lib_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
|
||||
* Should be good up to an input of 5 or so with a steps factor of 8.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
fptp_t fast_exp(double x, int steps);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
double fast_exp_pro(double x);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a quantized matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
|
||||
fptp_t dl_sigmoid_op(fptp_t in);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a floating point number
|
||||
*
|
||||
* @param v Floating point input number
|
||||
* @return Tanh value
|
||||
*/
|
||||
fptp_t dl_tanh_op(fptp_t v);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fully connected layer operation
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
|
||||
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
|
||||
*/
|
||||
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
|
||||
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
|
||||
* this matrix only needs to be calculated once. This function does that.
|
||||
*
|
||||
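* @param variance Variance matrix
* @param epsilon Epsilon value
* @param out Output matrix that receives the precalculated sqrtvari values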
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
|
||||
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, partial quantized version.
|
||||
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons, need to be quantised
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a fully-connected layer pass, fully-quantized version.
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @param out Output values of the neurons
|
||||
*/
|
||||
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, fully-quantized version
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix, fully-quantized version
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
|
||||
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
|
||||
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
int dl_sigmoid_op_q(const int in);
|
||||
int16_t dl_sigmoid_op_q8(const int16_t in);
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param v Fixed-point input
|
||||
* @return tanh output
|
||||
*/
|
||||
int dl_tanh_op_q(int v);
|
||||
int16_t dl_tanh_op_q8(int16_t v);
|
||||
|
||||
void load_mat_psram_mn4(void);
|
||||
void load_mat_psram_mn3(void);
|
||||
void free_mat_psram_mn4(void);
|
||||
void free_mat_psram_mn3(void);
|
||||
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
|
||||
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
|
||||
|
||||
int16_t dl_table_tanh_op(int16_t in, int exponent);
|
||||
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
|
||||
|
||||
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Filter out the numbers greater than clip in the matrix, float version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Filter out the numbers greater than clip in the matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
|
||||
* @param bias Bias for the CNN layer.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1.
|
||||
* @return The result of CNN layer.
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass, quantised version.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
|
||||
* @param bias Bias of the neurons.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1
|
||||
* @return The result of CNN layer
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_COEFGETTER_IF_H
|
||||
#define DL_LIB_COEFGETTER_IF_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "cJSON.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
|
||||
//dl_batch_normalize_get_sqrtvar first.
|
||||
#define COEF_GETTER_HINT_BNVAR (1<<0)
|
||||
|
||||
/*
|
||||
This struct describes the basic information of model data:
|
||||
word_num: the number of wake words or speech commands
|
||||
word_list: the name list of wake words or speech commands
|
||||
thresh_list: the threshold list of wake words or speech commands
|
||||
info_str: the string used to reflect the version and information of model data
|
||||
which consist of the architecture of network, the version of model data, wake words and their threshold
|
||||
*/
|
||||
typedef struct {
|
||||
int word_num;
|
||||
char **word_list;
|
||||
int *win_list;
|
||||
float *thresh_list;
|
||||
char *info_str;
|
||||
} model_info_t;
|
||||
|
||||
/*
|
||||
Alphabet struct describes the basic grapheme or phoneme.
|
||||
item_num: the number of basic items (graphemes or phonemes)
|
||||
items: the list of basic item
|
||||
*/
|
||||
typedef struct {
|
||||
int item_num;
|
||||
char **items;
|
||||
}alphabet_t;
|
||||
|
||||
/*
|
||||
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
|
||||
For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
|
||||
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
|
||||
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
|
||||
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the
|
||||
memory for the returned matrices, when applicable.
|
||||
*/
|
||||
typedef struct {
|
||||
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
|
||||
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
|
||||
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
|
||||
void (*free_f)(const dl_matrix2d_t *m);
|
||||
void (*free_q)(const dl_matrix2dq_t *m);
|
||||
void (*free_q8)(const dl_matrix2dq8_t *m);
|
||||
const model_info_t* (*getter_info)(void *arg);
|
||||
const alphabet_t* (*getter_alphabet)(void *arg);
|
||||
const cJSON* (*getter_config)(void *arg);
|
||||
} model_coeff_getter_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,180 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONV_QUEUE_H
|
||||
#define DL_LIB_CONV_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
//Flags for matrices
|
||||
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
//Float convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /*< the length of queue */
|
||||
int c; /*< the channel number of queue element*/
|
||||
int front; /*< the front(top) position of queue */
|
||||
int flag; /*< not used*/
|
||||
fptp_t *item; /*< Pointer to item array */
|
||||
} dl_conv_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue from psram
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Free a convolution queue
|
||||
*
|
||||
* @param cq The convolution queue to free
|
||||
*/
|
||||
void dl_conv_queue_free(dl_conv_queue_t *cq);
|
||||
|
||||
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param item The new element
|
||||
*/
|
||||
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
|
||||
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
|
||||
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
|
||||
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
/**
|
||||
* @brief Fast implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
|
||||
/**
|
||||
* @brief Fast implementation of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
|
||||
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
|
||||
|
||||
|
||||
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,303 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ8_QUEUE_H
|
||||
#define DL_LIB_CONVQ8_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//[nch, n, c]
|
||||
typedef struct {
|
||||
int n; /*< the length of queue */
|
||||
int c; /*< the number of queue element*/
|
||||
int front; /*< the front(top) position of queue */
|
||||
int nch; /*< the channel of queue */
|
||||
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
q8tp_t *itemq; /*< Pointer to item array */
|
||||
} dl_convq8_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue whose itemq is set to 0
|
||||
*/
|
||||
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
|
||||
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel Kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
|
||||
|
||||
|
||||
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
|
||||
|
||||
int8_t dl_sigmoid_lutq8(int in);
|
||||
/**
|
||||
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel number
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
|
||||
*
|
||||
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* Usually, this layer is used as first layer for 8-bit network.
|
||||
*
|
||||
* @Warning All input and output convolution queues and matrices should be allocated.
|
||||
* Input is a 16-bit queue point, Output is an 8-bit queue point.
|
||||
*
|
||||
* @param in Input 16bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
|
||||
int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of 8-bit dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8-bit fixed-point convolution queue
|
||||
* @param out Output 8-bit fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
|
||||
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
|
||||
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
|
||||
|
||||
void print_convq8(dl_convq8_queue_t *cq, int offset);
|
||||
void print_convq(dl_convq_queue_t *cq, int offset);
|
||||
void dl_relu_convq8(dl_convq8_queue_t *cq);
|
||||
|
||||
void lstmq8_free(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,382 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ_QUEUE_H
|
||||
#define DL_LIB_CONVQ_QUEUE_H
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//fixed-point convolution FIFO queue.
|
||||
//[nch, n, c]
|
||||
typedef struct {
|
||||
int n; /*< the length of queue */
|
||||
int c; /*< the number of queue element*/
|
||||
int front; /*< the front(top) position of queue */
|
||||
int nch; /*< the multiple of queue*/
|
||||
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
qtp_t *itemq; /*< Pointer to item array */
|
||||
} dl_convq_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
|
||||
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq_queue_free(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cq The fixed-point convolution queue point
|
||||
*/
|
||||
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
|
||||
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param a The new element
|
||||
*/
|
||||
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param last_num Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of convolution queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in multi channel convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cqm Input fixed-point multi channel convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param nch The channel number of cqm
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Does a relu operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* relu operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param last_num Offset from the front of the queue
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, input data
|
||||
stay as it is. Results are saved into the *out* array.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return softmax results
|
||||
*/
|
||||
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int offset, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int prenum);
|
||||
|
||||
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
|
||||
*
|
||||
* @param cq1 First fixed-point convolution queue
|
||||
* @param cq2 Second fixed-point convolution queue
|
||||
* @return The result of float-point convolution queue
|
||||
*/
|
||||
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
|
||||
*
|
||||
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
|
||||
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
|
||||
* the kernel is parameters *h_weight*.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param in_weight the LSTM kernel needed by first part
|
||||
* @param h_weight the LSTM kernel needed by second part
|
||||
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
|
||||
* @param in_shift Shift ratio used in first part
|
||||
* @param h_shift Shift ratio used in second part
|
||||
* @return The result of LSTM layer
|
||||
*/
|
||||
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
|
||||
|
||||
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
|
||||
dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @param nch the channel number of convolution queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number of cqm
|
||||
*/
|
||||
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset the offset to calculate input convq
|
||||
* @param prenum the preload size, 0: do not use preload function
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* kernel,
|
||||
dl_matrix2dq_t* bias,
|
||||
int shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows for multi channel input
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @param offset The offset to calculate input convq
|
||||
* @param prenum The preload size, 0: do not use preload function
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* filter_kernel,
|
||||
dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel,
|
||||
dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift,
|
||||
int gate_shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
|
||||
void test_lstm_convq(int size, int in_dim, int lstm_cell);
|
||||
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
|
||||
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,257 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIX_H
|
||||
#define DL_LIB_MATRIX_H
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
extern multi_heap_handle_t gst_heap;
|
||||
#endif
|
||||
|
||||
//Flags for matrices
|
||||
#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */
|
||||
#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */
|
||||
|
||||
//'Normal' float matrix
|
||||
typedef struct {
|
||||
int w; /*< Width */
|
||||
int h; /*< Height */
|
||||
int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */
|
||||
int flags; /*< Flags. OR of DL_MF_* values */
|
||||
fptp_t *item; /*< Pointer to item array */
|
||||
} dl_matrix2d_t;
|
||||
|
||||
//Macro to quickly access the raw items in a matrix
|
||||
/* Yields the item at column x, row y of matrix pointer m (usable as an lvalue).
 * Every argument and the whole expansion are parenthesized so that expression
 * arguments such as DL_ITM(p + 1, x, y) expand correctly. Note that m is
 * evaluated twice, so it must not have side effects. */
#define DL_ITM(m, x, y) ((m)->item[(x) + (y) * (m)->stride])
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrix_free(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrix_zero(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix from existing floating-point data
|
||||
*
|
||||
* @param w Width of resulting matrix
|
||||
* @param h Height of resulting matrix
|
||||
* @param data Data to populate matrix with
|
||||
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two matrices : res=a.b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Added data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Subtract a matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Subtracted data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrix(const dl_matrix2d_t *a);
|
||||
|
||||
/**
|
||||
* @brief Return the average square error given a correct and a test matrix.
|
||||
*
|
||||
* ...Well, more or less. If anything, it gives an indication of the error between
|
||||
* the two. Check the code for the exact implementation.
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return value indicating the relative difference between matrices
|
||||
*/
|
||||
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get a specific item from the matrix
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
|
||||
return DL_ITM(m, x, y);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the matrix to the given value
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
    /* Same addressing as DL_ITM: column x within the row that starts y*stride items in. */
    const int offset = x + y * m->stride;
    m->item[offset] = val;
}
|
||||
|
||||
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,387 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ_H
|
||||
#define DL_LIB_MATRIXQ_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int16_t qtp_t;
|
||||
|
||||
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
|
||||
//for easy use as a multiplicand without stressing out the flash cache too much.
|
||||
typedef struct {
|
||||
int w; //Width
|
||||
int h; //Height
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags; //Presumably an OR of DL_MF_* values, as in dl_matrix2d_t -- TODO confirm
|
||||
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
|
||||
qtp_t *itemq; //Pointer to the 16-bit mantissa array; index it through DL_ITMQ
|
||||
} dl_matrix2dq_t;
|
||||
|
||||
#define DL_QTP_SHIFT 15
|
||||
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
|
||||
//Access the raw quantized item at column x, row y of matrix m. Storage is
//inverted relative to DL_ITM (index is y + x*stride). Arguments and the whole
//expansion are parenthesized so any pointer expression can be passed for m;
//m is evaluated twice, so avoid arguments with side effects.
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
|
||||
|
||||
#define DL_SHIFT_AUTO 32
|
||||
|
||||
/**
|
||||
* @info About quantized matrices and shift values
|
||||
*
|
||||
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
|
||||
* time. Quantized matrices can speed up your operations, but come with some quirks, and
|
||||
* it's good to understand how they work before using them.
|
||||
*
|
||||
* The data in the quantized matrix type is stored similarly to floating-point types:
|
||||
* when storing a real value, the value is stored as a mantissa (base number) and an
|
||||
* exponent. The 'real' value that can be re-derived from those two numbers is something
|
||||
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
|
||||
* the standard floating point implementations like e.g. IEEE-754.
|
||||
*
|
||||
* The difference with respect to quantized matrices is that for a quantized matrix, it is
|
||||
* assumed all values stored have more-or-less the same order of magnitude. This allows the
|
||||
* matrix to only store all the mantissas, while the exponents are shared; there is only one
|
||||
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
|
||||
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
|
||||
* integer arithmetic. It also nets us some memory savings - while normally a floating point
|
||||
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
|
||||
* memory requirements.
|
||||
*
|
||||
* While most of the details of handling the intricacies of the quantized matrixes are done
|
||||
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
|
||||
* specifically in places where addition/subtraction/division happens.
|
||||
*
|
||||
* The problem is that the routines do not know what the size of the resulting operation is. For
|
||||
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
|
||||
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
|
||||
* assume the mantissas need to be scaled back, we may lose precision.
|
||||
*
|
||||
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
|
||||
* the argument is zero, the routine will be conservative, that is, increase the exponent of
|
||||
* the result to such an extent it's mathematically impossible a value in the result will exceed
|
||||
* the maximum value that can be stored. However, when this argument is larger than zero, the
|
||||
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
|
||||
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
|
||||
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
|
||||
* value possible. (Neural networks usually are okay with this happening for a limited amount
|
||||
* of matrix indices).
|
||||
*
|
||||
* For deciding on these shift values, it is recommended to start with a shift value of one, then
|
||||
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
|
||||
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
|
||||
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
|
||||
* else.
|
||||
*
|
||||
* For neural networks and other noise-tolerant applications, note that even when
|
||||
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
|
||||
* to slightly improved precision. Feel free to experiment.
|
||||
**/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
|
||||
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* TODO: DESCRIBE THIS FUNCTION
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Convert a quantized matrix to a floating-point one.
|
||||
*
|
||||
* @param m Quantized matrix to convert
|
||||
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The floating-point version of the quantized matrix
|
||||
**/
|
||||
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq_free(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrixq_zero(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a fixed-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
|
||||
* much slower than dl_matrixq_dot .
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a floating-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
|
||||
* much slower than dl_matrixq_dot_matrix_out.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand; float matrix
|
||||
* @param b Second multiplicand; quantized matrix
|
||||
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrixq(const dl_matrix2dq_t *a);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Add a pair of quantized matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Added data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @warning In contrast to the floating point equivalent of this function, the fixed-point version
|
||||
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
|
||||
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
|
||||
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
|
||||
* garbage after modifying the data in one of the slices.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Subtracted data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
|
||||
*/
|
||||
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
|
||||
* rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two quantized matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated quantized matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the quantized matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check the sanity of a quantized matrix
|
||||
*
|
||||
* Due to the nature of quantized matrices, depending on the calculations a quantized
|
||||
* matrix is the result of and the shift values chosen in those calculations, a quantized
|
||||
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
|
||||
* most significant mantissa bits are unused, or because a fair amount of mantissas are
|
||||
* clipped. This function checks if this is the case and will report a message to stdout
|
||||
* if significant loss of precision is detected.
|
||||
*
|
||||
* @param m The quantized matrix to check
|
||||
* @param name A string to be displayed in the message if the sanity check fails
|
||||
* @return True if matrix is sane, false otherwise
|
||||
**/
|
||||
|
||||
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
|
||||
|
||||
/**
|
||||
* @brief re-adjust the exponent of the matrix to fit the mantissa better
|
||||
*
|
||||
* This function will shift up all the data in the mantissas so there are no
|
||||
* most-significant bits that are unused in all mantissas. It will also adjust
|
||||
* the exponent to keep the actual values in the matrix the same.
|
||||
*
|
||||
* Some operations done on a matrix, especially operations that re-use the
|
||||
* result of earlier operations done in the same way, can lead to the loss of
|
||||
* data because the exponent of the quantized matrix is never re-adjusted. You
|
||||
* can do that explicitly by calling this function.
|
||||
*
|
||||
* @param m The matrix to re-adjust
|
||||
**/
|
||||
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the floating-point value of a specific item from the quantized matrix
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the quantized matrix to the given
|
||||
* floating-point value
|
||||
*
|
||||
* @warning If the given value is more than the exponent in the quantized matrix
|
||||
* allows for, all mantissas in the matrix will be shifted down to make the value
|
||||
* 'fit'. If, however, the exponent is such that the value would result in a
|
||||
* quantized mantissa of 0, nothing is done.
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ8_H
|
||||
#define DL_LIB_MATRIXQ8_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int8_t q8tp_t;
|
||||
|
||||
typedef struct {
|
||||
int w; //Width
|
||||
int h; //Height
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags; //Presumably an OR of DL_MF_* values, as in dl_matrix2d_t -- TODO confirm
|
||||
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
|
||||
q8tp_t *itemq; //Pointer to the 8-bit item array; index it through DL_ITMQ8
|
||||
} dl_matrix2dq8_t;
|
||||
|
||||
#define DL_Q8TP_SHIFT 7
|
||||
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
|
||||
//Access the raw 8-bit quantized item at column x, row y of matrix m (index is
//y + x*stride). Arguments and the whole expansion are parenthesized so any
//pointer expression can be passed for m; m is evaluated twice, so avoid
//arguments with side effects.
#define DL_ITMQ8(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq8_free(dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy a quantized matrix
|
||||
* Copy a quantized matrix from flash or iram/psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
105
managed_components/espressif__esp-sr/include/esp32c3/esp_aec.h
Normal file
105
managed_components/espressif__esp-sr/include/esp32c3/esp_aec.h
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AEC_H_
|
||||
#define _ESP_AEC_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
#define AEC_FRAME_LENGTH_MS 32
|
||||
|
||||
typedef struct aec_handle_t aec_handle_t;
|
||||
// AEC operating modes. Note that the value 2 is not assigned to any mode.
typedef enum {
|
||||
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
|
||||
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
|
||||
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
|
||||
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
|
||||
} aec_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
* Please get frame size by aec_get_chunksize() function
|
||||
*
|
||||
* @param sample_rate The Sampling frequency (Hz) must be 16000.
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure, same with aec_create().
|
||||
*
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and frame from mic.
|
||||
*
|
||||
* @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc().
|
||||
*
|
||||
* @param handel The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
|
||||
* @param indata An array of 16-bit signed audio samples from mic.
|
||||
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
|
||||
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int aec_get_chunksize(const aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Get AEC mode string
|
||||
*
|
||||
* @param aec_mode The mode of AEC.
|
||||
*
|
||||
* @return AEC mode string
|
||||
*/
|
||||
char * aec_get_mode_string(aec_mode_t aec_mode);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_destroy(aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AEC_H_
|
||||
@@ -0,0 +1,81 @@
|
||||
|
||||
#ifndef _ESP_AFE_AEC_H_
|
||||
#define _ESP_AFE_AEC_H_
|
||||
|
||||
#include "esp_aec.h"
|
||||
#include "esp_afe_config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE-level AEC state: bundles the underlying AEC instance with its mode and PCM channel configuration.
typedef struct {
|
||||
aec_handle_t *handle; // Underlying AEC instance (aec_handle_t from esp_aec.h)
|
||||
aec_mode_t mode; // The mode of AEC
|
||||
afe_pcm_config_t pcm_config; // PCM channel configuration (channel counts, channel indices, sample rate)
|
||||
int frame_size; // Frame size; presumably samples per channel per frame -- TODO confirm
|
||||
int16_t *data; // NOTE(review): presumably an internal 16-bit sample buffer -- confirm size and ownership
|
||||
} afe_aec_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @warning Currently only supports 1 microphone channel and 1 playback channel.
|
||||
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
|
||||
* channel will be selected.
|
||||
*
|
||||
* The input format, same as afe config:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
|
||||
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
|
||||
* esp32c5.
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_aec_handle_t* The created AEC instance
|
||||
*/
|
||||
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and the frame from the mic.
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
* @param indata Input audio data, format is defined by input_format.
|
||||
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned.
|
||||
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory
|
||||
|
||||
* @return The bytes of outdata.
|
||||
*/
|
||||
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void afe_aec_destroy(afe_aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AFE_AEC_H_
|
||||
@@ -0,0 +1,288 @@
|
||||
#pragma once
|
||||
#include "esp_aec.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_nsn_models.h"
|
||||
#include "esp_vad.h"
|
||||
#include "esp_vadn_models.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "model_path.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// VC: Voice Communication
|
||||
|
||||
// Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
|
||||
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
|
||||
} afe_sr_mode_t;
|
||||
|
||||
// Set AFE mode
|
||||
typedef enum {
|
||||
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
|
||||
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
|
||||
} afe_mode_t;
|
||||
|
||||
// Set AFE type
|
||||
typedef enum {
|
||||
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
|
||||
AFE_TYPE_VC = 1, // Voice communication scenarios, 16KHz input, including nonlinear noise suppression
|
||||
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8KHz input, note that the input data must be 8KHz
|
||||
} afe_type_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
|
||||
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
|
||||
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
|
||||
int mic_num; // microphone channel number
|
||||
uint8_t *mic_ids; // microphone channel indices
|
||||
int ref_num; // playback reference channel number
|
||||
uint8_t *ref_ids; // playback reference channel indices
|
||||
int sample_rate; // sample rate of audio
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
|
||||
AFE_NS_MODE_NET = 1, // please use model name of NSNET
|
||||
} afe_ns_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
|
||||
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
|
||||
} afe_agc_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Function to get the debug audio data
|
||||
*
|
||||
* @param data The debug audio data, which must not be modified. It should be copied away as soon as possible to
|
||||
* avoid blocking for too long.
|
||||
* @param data_size The number of bytes of data.
|
||||
* @returns
|
||||
*/
|
||||
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
|
||||
|
||||
typedef enum {
|
||||
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
|
||||
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
|
||||
AFE_DEBUG_HOOK_MAX = 2
|
||||
} afe_debug_hook_type_t;
|
||||
|
||||
typedef struct {
|
||||
afe_debug_hook_type_t hook_type; // debug type of hook
|
||||
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
|
||||
} afe_debug_hook_t;
|
||||
|
||||
typedef struct {
|
||||
/********** AEC(Acoustic Echo Cancellation) **********/
|
||||
bool aec_init; // Whether to init aec
|
||||
aec_mode_t aec_mode; // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
|
||||
int aec_filter_length; // The filter length of aec
|
||||
|
||||
/********** SE(Speech Enhancement, microphone array processing) **********/
|
||||
bool se_init; // Whether to init se
|
||||
|
||||
/********** NS(Noise Suppression) **********/
|
||||
bool ns_init; // Whether to init ns
|
||||
char *ns_model_name; // Model name of ns
|
||||
afe_ns_mode_t afe_ns_mode; // Model mode of ns
|
||||
|
||||
/********** VAD(Voice Activity Detection) **********/
|
||||
bool vad_init; // Whether to init vad
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
char *vad_model_name; // The model name of vad. If it is null, WebRTC VAD will be used.
|
||||
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
|
||||
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
|
||||
// 1000 ms
|
||||
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
|
||||
// If you find the vad cache cannot cover all speech, please increase this value.
|
||||
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
|
||||
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
|
||||
|
||||
/********** WakeNet(Wake Word Engine) **********/
|
||||
bool wakenet_init; // Whether to init wakenet
|
||||
char *wakenet_model_name; // The model name of wakenet 1
|
||||
char *wakenet_model_name_2; // The model name of wakenet 2, if there is a wakenet 2
|
||||
det_mode_t wakenet_mode; // The mode of wakenet
|
||||
|
||||
/********** AGC(Automatic Gain Control) **********/
|
||||
bool agc_init; // Whether to init agc
|
||||
afe_agc_mode_t
|
||||
agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after far linear gain.
|
||||
int agc_compression_gain_db; // Compression gain in dB (default 9)
|
||||
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
|
||||
|
||||
/********** General AFE(Audio Front End) parameter **********/
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
|
||||
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
|
||||
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
|
||||
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
|
||||
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
|
||||
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
|
||||
// directly on the output amplitude: afe_linear_gain * amplitude.
|
||||
bool debug_init; // NOTE(review): presumably whether to init the debug hooks -- confirm
|
||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||
// otherwise, select channel number by wakenet
|
||||
} afe_config_t;
|
||||
|
||||
/**
|
||||
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
|
||||
* on the chip target and input format. You can manually fine-tune it after creating the configuration
|
||||
*
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param models Models from partition, which is configured by Kconfig
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_config_t* The default config of afe
|
||||
*/
|
||||
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Check AFE configuration and make sure it is correct.
|
||||
*
|
||||
* @warning If there is a configuration conflict, this function will modify some parameters.
|
||||
* The guiding principle behind these modifications is to maintain the highest performance of the output audio and results.
|
||||
* And remove the conflict between different algorithms.
|
||||
*
|
||||
* For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
|
||||
* If SE(BSS) algorithm is deactivated, will only use the first microphone channel.
|
||||
*
|
||||
* @param afe_config Input AFE config
|
||||
*
|
||||
* @return afe_config_t* The modified AFE config
|
||||
*/
|
||||
afe_config_t *afe_config_check(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input format
|
||||
*
|
||||
* @param input_format The input format, same with afe_config_init() function
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
* @return true if the input format is parsed successfully, otherwise false
|
||||
*/
|
||||
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse I2S input data
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param mic_data The output microphone data
|
||||
* @param ref_data The output playback reference data
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
*/
|
||||
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Format input data, from contiguous arrangement to interleaved arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param data The input audio data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param factor The gain factor
|
||||
*
|
||||
* @return int16_t* The output audio data
|
||||
*/
|
||||
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
|
||||
|
||||
/**
|
||||
* @brief Concatenate input data into output data (NOTE(review): this brief was duplicated from afe_adjust_gain; confirm exact semantics)
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param in_data The input audio data
|
||||
* @param in_frame_size The frame size of the input data
|
||||
* @param channel_num The channel number of input data, which is same as output data
|
||||
* @param out_data The output audio data
|
||||
* @param out_frame_size The frame size of the output data
|
||||
*
|
||||
*/
|
||||
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
|
||||
|
||||
/**
|
||||
* @brief Copy the afe config
|
||||
*
|
||||
* @param dst_config The destination afe config
|
||||
* @param src_config The source afe config
|
||||
*
|
||||
* @return The destination afe config
|
||||
*/
|
||||
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
|
||||
|
||||
/**
|
||||
* @brief Print the afe config
|
||||
*
|
||||
* @param afe_config The afe config
|
||||
*/
|
||||
void afe_config_print(const afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Allocate afe config
|
||||
*
|
||||
* @return The afe config pointer
|
||||
*/
|
||||
afe_config_t *afe_config_alloc();
|
||||
|
||||
/**
|
||||
* @brief Free afe config
|
||||
*
|
||||
* @param afe_config The afe config pointer
|
||||
*/
|
||||
void afe_config_free(afe_config_t *afe_config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _ESP_AFE_DOA_H_
|
||||
#define _ESP_AFE_DOA_H_
|
||||
|
||||
#include "esp_doa.h"
|
||||
#include "esp_afe_config.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE-level DOA state: bundles the underlying DOA instance with its PCM channel configuration.
typedef struct {
|
||||
doa_handle_t *doa_handle; // Underlying DOA instance (doa_handle_t from esp_doa.h)
|
||||
afe_pcm_config_t pcm_config; // PCM channel configuration (channel counts, channel indices, sample rate)
|
||||
int16_t *leftdata; // NOTE(review): presumably the left-channel buffer handed to esp_doa_process() -- confirm
|
||||
int16_t *rightdata; // NOTE(review): presumably the right-channel buffer handed to esp_doa_process() -- confirm
|
||||
int frame_size; // Frame size; presumably samples per channel per frame -- TODO confirm
|
||||
} afe_doa_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param input_format The input format
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized afe_doa_handle_t object pointer, Recommend using the above configuration for better performance
|
||||
*/
|
||||
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param handle afe_doa_handle_t instance pointer
|
||||
* @param indata Input audio data, format is define by input_format.
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param handle afe_doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void afe_doa_destroy(afe_doa_handle_t *handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_AFE_DOA_H_ */
|
||||
@@ -0,0 +1,237 @@
|
||||
#pragma once
|
||||
#include "esp_afe_config.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// afe_sr/AFE_SR: the audio front-end for speech recognition
|
||||
|
||||
// Opaque AFE_SR data container
|
||||
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of vad
|
||||
*/
|
||||
typedef enum {
|
||||
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
|
||||
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
|
||||
} afe_vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief The result of fetch function
|
||||
*/
|
||||
typedef struct afe_fetch_result_t {
|
||||
int16_t *data; // the target channel data of audio.
|
||||
int data_size; // the size of data. The unit is byte.
|
||||
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
|
||||
// audio that was truncated.
|
||||
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
|
||||
// (note: invalid in vc). If wakenet is enabled, the window length is the receptive field of
|
||||
// wakenet (about 1.5s), otherwise it is the frame length.
|
||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||
int wake_word_index; // if the wake word is detected, it stores the wake word index, which starts from 1.
|
||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
|
||||
// starts from 1.
|
||||
vad_state_t vad_state; // the value is vad_state_t
|
||||
int trigger_channel_id; // the channel index of output
|
||||
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||
int ret_value; // the return state of fetch function
|
||||
int16_t *raw_data; // the multi-channel output data of audio.
|
||||
int raw_data_channels; // the channel number of raw data
|
||||
float ringbuff_free_pct; // the percent of ringbuff free size. if the value is larger than 0.5, it means the ringbuff is busy. NOTE(review): confirm the direction of this threshold.
|
||||
void *reserved; // reserved for future use
|
||||
} afe_fetch_result_t;
|
||||
|
||||
/**
|
||||
* @brief Function to initialize an AFE_SR instance
|
||||
*
|
||||
* @param afe_config The config of AFE_SR
|
||||
* @returns Handle to the AFE_SR data
|
||||
*/
|
||||
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of each channel samples per frame that need to be passed to the function
|
||||
*
|
||||
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of samples to feed the fetch function
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of total channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the function
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the AFE_SR
|
||||
*
|
||||
* @warning The input data should be arranged in channel-interleaved format.
|
||||
* The last channel is reference signal if it has reference data.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*
|
||||
* @param in The input microphone signal, only support signed 16-bit @ 16 KHZ. The frame size can be queried by the
|
||||
* `get_feed_chunksize`.
|
||||
* @return The size of input
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR
|
||||
*
|
||||
* @warning The output is single-channel data, no matter how many channels the input has.
|
||||
* Timeout is 2000 ms. If you want to adjust timeout, please refer to the definition of `fetch_with_delay`.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR, same with the function `fetch`
|
||||
*
|
||||
* @warning The output is single-channel data, no matter how many channels the input has.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
|
||||
|
||||
/**
|
||||
* @brief reset ringbuf of AFE.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Set wakenet detection threshold
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @param threshold The wakenet detection threshold, the value is between 0.4 and 0.9999.
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
|
||||
|
||||
/**
|
||||
* @brief Reset wakenet detection threshold to initial state
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
|
||||
|
||||
/**
|
||||
* @brief Reset one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Disable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Print all functions/modules/algorithms pipeline.
|
||||
* The pipeline is the order of the functions/modules/algorithms.
|
||||
* The format like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Destroy an AFE_SR instance
|
||||
*
|
||||
* @param afe AFE_SR object to destroy
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on an AFE_SR.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_afe_sr_iface_op_create_from_config_t create_from_config;
|
||||
esp_afe_sr_iface_op_feed_t feed;
|
||||
esp_afe_sr_iface_op_fetch_t fetch;
|
||||
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
|
||||
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_aec;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_aec;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_se;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_se;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_vad;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_vad;
|
||||
esp_afe_sr_iface_op_reset_op_t reset_vad;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_ns;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_ns;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_agc;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_agc;
|
||||
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
|
||||
esp_afe_sr_iface_op_destroy_t destroy;
|
||||
} esp_afe_sr_iface_t;
|
||||
|
||||
// Stores the AFE interface, its instance data and the task handles used by the AFE tasks.
// NOTE(review): the type name reads "into"; it looks like "info" was intended -- kept unchanged for API compatibility.
|
||||
typedef struct {
|
||||
esp_afe_sr_data_t *afe_data; // AFE_SR instance data (opaque esp_afe_sr_data_t)
|
||||
esp_afe_sr_iface_t *afe_handle; // AFE_SR interface: table of operations applied to afe_data
|
||||
TaskHandle_t feed_task; // FreeRTOS handle of the feed task
|
||||
TaskHandle_t fetch_task; // FreeRTOS handle of the fetch task
|
||||
} afe_task_into_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "esp_afe_sr_iface.h"
|
||||
|
||||
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,47 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AGC_H_
|
||||
#define _ESP_AGC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AGC status codes: zero or positive values are valid, negative values are errors.
// NOTE(review): the typedef is spelled ESP_AGE_ERR (likely intended ESP_AGC_ERR); kept unchanged for API compatibility.
|
||||
typedef enum {
|
||||
ESP_AGC_SUCCESS = 0, // Success
|
||||
ESP_AGC_FAIL = -1, // AGC failed
|
||||
ESP_AGC_SAMPLE_RATE_ERROR = -2, // Sample rate can only be 8 kHz, 16 kHz or 32 kHz
|
||||
ESP_AGC_FRAME_SIZE_ERROR = -3, // The input frame must be 10 ms long, so the frame size follows from the sample rate
|
||||
} ESP_AGE_ERR;
|
||||
|
||||
typedef enum {
|
||||
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
|
||||
AGC_MODE_0 = 0, // Only saturation protection
|
||||
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
|
||||
} agc_mode_t;
|
||||
|
||||
// Open an AGC instance for the given AGC mode and sample rate; returns an opaque handle.
// NOTE(review): the failure return value is not documented in this header -- confirm.
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
|
||||
// Set the AGC parameters on agc_handle: gain in dB, limiter enable flag and target level in dBFS.
// NOTE(review): valid ranges are not documented in this header -- confirm.
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
|
||||
// Run AGC on in_pcm and write the result to out_pcm.
// NOTE(review): the return value is presumably an ESP_AGE_ERR code (0 on success, negative on error) -- confirm.
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
|
||||
// Close the AGC instance. NOTE(review): presumably agc_handle must come from esp_agc_open() -- confirm.
void esp_agc_close(void *agc_handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _ESP_AGC_H_
|
||||
@@ -0,0 +1,41 @@
|
||||
#ifndef _ESP_DOA_H_
|
||||
#define _ESP_DOA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct doa_handle_t doa_handle_t;
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized doa_handle_t object pointer, Recommend using the above configuration for better performance
|
||||
*/
|
||||
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param doa doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void esp_doa_destroy(doa_handle_t *doa);
|
||||
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param doa doa_handle_t instance pointer
|
||||
* @param left Left channel 16-bit PCM data
|
||||
* @param right Right channel 16-bit PCM data
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_DOA_H_ */
|
||||
@@ -0,0 +1,93 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_MASE_H_
|
||||
#define _ESP_MASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
|
||||
#define MASE_FRAME_SIZE 16 // Supports 16ms only
|
||||
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
|
||||
|
||||
/**
|
||||
* @brief Sets mic-array type, currently 2-mic line array and 3-mic circular array
|
||||
* are supported.
|
||||
*/
|
||||
typedef enum {
|
||||
TWO_MIC_LINE = 0,
|
||||
THREE_MIC_CIRCLE = 1
|
||||
} mase_mic_array_type_t;
|
||||
|
||||
/**
|
||||
* @brief Sets operating mode, supporting normal mode and wake-up enhancement mode
|
||||
*/
|
||||
typedef enum {
|
||||
NORMAL_ENHANCEMENT_MODE = 0,
|
||||
WAKE_UP_ENHANCEMENT_MODE = 1
|
||||
} mase_op_mode_t;
|
||||
|
||||
typedef void* mase_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the MASE structure.
|
||||
*
|
||||
* @param fs The sampling frequency (Hz), must be 16000.
|
||||
*
|
||||
* @param frame_size The length of the audio processing must be 16ms.
|
||||
*
|
||||
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array.
|
||||
*
|
||||
* @param mic_distance The distance between neighboring microphones in mm.
|
||||
*
|
||||
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode.
|
||||
*
|
||||
* @param filter_strength Strength of the mic-array speech enhancement, must be 0, 1, 2 or 3.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: An instance of MASE
|
||||
*/
|
||||
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
|
||||
|
||||
/**
|
||||
* @brief Performs mic array processing for one frame.
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @param in An array of 16-bit signed audio samples from mic.
|
||||
*
|
||||
* @param dsp_out Returns enhanced signal.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
|
||||
|
||||
/**
|
||||
* @brief Free the MASE instance
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_destory(mase_handle_t st);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,86 @@
|
||||
#pragma once
|
||||
#include "esp_speech_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
|
||||
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
|
||||
multiple implementations can be used.
|
||||
*/
|
||||
|
||||
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
|
||||
|
||||
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
|
||||
// please refer to its documentation for details.
|
||||
typedef struct {
|
||||
int winstep_ms; // The step between successive windows in ms. (10)
|
||||
int winlen_ms; // The length of the analysis window in ms. (25)
|
||||
int nch; // The number of input channel
|
||||
int numcep; // The number of cepstrum to return
|
||||
int nfilter; // The number of filters in the filterbank
|
||||
int nfft; // The FFT size
|
||||
int samp_freq; // The sample-rate of the signal.
|
||||
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
|
||||
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
|
||||
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
|
||||
char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey"
|
||||
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
|
||||
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
|
||||
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
|
||||
float log_epsilon; // log epsilon. (e.g. 1e-7)
|
||||
bool psram_first; // Alloc memory from PSRAM first
|
||||
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
|
||||
} esp_mfcc_opts_t;
|
||||
|
||||
/**
|
||||
* @brief Un-initialize and free a mfcc runner
|
||||
*
|
||||
* Function to free a previously allocated mfcc runner.
|
||||
*
|
||||
* @param r Runner object to destroy
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Initialize parameters for a mfcc runner.
|
||||
*
|
||||
* After creation, a mfcc runner needs to be initialized first; this is usually done
|
||||
* in the initialization routine of a speech recognition algorithm. This provides
|
||||
* a pointer to do this for a specific mfcc runner.
|
||||
*
|
||||
* @param opt Options for the mfcc process
|
||||
* @return Pointer to the mfcc runner data created from the given options.
|
||||
*/
|
||||
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
|
||||
|
||||
/**
|
||||
* @brief Run a mfcc iteration on frame by frame
|
||||
*
|
||||
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
|
||||
* an initial call to this function may return NULL and subsequent calls may return the
|
||||
* cepstrum of previous calls.
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
* @param samp An array of signed 16-bit samples. The amount of samples should be samp_freq * winstep_ms / 1000.
|
||||
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
|
||||
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
|
||||
* to this function is done.
|
||||
*/
|
||||
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
|
||||
|
||||
/**
|
||||
* @brief Clean all state of mfcc handle
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Operations possible on a mfcc runner
|
||||
*/
|
||||
typedef struct {
|
||||
esp_mfcc_op_destroy_t destroy;
|
||||
esp_mfcc_op_create_t create;
|
||||
esp_mfcc_op_run_step_t run_step;
|
||||
esp_mfcc_op_clean_t clean;
|
||||
} esp_mfcc_iface_t;
|
||||
@@ -0,0 +1,89 @@
|
||||
#pragma once
|
||||
#include "esp_speech_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
|
||||
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
|
||||
multiple implementations can be used.
|
||||
*/
|
||||
|
||||
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
|
||||
|
||||
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
|
||||
// please refer to its documentation for details.
|
||||
typedef struct {
|
||||
int winstep_ms; // The step between successive windows in ms. (10)
|
||||
int winlen_ms; // The length of the analysis window in ms. (25)
|
||||
int nch; // The number of input channel
|
||||
int numcep; // The number of cepstrum to return
|
||||
int nfilter; // The number of filters in the filterbank
|
||||
int nfft; // The FFT size
|
||||
int samp_freq; // The sample-rate of the signal.
|
||||
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
|
||||
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
|
||||
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
|
||||
char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey"
|
||||
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
|
||||
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
|
||||
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
|
||||
float log_epsilon; // log epsilon. (e.g. 1e-7)
|
||||
bool psram_first; // Alloc memory from PSRAM first
|
||||
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
|
||||
} esp_mfcc_opts_t;
|
||||
|
||||
/**
|
||||
* @brief Un-initialize and free a mfcc runner
|
||||
*
|
||||
* Function to free a previously allocated mfcc runner.
|
||||
*
|
||||
* @param r Runner object to destroy
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Initialize parameters for a mfcc runner.
|
||||
*
|
||||
* After creation, a mfcc runner needs to be initialized first; this is usually done
|
||||
* in the initialization routine of a speech recognition algorithm. This provides
|
||||
* a pointer to do this for a specific mfcc runner.
|
||||
*
|
||||
* @param opt Options for the mfcc process
|
||||
* @return Pointer to the mfcc runner data created from the given options.
|
||||
*/
|
||||
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
|
||||
|
||||
/**
|
||||
* @brief Run a mfcc iteration on frame by frame
|
||||
*
|
||||
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
|
||||
* an initial call to this function may return NULL and subsequent calls may return the
|
||||
* cepstrum of previous calls.
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
* @param samp An array of signed 16-bit samples. The amount of samples should be samp_freq * winstep_ms / 1000.
|
||||
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
|
||||
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
|
||||
* to this function is done.
|
||||
*/
|
||||
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
|
||||
|
||||
/**
 * @brief Run a mfcc iteration with int16 output
 *
 * NOTE(review): presumably the int16 counterpart of run_step, writing the result into the
 * caller-provided buffer instead of returning a float pointer - confirm against the implementation.
 *
 * @param r     The mfcc runner
 * @param samp  An array of signed 16-bit input samples
 * @param fbank Output buffer of signed 16-bit values; required length is not visible here - confirm
 */
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
|
||||
|
||||
/**
|
||||
* @brief Clean all state of mfcc handle
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Operations possible on a mfcc runner
|
||||
*/
|
||||
typedef struct {
|
||||
esp_mfcc_op_destroy_t destroy;
|
||||
esp_mfcc_op_create_t create;
|
||||
esp_mfcc_op_run_step_t run_step;
|
||||
esp_mfcc_op_run_step_s16_t run_step_s16;
|
||||
esp_mfcc_op_clean_t clean;
|
||||
} esp_mfcc_iface_t;
|
||||
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include "esp_mfcc_iface.h"
|
||||
|
||||
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
|
||||
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9 & multinet5
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_wn9(void); // (void) makes this a real prototype: in C, "()" leaves the parameters unspecified
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9s
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
|
||||
|
||||
/**
|
||||
* @brief Return basic opts for default kaldifeat
|
||||
*
|
||||
opts->psram_first = true;
|
||||
opts->use_power = true;
|
||||
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
|
||||
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
|
||||
opts->win_type = "povey";
|
||||
opts->low_freq = 20;
|
||||
opts->high_freq = 7600;
|
||||
opts->samp_freq = 16000;
|
||||
opts->nch = 1;
|
||||
opts->nfft = 512;
|
||||
opts->nfilter = 80;
|
||||
opts->numcep = 80;
|
||||
opts->preemph = 0.97;
|
||||
opts->append_energy = false;
|
||||
opts->winlen_ms = 25;
|
||||
opts->winstep_ms = 10;
|
||||
opts->remove_dc_offset = true;
|
||||
*
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void); // (void) makes this a real prototype: in C, "()" leaves the parameters unspecified
|
||||
|
||||
/**
|
||||
* @brief Print mfcc opts
|
||||
**/
|
||||
void print_mfcc_opts(esp_mfcc_opts_t *opts);
|
||||
@@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ESP_MN_RESULT_MAX_NUM 5
|
||||
#define ESP_MN_MAX_PHRASE_NUM 400
|
||||
#define ESP_MN_MAX_PHRASE_LEN 63
|
||||
#define ESP_MN_MIN_PHRASE_LEN 2
|
||||
|
||||
#define ESP_MN_PREFIX "mn"
|
||||
#define ESP_MN_ENGLISH "en"
|
||||
#define ESP_MN_CHINESE "cn"
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_STATE_DETECTING = 0, // detecting
|
||||
ESP_MN_STATE_DETECTED = 1, // detected
|
||||
ESP_MN_STATE_TIMEOUT = 2, // time out
|
||||
} esp_mn_state_t;
|
||||
|
||||
// Set multinet loading mode.
|
||||
// Memory consumption decreases as the mode value increases;
|
||||
// as a consequence, the CPU load goes up.
|
||||
typedef enum {
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with maximum memory consumption
|
||||
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and load the rest from FLASH (default)
|
||||
ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation
|
||||
} esp_mn_loader_mode_t;
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_GREEDY_SEARCH = 0, // greedy search
|
||||
ESP_MN_BEAM_SEARCH = 1, // beam search
|
||||
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
|
||||
} esp_mn_search_method_t;
|
||||
|
||||
typedef enum {
|
||||
CHINESE_ID = 1, // Chinese language
|
||||
ENGLISH_ID = 2, // English language
|
||||
} language_id_t;
|
||||
|
||||
// Return all possible recognition results
|
||||
typedef struct{
|
||||
esp_mn_state_t state;
|
||||
int num; // The number of phrase in list, num<=5. When num=0, no phrase is recognized.
|
||||
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
|
||||
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
|
||||
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
|
||||
char string[256]; // recognized string with commands graph
|
||||
char raw_string[256]; // recognized string without commands graph
|
||||
} esp_mn_results_t;
|
||||
|
||||
typedef struct {
|
||||
char *string; // command string
|
||||
char *phonemes; // command phonemes, if applicable
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
} esp_mn_phrase_t;
|
||||
|
||||
typedef struct _mn_node_ {
|
||||
esp_mn_phrase_t *phrase;
|
||||
struct _mn_node_ *next;
|
||||
} esp_mn_node_t;
|
||||
|
||||
typedef struct{
|
||||
int16_t num; // The number of error phrases, which can not added into model
|
||||
esp_mn_phrase_t **phrases; // The array of error phrase pointer
|
||||
} esp_mn_error_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a model instance with specified model name.
|
||||
*
|
||||
* @param model_name The name of the multinet model to create.
|
||||
* @param duration The duration (ms) to trigger the timeout
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
|
||||
|
||||
/**
|
||||
* @brief Switch multinet mode to change memory consumption and CPU loading
|
||||
*
|
||||
* @warning Only supported by multinet6 or later versions
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param mode The multinet loader mode
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the number of frames recognized by the command word
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of the frames recognized by the command word
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the language of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
|
||||
*/
|
||||
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The state of multinet
|
||||
*/
|
||||
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech commands recognition model
|
||||
*
|
||||
* @param model The Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get recognition results
|
||||
*
|
||||
* @param model The Model object to query
|
||||
*
|
||||
* @return The current results.
|
||||
*/
|
||||
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Open the log print
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Clean all status of model
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Set the speech commands by mn_command_root
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
* @param mn_command_root The speech commands link.
|
||||
* @return The error phrase id info.
|
||||
*/
|
||||
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Check if input string can be tokenized
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
* @param str The input string
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
|
||||
|
||||
typedef struct {
|
||||
esp_mn_iface_op_create_t create;
|
||||
esp_mn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
|
||||
esp_mn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_mn_iface_op_get_language_t get_language;
|
||||
esp_mn_iface_op_detect_t detect;
|
||||
esp_mn_iface_op_destroy_t destroy;
|
||||
esp_mn_iface_op_get_results_t get_results;
|
||||
esp_mn_iface_op_open_log_t open_log;
|
||||
esp_mn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_set_speech_commands set_speech_commands;
|
||||
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode;
|
||||
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands;
|
||||
esp_mn_iface_op_check_speech_command check_speech_command;
|
||||
} esp_mn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
//a specific phrase or word.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
* @brief Get the multinet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of multinet
|
||||
*/
|
||||
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the multinet language from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The language of multinet
|
||||
*/
|
||||
char *esp_mn_language_from_name(char *model_name);
|
||||
|
||||
/*
|
||||
Configure wake word to use based on what's selected in menuconfig.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
|
||||
#include "multinet2_ch.h"
|
||||
#define MULTINET_COEFF get_coeff_multinet2_ch
|
||||
#define MULTINET_MODEL_NAME "mn2_cn"
|
||||
|
||||
#else
|
||||
#define MULTINET_COEFF "COEFF_NULL"
|
||||
#define MULTINET_MODEL_NAME "NULL"
|
||||
#endif
|
||||
|
||||
|
||||
/* example
|
||||
|
||||
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
|
||||
|
||||
//Initialize MultiNet model data
|
||||
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
|
||||
add_speech_commands(multinet, model_data);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=model->get_samp_chunksize(model_data);
|
||||
int frequency = model->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=model->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
model->destroy(model_data)
|
||||
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,86 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_NS_H_
|
||||
#define _ESP_NS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NS_USE_SPIARM 0
|
||||
#define NS_FRAME_LENGTH_MS 10 //Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* The Sampling frequency (Hz) must be 16000Hz
|
||||
*/
|
||||
|
||||
typedef void* ns_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the NS structure.
|
||||
*
|
||||
* @param frame_length The length of the audio processing can be 10ms, 20ms, 30ms.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_create(int frame_length);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the more powerful noise suppression algorithm.
|
||||
*
|
||||
* @warning frame_length only supports 10 ms.
|
||||
*
|
||||
* @param frame_length The length of the audio processing can only be 10ms.
|
||||
* @param mode 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_pro_create(int frame_length, int mode, int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the NS and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @param outdata An array of 16-bit signed audio samples after noise suppression.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Free the NS instance
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_destroy(ns_handle_t inst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_NS_H_
|
||||
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct esp_nsn_data_t esp_nsn_data_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance
|
||||
*
|
||||
* @param model_name The name of the model instance
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the process function
|
||||
*
|
||||
* Every noise suppression model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the process function
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the noise suppression model and get data after process.
|
||||
*
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param in_data An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @param out_data An array of 16-bit signed audio samples after process.
|
||||
* @return The state of return.
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the process function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a noise suppression model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a noise suppression model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_nsn_iface_op_create_t create;
|
||||
esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_nsn_iface_op_process_t process;
|
||||
esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_nsn_iface_op_destroy_t destroy;
|
||||
} esp_nsn_iface_t;
|
||||
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include "esp_nsn_iface.h"
|
||||
|
||||
/*
|
||||
The prefix of nsnet
|
||||
Now there are nsnet1 and nsnet2
|
||||
*/
|
||||
#define ESP_NSNET_PREFIX "nsnet"
|
||||
|
||||
/**
|
||||
* @brief Get the nsnet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of nsnet
|
||||
*/
|
||||
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
|
||||
@@ -0,0 +1,62 @@
|
||||
#pragma once
|
||||
#include "c_speech_features_config.h"
|
||||
#include "stdlib.h"
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifndef M_2PI
|
||||
#define M_2PI 6.283185307179586476925286766559005
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
float *coeff;
|
||||
int *bank_pos;
|
||||
int nfilter;
|
||||
} esp_mel_filter_t;
|
||||
|
||||
/**
 * @brief Allocate a buffer for the mfcc/fbank code
 *
 * @param size        Requested size. NOTE(review): presumably in bytes, not in floats - confirm.
 * @param from_psram  NOTE(review): presumably true means try PSRAM first, matching the
 *                    psram_first option of esp_mfcc_opts_t - confirm.
 * @return Pointer to the allocated buffer; release it with esp_mfcc_free.
 *         NOTE(review): behaviour on allocation failure is not visible here - confirm.
 **/
float *esp_mfcc_malloc(size_t size, bool from_psram);
|
||||
|
||||
/**
 * @brief Free a buffer
 *
 * @param ptr  Buffer to free. NOTE(review): presumably one obtained from esp_mfcc_malloc - confirm.
 **/
void esp_mfcc_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Initialize FFT table
|
||||
* @warning For ESP-PLATFORM, use esp-dsp fft
|
||||
* For Other platform, use kiss fft
|
||||
*
|
||||
* @param nfft The input samples number
|
||||
* @return fft-table
|
||||
**/
|
||||
void *esp_fft_init(int nfft);
|
||||
|
||||
/**
|
||||
* @brief Free FFT table
|
||||
* @warning For ESP-PLATFORM, use esp-dsp fft
|
||||
* For Other platform, use kiss fft
|
||||
*
|
||||
* @param fft_table The fft table initialized by esp_fft_init
|
||||
* @param nfft The input samples number
|
||||
* @return None
|
||||
**/
|
||||
void esp_fft_deinit(void *fft_table, int nfft);
|
||||
|
||||
/**
|
||||
* @brief Initialize window function
|
||||
* Currently support hanning, hamming, sine, povey, rectangular,
|
||||
* wn9(512-hanning to get wakenet9& multinet5 compatible)
|
||||
**/
|
||||
float *esp_win_func_init(char *win_type, float *window_data, int frame_length);
|
||||
|
||||
/**
 * @brief Run a FFT on a frame of float samples
 *
 * NOTE(review): presumably a real-input FFT, judging by the name - confirm.
 *
 * @param x          Input samples
 * @param nfft       The FFT size
 * @param fft_table  The fft table initialized by esp_fft_init
 * @return Pointer to the result. NOTE(review): whether this aliases x and the output layout
 *         are not visible here - confirm.
 **/
float *esp_fftr(float *x, int nfft, void *fft_table);
|
||||
|
||||
/**
 * @brief Compute the spectrum of one frame
 *
 * @param x           Input samples
 * @param nfft        The FFT size
 * @param use_power   NOTE(review): presumably same meaning as esp_mfcc_opts_t.use_power
 *                    (true: power of the fft spectrum, false: magnitude) - confirm.
 * @param fft_handle  NOTE(review): presumably the table returned by esp_fft_init - confirm.
 * @return Pointer to the spectrum. NOTE(review): buffer ownership is not visible here - confirm.
 **/
float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_handle);
|
||||
|
||||
/**
 * @brief Convert 16-bit integer samples to float
 *
 * @param samples    Input samples
 * @param x          Output float buffer
 * @param len        Number of samples. NOTE(review): presumably the length of both buffers - confirm.
 * @param remove_dc  NOTE(review): presumably non-zero subtracts the mean of the wave, matching the
 *                   remove_dc_offset option of esp_mfcc_opts_t - confirm. Any scaling applied is
 *                   not visible here.
 **/
void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc);
|
||||
|
||||
/**
 * @brief Apply the preemphasis filter to one frame
 *
 * @param x      Input samples
 * @param len    Number of samples in x
 * @param coeff  Preemphasis filter coefficient (esp_mfcc_opts_t.preemph gives 0.97 as an example)
 * @param last   NOTE(review): presumably the last sample of the previous frame, used as filter
 *               history - confirm.
 * @return Pointer to the filtered samples. NOTE(review): whether this is x itself is not
 *         visible here - confirm.
 **/
float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last);
|
||||
|
||||
/**
 * @brief Create a mel filterbank
 *
 * Parameter meanings below are taken from the same-named fields of esp_mfcc_opts_t.
 *
 * @param nfft        The FFT size
 * @param nfilter     The number of filters in the filterbank
 * @param low_freq    The lowest band edge of mel filters, in hz
 * @param high_freq   The highest band edge of mel filters, in hz
 * @param samp_freq   The sample-rate of the signal
 * @param from_psram  NOTE(review): presumably true means allocate from PSRAM first - confirm.
 * @return Pointer to the filterbank; release it with esp_mel_filter_deinit.
 **/
esp_mel_filter_t *esp_mel_filter_init(
|
||||
int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram);
|
||||
|
||||
void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter);
|
||||
|
||||
float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon);
|
||||
@@ -0,0 +1,84 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_WEBRTC_H_
|
||||
#define _ESP_WEBRTC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "esp_agc.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_ns.h"
|
||||
#include "sr_ringbuf.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#include "esp_heap_caps.h"
|
||||
|
||||
typedef struct {
|
||||
void *ns_handle;
|
||||
void *agc_handle;
|
||||
int frame_size;
|
||||
int sample_rate;
|
||||
int16_t *buff;
|
||||
int16_t *out_data;
|
||||
sr_ringbuf_handle_t rb;
|
||||
} webrtc_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of webrtc.
|
||||
*
|
||||
* @warning frame_length_ms supports 10 ms, 20 ms, 30 ms, or 32 ms.
|
||||
*
|
||||
* @param frame_length_ms The length of the audio processing
|
||||
* @param ns_mode The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param agc_mode The model of AGC
|
||||
* @param agc_gain The gain of AGC. default is 9
|
||||
* @param agc_target_level The target level of AGC. default is -3 dbfs
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of webrtc
|
||||
*/
|
||||
webrtc_handle_t *webrtc_create(
|
||||
int frame_length_ms, int ns_mode, agc_mode_t agc_mode, int agc_gain, int agc_target_level, int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the webrtc and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
* @param size The sample size of output data
|
||||
* @param enable_ns Enable noise suppression
|
||||
* @param enable_agc Enable automatic gain control
|
||||
*
|
||||
* @return data after noise suppression
|
||||
*/
|
||||
int16_t *webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
|
||||
|
||||
/**
|
||||
* @brief Free the webrtc instance
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void webrtc_destroy(webrtc_handle_t *handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_WEBRTC_H_
|
||||
178
managed_components/espressif__esp-sr/include/esp32c3/esp_vad.h
Normal file
178
managed_components/espressif__esp-sr/include/esp32c3/esp_vad.h
Normal file
@@ -0,0 +1,178 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_VAD_H_
|
||||
#define _ESP_VAD_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SAMPLE_RATE_HZ 16000 // Supports 32000, 16000, 8000
|
||||
#define VAD_FRAME_LENGTH_MS 30 // Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
||||
* restrictive in reporting speech. So If you want trigger more speech, please select lower mode.
|
||||
*/
|
||||
typedef enum {
|
||||
VAD_MODE_0 = 0, // Normal
|
||||
VAD_MODE_1, // Aggressive
|
||||
VAD_MODE_2, // Very Aggressive
|
||||
VAD_MODE_3, // Very Very Aggressive
|
||||
VAD_MODE_4 // Very Very Very Aggressive
|
||||
} vad_mode_t;
|
||||
|
||||
typedef enum {
|
||||
VAD_SILENCE = 0,
|
||||
VAD_SPEECH = 1,
|
||||
} vad_state_t;
|
||||
|
||||
typedef struct vad_trigger_tag {
|
||||
vad_state_t state;
|
||||
unsigned int min_speech_len;
|
||||
unsigned int noise_len;
|
||||
unsigned int min_noise_len;
|
||||
unsigned int speech_len;
|
||||
} vad_trigger_t;
|
||||
|
||||
// Parenthesized so the expansion stays a single operand inside larger expressions (e.g. `vad_MAX_LEN / 2`).
#define vad_MAX_LEN (INT32_MAX - 1)
|
||||
/**
|
||||
* @brief Allocate VAD trigger
|
||||
*
|
||||
* @param min_speech_len Minimum frame number of speech duration
|
||||
* @param min_noise_len Minimum frame number of noise duration
|
||||
*
|
||||
* @return Trigger pointer
|
||||
**/
|
||||
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
|
||||
|
||||
/**
|
||||
* @brief Free VAD trigger
|
||||
**/
|
||||
void vad_trigger_free(vad_trigger_t *trigger);
|
||||
|
||||
/**
|
||||
* @brief Reset VAD trigger
|
||||
**/
|
||||
void vad_trigger_reset(vad_trigger_t *trigger);
|
||||
|
||||
/**
|
||||
* @brief Detect voice activity by trigger
|
||||
**/
|
||||
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||
|
||||
typedef struct {
|
||||
vad_trigger_t *trigger;
|
||||
void *vad_inst;
|
||||
int sample_rate;
|
||||
int frame_size;
|
||||
} vad_handle_with_trigger_t;
|
||||
|
||||
typedef vad_handle_with_trigger_t *vad_handle_t;
|
||||
|
||||
// typedef vad_handle_tag * vad_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
* @param sample_rate Sample rate in Hz
|
||||
* @param one_frame_ms Length of the audio chunksize, can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @param min_speech_ms Minimum speech duration, unit is ms
|
||||
* @param min_noise_ms Minimum noise duration, unit is ms
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create_with_param(
|
||||
vad_mode_t vad_mode, int sample_rate, int one_frame_ms, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process(vad_handle_t handle, int16_t *data, int sample_rate_hz, int one_frame_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process_with_trigger(vad_handle_t handle, int16_t *data);
|
||||
|
||||
/**
|
||||
* @brief Reset trigger state as Silence
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
*/
|
||||
void vad_reset_trigger(vad_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Free the VAD instance
|
||||
*
|
||||
* @param inst The instance of VAD.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void vad_destroy(vad_handle_t inst);
|
||||
|
||||
/*
|
||||
* Programming Guide:
|
||||
*
|
||||
* @code{c}
|
||||
* vad_handle_t vad_inst = vad_create(VAD_MODE_3); // Creates an instance to
|
||||
* the VAD structure.
|
||||
*
|
||||
* while (1) {
|
||||
* //Use buffer to receive the audio data from MIC.
|
||||
* vad_state_t vad_state = vad_process(vad_inst, buffer, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Feed samples to the VAD process and get the result.
|
||||
* }
|
||||
*
|
||||
* vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
|
||||
*
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_VAD_H_
|
||||
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
#include "esp_vad.h"
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
// /**
|
||||
// * @brief The state of vad
|
||||
// */
|
||||
// typedef enum {
|
||||
// VAD_NOISE = -1, // Noise
|
||||
// VADNET_STATE_SILENCE = 0, // Silence
|
||||
// VAD_SPEECH = 1 // Speech
|
||||
// } vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance with a detection mode
|
||||
* and specified model name
|
||||
*
|
||||
* @param model_name The specified model name
|
||||
* @param mode The voice activity detection mode
|
||||
* @param channel_num The number of input audio channels
|
||||
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
|
||||
* speech
|
||||
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
|
||||
* noise
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
|
||||
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger wake words, the range of
|
||||
* det_threshold is 0.5~0.9999
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the voice activity detection threshold
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used
|
||||
* can be queried by the get_samp_chunksize function.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a model object
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a voice
|
||||
* activity detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_vadn_iface_op_create_t create;
|
||||
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_vadn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_vadn_iface_op_detect_t detect;
|
||||
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_vadn_iface_op_clean_t clean;
|
||||
esp_vadn_iface_op_destroy_t destroy;
|
||||
} esp_vadn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
#include "esp_vadn_iface.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The prefix of vadnet model name is used to filter all vadnet from available models.
|
||||
#define ESP_VADN_PREFIX "vadnet"
|
||||
|
||||
/**
|
||||
* @brief Get the vadnet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of vadnet
|
||||
*/
|
||||
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,226 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of wakeup
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
WAKENET_NO_DETECT = 0, // wake word is not detected
|
||||
WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
|
||||
WAKENET_DETECTED = 1 // wake word is detected
|
||||
} wakenet_state_t;
|
||||
|
||||
//Set wake words recognition operating mode
|
||||
//The probability of being wake words is increased with increasing mode,
|
||||
//As a consequence also the false alarm rate goes up
|
||||
typedef enum {
|
||||
DET_MODE_90 = 0, // Normal
|
||||
DET_MODE_95 = 1, // Aggressive
|
||||
DET_MODE_2CH_90 = 2,
|
||||
DET_MODE_2CH_95 = 3,
|
||||
DET_MODE_3CH_90 = 4,
|
||||
DET_MODE_3CH_95 = 5,
|
||||
DET_MODE_90_COPY_PARAMS = 6, // Aggressive
|
||||
} det_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int wake_word_num; //The number of all wake words
|
||||
char **wake_word_list; //The name list of wake words
|
||||
} wake_word_info_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance with a detection mode and specified wake word coefficient
|
||||
*
|
||||
* @param model_name The specified wake word model coefficient
|
||||
* @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the start point of wake word when one wake word is detected.
|
||||
*
|
||||
* @Warning: This function should be called when the channel index is verified.
|
||||
* The returned value is the number of samples from start point of wake word to detected point.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of samples from start point to detected point (end point)
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the number of wake words
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the number of wake words
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the name of wake word by index
|
||||
*
|
||||
* @Warning The index of wake word start with 1
|
||||
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the name of the wake word
|
||||
*/
|
||||
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger wake words, the range of det_threshold is 0.4~0.9999
|
||||
* @param word_index The index of wake word
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Reset the threshold to its initial state
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_reset_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the wake word detection threshold of different modes
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
|
||||
*
|
||||
* @Warning The index of wake word start with 1, 0 means no wake words is detected.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Get the volume gain
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param target_db The target dB to calculate volume gain
|
||||
* @returns the volume gain
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech recognition model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a wake word detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_wn_iface_op_create_t create;
|
||||
esp_wn_iface_op_get_start_point_t get_start_point;
|
||||
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_wn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_wn_iface_op_get_word_num_t get_word_num;
|
||||
esp_wn_iface_op_get_word_name_t get_word_name;
|
||||
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_wn_iface_op_reset_det_threshold_t reset_det_threshold;
|
||||
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
|
||||
esp_wn_iface_op_detect_t detect;
|
||||
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_wn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_destroy_t destroy;
|
||||
} esp_wn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
#include "esp_wn_iface.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The prefix of wakenet model name is used to filter all wakenet from available models.
|
||||
#define ESP_WN_PREFIX "wn"
|
||||
|
||||
/**
|
||||
* @brief Get the wakenet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of wakenet
|
||||
*/
|
||||
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the wake word name from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The wake word name, like "alexa","hilexin","xiaoaitongxue"
|
||||
*/
|
||||
char *esp_wn_wakeword_from_name(const char *model_name);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
static const esp_wn_iface_t *model = esp_wn_handle_from_name(model_name);
|
||||
|
||||
//Initialize wakeNet model data
|
||||
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=model->get_samp_chunksize(model_data);
|
||||
int frequency = model->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=model->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
model->destroy(model_data);
|
||||
|
||||
*/
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef __FLITE_G2P_H__
|
||||
#define __FLITE_G2P_H__
|
||||
|
||||
typedef struct {
|
||||
int num_phonemes;
|
||||
int phoneme_size;
|
||||
char **phonemes;
|
||||
} flite_g2p_result;
|
||||
|
||||
void flite_g2p_result_free(flite_g2p_result *result);
|
||||
|
||||
flite_g2p_result *flite_g2p_get_result(const char *grapheme);
|
||||
|
||||
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p(const char *graphemes, int map_phonemes);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/* #undef ENABLE_DOUBLE */
|
||||
|
||||
#ifdef ENABLE_DOUBLE
|
||||
# define csf_float double
|
||||
# define csf_ceil ceil
|
||||
# define csf_floor floor
|
||||
# define csf_sin sin
|
||||
# define csf_log log
|
||||
# define csf_log10 log10
|
||||
# define csf_pow pow
|
||||
# define csf_sqrt sqrt
|
||||
# define csf_abs fabs
|
||||
# define csf_float_min DBL_MIN
|
||||
#else
|
||||
# define csf_float float
|
||||
# define csf_ceil ceilf
|
||||
# define csf_floor floorf
|
||||
# define csf_sin sinf
|
||||
# define csf_log logf
|
||||
# define csf_log10 log10f
|
||||
# define csf_pow powf
|
||||
# define csf_sqrt sqrtf
|
||||
# define csf_abs fabsf
|
||||
# define csf_float_min FLT_MIN
|
||||
#endif
|
||||
418
managed_components/espressif__esp-sr/include/esp32c5/dl_lib.h
Normal file
418
managed_components/espressif__esp-sr/include/esp32c5/dl_lib.h
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_H
|
||||
#define DL_LIB_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "sdkconfig.h"
|
||||
#define DL_SPIRAM_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
#include "esp32s3/rom/cache.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int padding_state;
|
||||
|
||||
// /**
|
||||
// * @brief Allocate a chunk of memory which has the given capabilities.
|
||||
// * Equivalent semantics to libc malloc(), for capability-aware memory.
|
||||
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
|
||||
// *
|
||||
// * @param size In bytes, of the amount of memory to allocate
|
||||
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
|
||||
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
|
||||
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
|
||||
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
|
||||
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
|
||||
// * @return Pointer to currently allocated heap memory
|
||||
// **/
|
||||
// void *heap_caps_malloc(size_t size, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Allocate aligned memory from internal memory or external memory.
|
||||
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
|
||||
* else, allocate memory from PSRAM
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently allocated heap memory
|
||||
*/
|
||||
void *dl_lib_calloc(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Always allocate aligned memory from external memory.
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently aligned heap memory
|
||||
*/
|
||||
void *dl_lib_calloc_psram(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
||||
*
|
||||
* @param ptr Pointer to free
|
||||
*/
|
||||
void dl_lib_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
|
||||
* Should be good til an input of 5 or so with a steps factor of 8.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
fptp_t fast_exp(double x, int steps);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
double fast_exp_pro(double x);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a quantized matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
|
||||
fptp_t dl_sigmoid_op(fptp_t in);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a floating point number
|
||||
*
|
||||
* @param v Floating point input number
|
||||
* @return Tanh value
|
||||
*/
|
||||
fptp_t dl_tanh_op(fptp_t v);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fully connected layer operation
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
|
||||
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
|
||||
*/
|
||||
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
|
||||
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
|
||||
* this matrix only needs to be calculated once. This function does that.
|
||||
*
|
||||
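* @param variance Variance matrix
|
||||
* @param epsilon Epsilon value
|
||||
* @param out Output matrix for the precalculated sqrtvari values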
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
|
||||
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, partial quantized version.
|
||||
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons, need to be quantised
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a fully-connected layer pass, fully-quantized version.
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @param out Output matrix; the output values of the neurons are placed here
|
||||
*/
|
||||
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, fully-quantized version
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix, fully-quantized version
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
|
||||
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
|
||||
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
int dl_sigmoid_op_q(const int in);
|
||||
int16_t dl_sigmoid_op_q8(const int16_t in);
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param v Fixed-point input
|
||||
* @return tanh output
|
||||
*/
|
||||
int dl_tanh_op_q(int v);
|
||||
int16_t dl_tanh_op_q8(int16_t v);
|
||||
|
||||
void load_mat_psram_mn4(void);
|
||||
void load_mat_psram_mn3(void);
|
||||
void free_mat_psram_mn4(void);
|
||||
void free_mat_psram_mn3(void);
|
||||
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
|
||||
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
|
||||
|
||||
int16_t dl_table_tanh_op(int16_t in, int exponent);
|
||||
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
|
||||
|
||||
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, float version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
|
||||
* @param bias Bias for the CNN layer.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1.
|
||||
* @return The result of CNN layer.
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass, quantised version.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
|
||||
* @param bias Bias of the neurons.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1
|
||||
* @return The result of CNN layer
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_COEFGETTER_IF_H
|
||||
#define DL_LIB_COEFGETTER_IF_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "cJSON.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
|
||||
//dl_batch_normalize_get_sqrtvar first.
|
||||
#define COEF_GETTER_HINT_BNVAR (1<<0)
|
||||
|
||||
/*
|
||||
This struct describes the basic information of model data:
|
||||
word_num: the number of wake words or speech commands
|
||||
word_list: the name list of wake words or speech commands
|
||||
thresh_list: the threshold list of wake words or speech commands
|
||||
info_str: the string used to reflect the version and information of model data
|
||||
which consist of the architecture of network, the version of model data, wake words and their threshold
|
||||
*/
|
||||
typedef struct {
|
||||
int word_num; /* number of wake words or speech commands */
|
||||
char **word_list; /* name list of the wake words or speech commands */
|
||||
int *win_list; /* NOTE(review): not described in the struct comment; presumably one window value per word - confirm */
|
||||
float *thresh_list; /* threshold list of the wake words or speech commands */
|
||||
char *info_str; /* string reflecting the version and information of the model data */
|
||||
} model_info_t;
|
||||
|
||||
/*
|
||||
Alphabet struct describes the basic grapheme or phoneme.
|
||||
item_num: the number of basic items (graphemes or phonemes)
|
||||
items: the list of basic item
|
||||
*/
|
||||
typedef struct {
|
||||
int item_num; /* number of basic items (graphemes or phonemes) */
|
||||
char **items; /* list of the basic items */
|
||||
}alphabet_t;
|
||||
|
||||
/*
|
||||
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
|
||||
For the matrix getters (getter_f, getter_q and getter_q8), the name argument is the name of the coefficient matrix, usually the same as the Numpy filename the
|
||||
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
|
||||
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
|
||||
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q/free_q8 functions to release the
|
||||
memory for the returned matrices, when applicable.
|
||||
*/
|
||||
typedef struct {
|
||||
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); /* get a dl_matrix2d_t coefficient matrix by name */
|
||||
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); /* get a dl_matrix2dq_t coefficient matrix by name */
|
||||
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint); /* get a dl_matrix2dq8_t coefficient matrix by name */
|
||||
void (*free_f)(const dl_matrix2d_t *m); /* release a dl_matrix2d_t matrix, when applicable */
|
||||
void (*free_q)(const dl_matrix2dq_t *m); /* release a dl_matrix2dq_t matrix, when applicable */
|
||||
void (*free_q8)(const dl_matrix2dq8_t *m); /* release a dl_matrix2dq8_t matrix, when applicable */
|
||||
const model_info_t* (*getter_info)(void *arg); /* get the model_info_t of the model data */
|
||||
const alphabet_t* (*getter_alphabet)(void *arg); /* get the alphabet_t of the model data */
|
||||
const cJSON* (*getter_config)(void *arg); /* get a cJSON object; NOTE(review): presumably the model configuration - confirm */
|
||||
} model_coeff_getter_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,180 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONV_QUEUE_H
|
||||
#define DL_LIB_CONV_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
//Flags for matrices
|
||||
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
//Float convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /**< the length of the queue */
|
||||
int c; /**< the channel number of a queue element */
|
||||
int front; /**< the front (top) position of the queue */
|
||||
int flag; /**< not used */
|
||||
fptp_t *item; /**< pointer to the item array */
|
||||
} dl_conv_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue from psram
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Free a convolution queue
|
||||
*
|
||||
* @param cq The convolution queue to free
|
||||
*/
|
||||
void dl_conv_queue_free(dl_conv_queue_t *cq);
|
||||
|
||||
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param item The new element
|
||||
*/
|
||||
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
|
||||
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
|
||||
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
|
||||
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
/**
|
||||
* @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
|
||||
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
|
||||
|
||||
|
||||
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,303 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ8_QUEUE_H
|
||||
#define DL_LIB_CONVQ8_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//[nch, n, c]
|
||||
typedef struct {
|
||||
int n; /**< the length of the queue */
|
||||
int c; /**< the number of queue elements */
|
||||
int front; /**< the front (top) position of the queue */
|
||||
int nch; /**< the channel of the queue */
|
||||
int exponent; /**< the values in itemq should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
q8tp_t *itemq; /**< pointer to the item array */
|
||||
} dl_convq8_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate an 8-bit fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to zero
|
||||
*/
|
||||
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
|
||||
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel Kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
|
||||
|
||||
|
||||
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
|
||||
|
||||
int8_t dl_sigmoid_lutq8(int in);
|
||||
/**
|
||||
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel number
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
|
||||
*
|
||||
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* Usually, this layer is used as first layer for 8-bit network.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated.
|
||||
* Input is a 16-bit queue point, Output is an 8-bit queue point.
|
||||
*
|
||||
* @param in Input 16bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
|
||||
int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of 8-bit dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8-bit fixed-point convolution queue
|
||||
* @param out Output 8-bit fixed-point convolution queue
* @param nch The channel number
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
|
||||
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
|
||||
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
|
||||
|
||||
void print_convq8(dl_convq8_queue_t *cq, int offset);
|
||||
void print_convq(dl_convq_queue_t *cq, int offset);
|
||||
void dl_relu_convq8(dl_convq8_queue_t *cq);
|
||||
|
||||
void lstmq8_free(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,382 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ_QUEUE_H
|
||||
#define DL_LIB_CONVQ_QUEUE_H
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//fixed-point convolution FIFO queue.
|
||||
//[nch, n, c]
|
||||
typedef struct {
|
||||
int n; /*< the length of queue */
|
||||
int c; /*< the number of elements in the queue */
|
||||
int front; /*< the front(top) position of queue */
|
||||
int nch; /*< the number of channels (first dimension of the [nch, n, c] layout) */
|
||||
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
qtp_t *itemq; /*< Pointer to item array */
|
||||
} dl_convq_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
|
||||
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq_queue_free(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cq The fixed-point convolution queue point
|
||||
*/
|
||||
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
|
||||
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
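* @param a The new element
* @param shift NOTE(review): undocumented; presumably a shift applied while storing the element - confirm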
|
||||
*/
|
||||
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param last_num Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of convolution queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in multi channel convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cqm Input fixed-point multi-channel convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param nch The channel number of cqm
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Does a relu operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* relu operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
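* @param clip Clip value for the relu - NOTE(review): presumably the upper bound of the output; confirm
* @param last_num Offset from the front of the queue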
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, input data
|
||||
stay as it is. Results are saved into the *out* array.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return softmax results
|
||||
*/
|
||||
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int offset, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int prenum);
|
||||
|
||||
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
|
||||
*
|
||||
* @param cq1 First fixed-point convolution queue
|
||||
* @param cq2 Second fixed-point convolution queue
|
||||
* @return The result of float-point convolution queue
|
||||
*/
|
||||
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
|
||||
*
|
||||
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
|
||||
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
|
||||
* the kernel is parameters *h_weight*.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param in_weight the LSTM kernel needed by first part
|
||||
* @param h_weight the LSTM kernel needed by second part
|
||||
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
|
||||
* @param in_shift Shift ratio used in first part
|
||||
* @param h_shift Shift ratio used in second part
|
||||
* @return The result of LSTM layer
|
||||
*/
|
||||
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
|
||||
|
||||
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
|
||||
dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @param nch the channel number of convolution queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number of cqm
|
||||
*/
|
||||
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset the offset to calculate input convq
|
||||
* @param prenum the preload size, 0: do not use preload function
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* kernel,
|
||||
dl_matrix2dq_t* bias,
|
||||
int shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows for multi channel input
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @param offset The offset to calculate input convq
|
||||
* @param prenum The preload size, 0: do not use preload function
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* filter_kernel,
|
||||
dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel,
|
||||
dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift,
|
||||
int gate_shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
|
||||
void test_lstm_convq(int size, int in_dim, int lstm_cell);
|
||||
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
|
||||
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,257 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIX_H
|
||||
#define DL_LIB_MATRIX_H
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
extern multi_heap_handle_t gst_heap;
|
||||
#endif
|
||||
|
||||
//Flags for matrices
|
||||
#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */
|
||||
#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */
|
||||
|
||||
//'Normal' float matrix
|
||||
typedef struct {
|
||||
int w; /*< Width */
|
||||
int h; /*< Height */
|
||||
int stride; /*< Row stride: how many items to skip to reach the same column in the next row; DL_ITM indexes item[x + y*stride] */
|
||||
int flags; /*< Flags. OR of DL_MF_* values */
|
||||
fptp_t *item; /*< Pointer to item array */
|
||||
} dl_matrix2d_t;
|
||||
|
||||
//Macro to quickly access the raw items in a matrix
|
||||
/* Access item (x, y) of matrix pointer m. Every argument and the whole expansion are
   parenthesised so that calls such as DL_ITM(&mat, x, y) or DL_ITM(p + 1, x, y) expand
   correctly; the result is still an lvalue and may be assigned to. */
#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->item space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrix_free(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrix_zero(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @Warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix from existing floating-point data
|
||||
*
|
||||
* @param w Width of resulting matrix
|
||||
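* @param h Height of resulting matrix
* @param stride Row stride of the input data - NOTE(review): presumably in items, matching dl_matrix2d_t.stride; confirm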
|
||||
* @param data Data to populate matrix with
|
||||
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two matrices : res=a.b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Added data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Subtract a matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Subtracted data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated matrix containing the values a|b
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrix(const dl_matrix2d_t *a);
|
||||
|
||||
/**
|
||||
* @brief Return the average square error given a correct and a test matrix.
|
||||
*
|
||||
* ...Well, more or less. If anything, it gives an indication of the error between
|
||||
* the two. Check the code for the exact implementation.
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return value indicating the relative difference between matrices
|
||||
*/
|
||||
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get a specific item from the matrix
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
|
||||
// Thin wrapper around DL_ITM for column x, row y; m, x and y are not validated here.
return DL_ITM(m, x, y);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the matrix to the given value
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
|
||||
// Thin wrapper: assigns val through DL_ITM for column x, row y; m, x and y are not validated here.
DL_ITM(m, x, y)=val;
|
||||
}
|
||||
|
||||
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,387 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ_H
|
||||
#define DL_LIB_MATRIXQ_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int16_t qtp_t;
|
||||
|
||||
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
|
||||
//for easy use as a multiplicand without stressing out the flash cache too much.
|
||||
typedef struct {
|
||||
int w; //Width of the matrix
|
||||
int h; //Height of the matrix
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags; //Matrix flags; dl_matrixq_free checks DL_MF_FOREIGNDATA here according to its documentation
|
||||
int exponent; //The values in itemq should be multiplied by pow(2,exponent) to get the real values.
|
||||
qtp_t *itemq; //16-bit mantissas; DL_ITMQ addresses item (x, y) as itemq[y + x*stride]
|
||||
} dl_matrix2dq_t;
|
||||
|
||||
#define DL_QTP_SHIFT 15
|
||||
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
|
||||
//Access item (x, y) of quantized matrix m; the storage index is y + x*stride. Usable as an lvalue.
//Every argument and the whole expansion are parenthesized so that expression arguments
//(e.g. DL_ITMQ(p + 1, x, y) or DL_ITMQ(&mat, x, y)) expand correctly. Note: m is evaluated twice.
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
|
||||
|
||||
#define DL_SHIFT_AUTO 32
|
||||
|
||||
/**
|
||||
* @info About quantized matrices and shift values
|
||||
*
|
||||
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
|
||||
* time. Quantized matrices can speed up your operations, but come with some quirks, and
|
||||
* it's good to understand how they work before using them.
|
||||
*
|
||||
* The data in the quantized matrix type is stored similarly to floating-point types:
|
||||
* when storing a real value, the value is stored as a mantissa (base number) and an
|
||||
* exponent. The 'real' value that can be re-derived from those two numbers is something
|
||||
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
|
||||
* the standard floating point implementations like e.g. IEEE-754.
|
||||
*
|
||||
* The difference with respect to quantized matrices is that for a quantized matrix, it is
|
||||
* assumed all values stored have more-or-less the same order of magnitude. This allows the
|
||||
* matrix to only store all the mantissas, while the exponents are shared; there is only one
|
||||
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
|
||||
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
|
||||
* integer arithmetic. It also nets us some memory savings - while normally a floating point
|
||||
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
|
||||
* memory requirements.
|
||||
*
|
||||
* While most of the details of handling the intricacies of the quantized matrixes are done
|
||||
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
|
||||
* specifically in places where addition/subtraction/division happens.
|
||||
*
|
||||
* The problem is that the routines do not know what the size of the resulting operation is. For
|
||||
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
|
||||
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
|
||||
* assume the mantissas need to be scaled back, we may lose precision.
|
||||
*
|
||||
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
|
||||
* the argument is zero, the routine will be conservative, that is, increase the exponent of
|
||||
* the result to such an extent it's mathematically impossible a value in the result will exceed
|
||||
* the maximum value that can be stored. However, when this argument is larger than zero, the
|
||||
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
|
||||
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
|
||||
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
|
||||
* value possible. (Neural networks usually are okay with this happening for a limited amount
|
||||
* of matrix indices).
|
||||
*
|
||||
* For deciding on these shift values, it is recommended to start with a shift value of one, then
|
||||
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
|
||||
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
|
||||
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
|
||||
* else.
|
||||
*
|
||||
* For neural networks and other noise-tolerant applications, note that even when
|
||||
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
|
||||
* to slightly improved precision. Feel free to experiment.
|
||||
**/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
|
||||
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* TODO: DESCRIBE THIS FUNCTION
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Convert a quantized matrix to a floating-point one.
|
||||
*
|
||||
* @param m Quantized matrix to convert
|
||||
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The floating-point version of the quantized matrix
|
||||
**/
|
||||
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq_free(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrixq_zero(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a fixed-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
|
||||
* much slower than dl_matrixq_dot .
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a floating-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
|
||||
* much slower than dl_matrixq_dot_matrix_out.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand; float matrix
|
||||
* @param b Second multiplicand; quantized matrix
|
||||
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrixq(const dl_matrix2dq_t *a);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Add a pair of quantized matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Added data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @Warning In contrast to the floating point equivalent of this function, the fixed-point version
|
||||
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
|
||||
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
|
||||
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
|
||||
* garbage after modifying the data in one of the slices.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @Warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Subtracted data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplied data. Can be equal to a or b to overwrite that matrix.
|
||||
*/
|
||||
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
|
||||
* rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two quantized matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated quantized matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the quantized matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check the sanity of a quantized matrix
|
||||
*
|
||||
* Due to the nature of quantized matrices, depending on the calculations a quantized
|
||||
* matrix is the result of and the shift values chosen in those calculations, a quantized
|
||||
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
|
||||
* most significant mantissa bits are unused, or because a fair amount of mantissas are
|
||||
* clipped. This function checks if this is the case and will report a message to stdout
|
||||
* if significant loss of precision is detected.
|
||||
*
|
||||
* @param m The quantized matrix to check
|
||||
* @param name A string to be displayed in the message if the sanity check fails
|
||||
* @return True if matrix is sane, false otherwise
|
||||
**/
|
||||
|
||||
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
|
||||
|
||||
/**
|
||||
* @brief re-adjust the exponent of the matrix to fit the mantissa better
|
||||
*
|
||||
* This function will shift up all the data in the mantissas so there are no
|
||||
* most-significant bits that are unused in all mantissas. It will also adjust
|
||||
* the exponent to keep the actual values in the matrix the same.
|
||||
*
|
||||
* Some operations done on a matrix, especially operations that re-use the
|
||||
* result of earlier operations done in the same way, can lead to the loss of
|
||||
* data because the exponent of the quantized matrix is never re-adjusted. You
|
||||
* can do that explicitly by calling this function.
|
||||
*
|
||||
* @param m The matrix to re-adjust
|
||||
**/
|
||||
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the floating-point value of a specific item from the quantized matrix
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the quantized matrix to the given
|
||||
* floating-point value
|
||||
*
|
||||
* @warning If the given value is more than the exponent in the quantized matrix
|
||||
* allows for, all mantissas in the matrix will be shifted down to make the value
|
||||
* 'fit'. If, however, the exponent is such that the value would result in a
|
||||
* quantized mantissa of 0, nothing is done.
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ8_H
|
||||
#define DL_LIB_MATRIXQ8_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int8_t q8tp_t;
|
||||
|
||||
typedef struct {
|
||||
int w; //Width of the matrix
|
||||
int h; //Height of the matrix
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags; //Matrix flags; dl_matrixq8_free checks DL_MF_FOREIGNDATA here according to its documentation
|
||||
int exponent; //The values in itemq should be multiplied by pow(2,exponent) to get the real values.
|
||||
q8tp_t *itemq; //8-bit mantissas; DL_ITMQ8 addresses item (x, y) as itemq[y + x*stride]
|
||||
} dl_matrix2dq8_t;
|
||||
|
||||
#define DL_Q8TP_SHIFT 7
|
||||
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
|
||||
//Access item (x, y) of 8-bit quantized matrix m; the storage index is y + x*stride. Usable as an lvalue.
//Every argument and the whole expansion are parenthesized so that expression arguments
//(e.g. DL_ITMQ8(p + 1, x, y) or DL_ITMQ8(&mat, x, y)) expand correctly. Note: m is evaluated twice.
#define DL_ITMQ8(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq8_free(dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy a quantized matrix
|
||||
* Copy a quantized matrix from flash or iram/psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
105
managed_components/espressif__esp-sr/include/esp32c5/esp_aec.h
Normal file
105
managed_components/espressif__esp-sr/include/esp32c5/esp_aec.h
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AEC_H_
|
||||
#define _ESP_AEC_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
#define AEC_FRAME_LENGTH_MS 32
|
||||
|
||||
typedef struct aec_handle_t aec_handle_t;
|
||||
typedef enum {
|
||||
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
|
||||
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
|
||||
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication (note: value 2 is not assigned in this enum)
|
||||
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
|
||||
} aec_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
* Please get frame size by aec_get_chunksize() function
|
||||
*
|
||||
* @param sample_rate The Sampling frequency (Hz) must be 16000.
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure, same with aec_create().
|
||||
*
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic.
|
||||
*
|
||||
* @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc().
|
||||
*
|
||||
* @param handel The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
|
||||
* @param indata An array of 16-bit signed audio samples from mic.
|
||||
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
|
||||
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int aec_get_chunksize(const aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Get AEC mode string
|
||||
*
|
||||
* @param aec_mode The mode of AEC.
|
||||
*
|
||||
* @return AEC mode string
|
||||
*/
|
||||
char * aec_get_mode_string(aec_mode_t aec_mode);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_destroy(aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AEC_H_
|
||||
@@ -0,0 +1,81 @@
|
||||
|
||||
#ifndef _ESP_AFE_AEC_H_
|
||||
#define _ESP_AFE_AEC_H_
|
||||
|
||||
#include "esp_aec.h"
|
||||
#include "esp_afe_config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
aec_handle_t *handle; // Underlying AEC instance (aec_handle_t is declared in esp_aec.h)
|
||||
aec_mode_t mode; // AEC mode of this instance
|
||||
afe_pcm_config_t pcm_config; // PCM channel layout; presumably derived from input_format -- TODO confirm
|
||||
int frame_size; // Presumably the value reported by afe_aec_get_chunksize() -- TODO confirm
|
||||
int16_t *data; // 16-bit sample buffer; its purpose and ownership are not visible from this header
|
||||
} afe_aec_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @warning Currently only supports 1 microphone channel and 1 playback channel.
|
||||
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
|
||||
* channel will be selected.
|
||||
*
|
||||
* The input format, same as afe config:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
|
||||
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
|
||||
* esp32c5.
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_aec_handle_t* The created AEC instance
|
||||
*/
|
||||
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic.
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
* @param indata Input audio data, format is define by input_format.
|
||||
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned.
|
||||
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory
|
||||
|
||||
* @return The bytes of outdata.
|
||||
*/
|
||||
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void afe_aec_destroy(afe_aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AFE_AEC_H_
|
||||
@@ -0,0 +1,288 @@
|
||||
#pragma once
|
||||
#include "esp_aec.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_nsn_models.h"
|
||||
#include "esp_vad.h"
|
||||
#include "esp_vadn_models.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "model_path.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// VC: Voice Communication
|
||||
|
||||
// Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
|
||||
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
|
||||
} afe_sr_mode_t;
|
||||
|
||||
// Set AFE mode
|
||||
typedef enum {
|
||||
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
|
||||
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
|
||||
} afe_mode_t;
|
||||
|
||||
// Set AFE type
|
||||
typedef enum {
|
||||
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
|
||||
AFE_TYPE_VC = 1, // Voice communication scenarios, 16KHz input, including nonlinear noise suppression
|
||||
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8KHz input, note that the input data must be 8KHz
|
||||
} afe_type_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
|
||||
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
|
||||
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
// Peak-amplitude AGC modes: each enumerator's value is the peak amplitude (in dB) of the fetched audio.
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
|
||||
int mic_num; // microphone channel number
|
||||
uint8_t *mic_ids; // microphone channel indices
|
||||
int ref_num; // playback reference channel number
|
||||
uint8_t *ref_ids; // playback reference channel indices
|
||||
int sample_rate; // sample rate of audio
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
|
||||
AFE_NS_MODE_NET = 1, // please use model name of NSNET
|
||||
} afe_ns_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
|
||||
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
|
||||
} afe_agc_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Function to get the debug audio data
|
||||
*
|
||||
* @param data The debug audio data, which must not be modified. It should be copied away as soon as possible to
|
||||
* avoid blocking for too long.
|
||||
* @param data_size The number of bytes of data.
|
||||
* @returns
|
||||
*/
|
||||
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
|
||||
|
||||
typedef enum {
|
||||
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
|
||||
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
|
||||
AFE_DEBUG_HOOK_MAX = 2
|
||||
} afe_debug_hook_type_t;
|
||||
|
||||
typedef struct {
|
||||
afe_debug_hook_type_t hook_type; // debug type of hook
|
||||
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
|
||||
} afe_debug_hook_t;
|
||||
|
||||
// AFE configuration: per-algorithm switches and parameters (AEC, SE, NS, VAD, WakeNet, AGC) plus general AFE settings.
typedef struct {
|
||||
/********** AEC(Acoustic Echo Cancellation) **********/
|
||||
bool aec_init; // Whether to init aec
|
||||
aec_mode_t aec_mode; // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
|
||||
int aec_filter_length; // The filter length of aec
|
||||
|
||||
/********** SE(Speech Enhancement, microphone array processing) **********/
|
||||
bool se_init; // Whether to init se
|
||||
|
||||
/********** NS(Noise Suppression) **********/
|
||||
bool ns_init; // Whether to init ns
|
||||
char *ns_model_name; // Model name of ns
|
||||
afe_ns_mode_t afe_ns_mode; // Model mode of ns
|
||||
|
||||
/********** VAD(Voice Activity Detection) **********/
|
||||
bool vad_init; // Whether to init vad
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
char *vad_model_name; // The model name of vad. If it is null, WebRTC VAD will be used.
|
||||
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
|
||||
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
|
||||
// 1000 ms
|
||||
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
|
||||
// If you find vad cache can not cover all speech, please increase this value.
|
||||
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
|
||||
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
|
||||
|
||||
/********** WakeNet(Wake Word Engine) **********/
|
||||
bool wakenet_init; // Whether to init wakenet
|
||||
char *wakenet_model_name; // The model name of wakenet 1
|
||||
char *wakenet_model_name_2; // The model name of wakenet 2 if has wakenet 2
|
||||
det_mode_t wakenet_mode; // The mode of wakenet
|
||||
|
||||
/********** AGC(Automatic Gain Control) **********/
|
||||
bool agc_init; // Whether to init agc
|
||||
afe_agc_mode_t
|
||||
agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after far linear gain.
|
||||
int agc_compression_gain_db; // Compression gain in dB (default 9)
|
||||
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
|
||||
|
||||
/********** General AFE(Audio Front End) parameter **********/
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
|
||||
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
|
||||
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
|
||||
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
|
||||
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
|
||||
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
|
||||
// directly on the output amplitude: out_linear_gain * amplitude.
|
||||
bool debug_init; // NOTE(review): presumably whether to init the debug hooks (afe_debug_hook_t) - confirm
|
||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||
// otherwise, select channel number by wakenet
|
||||
} afe_config_t;
|
||||
|
||||
/**
|
||||
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
|
||||
* on the chip target and input format. You can manually fine-tune it after creating the configuration
|
||||
*
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param models Models from partition, which is configured by Kconfig
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_config_t* The default config of afe
|
||||
*/
|
||||
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Check AFE configuration and make sure it is correct.
|
||||
*
|
||||
* @warning If there is a configuration conflict, this function will modify some parameters.
|
||||
* The guiding principle behind these modifications is to maintain the highest performance of the output audio and results.
|
||||
* And remove the conflict between different algorithms.
|
||||
*
|
||||
* For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
|
||||
* If SE(BSS) algorithm is deactivated, will only use the first microphone channel.
|
||||
*
|
||||
* @param afe_config Input AFE config
|
||||
*
|
||||
* @return afe_config_t* The modified AFE config
|
||||
*/
|
||||
afe_config_t *afe_config_check(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input format
|
||||
*
|
||||
* @param input_format The input format, same with afe_config_init() function
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
* @return true if the input format is parsed successfully, otherwise false
|
||||
*/
|
||||
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse I2S input data
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param mic_data The output microphone data
|
||||
* @param ref_data The output playback reference data
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
*/
|
||||
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Format input data, from contiguous arrangement to interleaved arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param data The input audio data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param factor The gain factor
|
||||
*
|
||||
* @return int16_t* The output audio data
|
||||
*/
|
||||
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param in_data The input audio data
|
||||
* @param in_frame_size Input data frame size of input
|
||||
* @param channel_num The channel number of input data, which is same as output data
|
||||
* @param out_data The output audio data
|
||||
* @param out_frame_size Frame size of the output data
|
||||
*
|
||||
*/
|
||||
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
|
||||
|
||||
/**
|
||||
* @brief Copy the afe config
|
||||
*
|
||||
* @param dst_config The destination afe config
|
||||
* @param src_config The source afe config
|
||||
*
|
||||
* @return The destination afe config
|
||||
*/
|
||||
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
|
||||
|
||||
/**
|
||||
* @brief Print the afe config
|
||||
*
|
||||
* @param afe_config The afe config
|
||||
*/
|
||||
void afe_config_print(const afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Allocate afe config
|
||||
*
|
||||
* @return The afe config pointer
|
||||
*/
|
||||
afe_config_t *afe_config_alloc(void); // (void): explicit empty parameter list, so this is a real prototype in C as well as C++
|
||||
|
||||
/**
|
||||
* @brief Free afe config
|
||||
*
|
||||
* @param afe_config The afe config pointer
|
||||
*/
|
||||
void afe_config_free(afe_config_t *afe_config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _ESP_AFE_DOA_H_
|
||||
#define _ESP_AFE_DOA_H_
|
||||
|
||||
#include "esp_doa.h"
|
||||
#include "esp_afe_config.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
doa_handle_t *doa_handle;
|
||||
afe_pcm_config_t pcm_config;
|
||||
int16_t *leftdata;
|
||||
int16_t *rightdata;
|
||||
int frame_size;
|
||||
} afe_doa_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param input_format The input format
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized afe_doa_handle_t object pointer, Recommend using the above configuration for better performance
|
||||
*/
|
||||
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param handle afe_doa_handle_t instance pointer
|
||||
* @param indata Input audio data, format is defined by input_format.
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param handle afe_doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void afe_doa_destroy(afe_doa_handle_t *handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_AFE_DOA_H_ */
|
||||
@@ -0,0 +1,237 @@
|
||||
#pragma once
|
||||
#include "esp_afe_config.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// afe_sr/AFE_SR: the audio front-end for speech recognition
|
||||
|
||||
// Opaque AFE_SR data container
|
||||
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of vad
|
||||
*/
|
||||
typedef enum {
|
||||
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
|
||||
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
|
||||
} afe_vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief The result of fetch function
|
||||
*/
|
||||
typedef struct afe_fetch_result_t {
|
||||
int16_t *data; // the target channel data of audio.
|
||||
int data_size; // the size of data. The unit is byte.
|
||||
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
|
||||
// audio that was truncated.
|
||||
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
|
||||
// (note: invalid in vc). if wakenet is enabled, the window length is the receptive field of
|
||||
// wakenet(about 1.5s), otherwise it is the frame length.
|
||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||
int wake_word_index; // if the wake word is detected, it will store the wake word index, which starts from 1.
|
||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
|
||||
// starts from 1.
|
||||
vad_state_t vad_state; // the value is vad_state_t
|
||||
int trigger_channel_id; // the channel index of output
|
||||
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||
int ret_value; // the return state of fetch function
|
||||
int16_t *raw_data; // the multi-channel output data of audio.
|
||||
int raw_data_channels; // the channel number of raw data
|
||||
float ringbuff_free_pct; // the percent of ringbuff free size. if the value is larger than 0.5, it means the ringbuff is busy.
|
||||
void *reserved; // reserved for future use
|
||||
} afe_fetch_result_t;
|
||||
|
||||
/**
|
||||
* @brief Function to initialize an AFE_SR instance
|
||||
*
|
||||
* @param afe_config The config of AFE_SR
|
||||
* @returns Handle to the AFE_SR data
|
||||
*/
|
||||
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of each channel samples per frame that need to be passed to the function
|
||||
*
|
||||
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of samples to feed the fetch function
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of total channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the function
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the AFE_SR
|
||||
*
|
||||
* @warning The input data should be arranged in the format of channel interleaving.
|
||||
* The last channel is reference signal if it has reference data.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*
|
||||
* @param in The input microphone signal, only support signed 16-bit @ 16 KHZ. The frame size can be queried by the
|
||||
* `get_feed_chunksize`.
|
||||
* @return The size of input
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR
|
||||
*
|
||||
* @warning The output is single channel data, no matter how many channels the input is.
|
||||
* Timeout is 2000 ms. If you want to adjust timeout, please refer to the definition of `fetch_with_delay`.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR, same with the function `fetch`
|
||||
*
|
||||
* @warning The output is single channel data, no matter how many channels the input is.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
|
||||
|
||||
/**
|
||||
* @brief reset ringbuf of AFE.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Set wakenet detection threshold
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @param threshold The wakenet detection threshold, the value is between 0.4 and 0.9999.
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
|
||||
|
||||
/**
|
||||
* @brief Reset wakenet detection threshold to initial state
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
|
||||
|
||||
/**
|
||||
* @brief Reset one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Disable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Print all functions/modules/algorithms pipeline.
|
||||
* The pipeline is the order of the functions/modules/algorithms.
|
||||
* The format like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Destroy a AFE_SR instance
|
||||
*
|
||||
* @param afe AFE_SR object to destroy
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a AFE_SR.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_afe_sr_iface_op_create_from_config_t create_from_config;
|
||||
esp_afe_sr_iface_op_feed_t feed;
|
||||
esp_afe_sr_iface_op_fetch_t fetch;
|
||||
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
|
||||
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_aec;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_aec;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_se;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_se;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_vad;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_vad;
|
||||
esp_afe_sr_iface_op_reset_op_t reset_vad;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_ns;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_ns;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_agc;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_agc;
|
||||
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
|
||||
esp_afe_sr_iface_op_destroy_t destroy;
|
||||
} esp_afe_sr_iface_t;
|
||||
|
||||
// struct is used to store the AFE handle and data for the AFE task
|
||||
typedef struct {
|
||||
esp_afe_sr_data_t *afe_data;
|
||||
esp_afe_sr_iface_t *afe_handle;
|
||||
TaskHandle_t feed_task;
|
||||
TaskHandle_t fetch_task;
|
||||
} afe_task_into_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "esp_afe_sr_iface.h"
|
||||
|
||||
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,47 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AGC_H_
|
||||
#define _ESP_AGC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AGC return codes: ESP_AGC_SUCCESS (0) means success, every negative value is an error
|
||||
typedef enum {
|
||||
ESP_AGC_SUCCESS = 0, // success
|
||||
ESP_AGC_FAIL = -1, // agc fail
|
||||
ESP_AGC_SAMPLE_RATE_ERROR = -2, // sample rate can be only 8khz, 16khz, 32khz
|
||||
ESP_AGC_FRAME_SIZE_ERROR = -3, // the input frame must be exactly 10ms long, so the frame size follows from the sample rate
|
||||
} ESP_AGE_ERR;
|
||||
|
||||
typedef enum {
|
||||
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
|
||||
AGC_MODE_0 = 0, // Only saturation protection
|
||||
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
|
||||
} agc_mode_t;
|
||||
|
||||
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
|
||||
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
|
||||
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
|
||||
void esp_agc_close(void *agc_handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _ESP_AGC_H_
|
||||
@@ -0,0 +1,41 @@
|
||||
#ifndef _ESP_DOA_H_
|
||||
#define _ESP_DOA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct doa_handle_t doa_handle_t;
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized doa_handle_t object pointer, Recommend using the above configuration for better performance
|
||||
*/
|
||||
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param doa doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void esp_doa_destroy(doa_handle_t *doa);
|
||||
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param doa doa_handle_t instance pointer
|
||||
* @param left Left channel 16-bit PCM data
|
||||
* @param right Right channel 16-bit PCM data
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_DOA_H_ */
|
||||
@@ -0,0 +1,93 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_MASE_H_
|
||||
#define _ESP_MASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
|
||||
#define MASE_FRAME_SIZE 16 // Supports 16ms only
|
||||
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
|
||||
|
||||
/**
|
||||
* @brief Sets mic-array type, currently 2-mic line array and 3-mic circular array
|
||||
* are supported.
|
||||
*/
|
||||
typedef enum {
|
||||
TWO_MIC_LINE = 0,
|
||||
THREE_MIC_CIRCLE = 1
|
||||
} mase_mic_array_type_t;
|
||||
|
||||
/**
|
||||
* @brief Sets operating mode, supporting normal mode and wake-up enhancement mode
|
||||
*/
|
||||
typedef enum {
|
||||
NORMAL_ENHANCEMENT_MODE = 0,
|
||||
WAKE_UP_ENHANCEMENT_MODE = 1
|
||||
} mase_op_mode_t;
|
||||
|
||||
typedef void* mase_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the MASE structure.
|
||||
*
|
||||
* @param fs The sampling frequency (Hz) must be 16000.
|
||||
*
|
||||
* @param frame_size The length of the audio processing must be 16ms.
|
||||
*
|
||||
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array.
|
||||
*
|
||||
* @param mic_distance The distance between neighboring microphones in mm.
|
||||
*
|
||||
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode.
|
||||
*
|
||||
* @param filter_strength Strength of the mic-array speech enhancement, must be 0, 1, 2 or 3.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: An instance of MASE
|
||||
*/
|
||||
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
|
||||
|
||||
/**
|
||||
* @brief Performs mic array processing for one frame.
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @param in An array of 16-bit signed audio samples from mic.
|
||||
*
|
||||
* @param dsp_out Returns enhanced signal.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
|
||||
|
||||
/**
|
||||
* @brief Free the MASE instance
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_destory(mase_handle_t st);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,86 @@
|
||||
#pragma once
|
||||
#include "esp_speech_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
|
||||
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
|
||||
multiple implementations can be used.
|
||||
*/
|
||||
|
||||
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
|
||||
|
||||
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
|
||||
// please refer to its documentation for details.
|
||||
typedef struct {
|
||||
int winstep_ms; // The step between successive windows in ms. (10)
|
||||
int winlen_ms; // The length of the analysis window in ms. (25)
|
||||
int nch; // The number of input channel
|
||||
int numcep; // The number of cepstrum to return
|
||||
int nfilter; // The number of filters in the filterbank
|
||||
int nfft; // The FFT size
|
||||
int samp_freq; // The sample-rate of the signal.
|
||||
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
|
||||
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
|
||||
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
|
||||
char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey"
|
||||
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
|
||||
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
|
||||
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
|
||||
float log_epsilon; // log epsilon. (e.g. 1e-7)
|
||||
bool psram_first; // Alloc memory from PSRAM first
|
||||
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
|
||||
} esp_mfcc_opts_t;
|
||||
|
||||
/**
|
||||
* @brief Un-initialize and free a mfcc runner
|
||||
*
|
||||
* Function to free a previously allocated mfcc runner.
|
||||
*
|
||||
* @param r Runner object to destroy
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Initialize parameters for a mfcc runner.
|
||||
*
|
||||
* After creation, a mfcc runner needs to be initialized first; this is usually done
|
||||
* in the initialization routine of a speech recognition algorithm. This provides
|
||||
* a pointer to do this for a specific mfcc runner.
|
||||
*
|
||||
* @param opt Options for the mfcc process
|
||||
* @return Pointer to the mfcc runner data (esp_mfcc_data_t *).
|
||||
*/
|
||||
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
|
||||
|
||||
/**
|
||||
* @brief Run a mfcc iteration on frame by frame
|
||||
*
|
||||
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
|
||||
* an initial call to this function may return NULL and subsequent calls may return the
|
||||
* cepstrum of previous calls.
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
* @param samp An array of signed 16-bit samples. The amount of samples should be samp_freq*winstep_ms/1000.
|
||||
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
|
||||
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
|
||||
* to this function is done.
|
||||
*/
|
||||
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
|
||||
|
||||
/**
|
||||
* @brief Clean all state of mfcc handle
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Operations possible on a mfcc runner
|
||||
*/
|
||||
typedef struct {
|
||||
esp_mfcc_op_destroy_t destroy; // Un-initialize and free a mfcc runner
|
||||
esp_mfcc_op_create_t create; // Initialize parameters for a mfcc runner from the given options
|
||||
esp_mfcc_op_run_step_t run_step; // Run one mfcc iteration on a frame of samples
|
||||
esp_mfcc_op_clean_t clean; // Clean all state of the mfcc handle
|
||||
} esp_mfcc_iface_t;
|
||||
@@ -0,0 +1,89 @@
|
||||
#pragma once
|
||||
#include "esp_speech_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
|
||||
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
|
||||
multiple implementations can be used.
|
||||
*/
|
||||
|
||||
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
|
||||
|
||||
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
|
||||
// please refer to its documentation for details.
|
||||
typedef struct {
|
||||
int winstep_ms; // The step between successive windows in ms. (10)
|
||||
int winlen_ms; // The length of the analysis window in ms. (25)
|
||||
int nch; // The number of input channel
|
||||
int numcep; // The number of cepstrum to return
|
||||
int nfilter; // The number of filters in the filterbank
|
||||
int nfft; // The FFT size
|
||||
int samp_freq; // The sample-rate of the signal.
|
||||
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
|
||||
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
|
||||
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
|
||||
char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey"
|
||||
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
|
||||
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
|
||||
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
|
||||
float log_epsilon; // log epsilon. (e.g. 1e-7)
|
||||
bool psram_first; // Alloc memory from PSRAM first
|
||||
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
|
||||
} esp_mfcc_opts_t;
|
||||
|
||||
/**
|
||||
* @brief Un-initialize and free a mfcc runner
|
||||
*
|
||||
* Function to free a previously allocated mfcc runner.
|
||||
*
|
||||
* @param r Runner object to destroy
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Initialize parameters for a mfcc runner.
|
||||
*
|
||||
* After creation, a mfcc runner needs to be initialized first; this is usually done
|
||||
* in the initialization routine of a speech recognition algorithm. This provides
|
||||
* a pointer to do this for a specific mfcc runner.
|
||||
*
|
||||
* @param opt Options for the mfcc process
|
||||
* @return Pointer to the mfcc runner data (esp_mfcc_data_t *).
|
||||
*/
|
||||
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
|
||||
|
||||
/**
|
||||
* @brief Run a mfcc iteration on frame by frame
|
||||
*
|
||||
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
|
||||
* an initial call to this function may return NULL and subsequent calls may return the
|
||||
* cepstrum of previous calls.
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
* @param samp An array of signed 16-bit samples. The amount of samples should be samp_freq*winstep_ms/1000.
|
||||
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
|
||||
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
|
||||
* to this function is done.
|
||||
*/
|
||||
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
|
||||
|
||||
// NOTE(review): undocumented in this header. Presumably the int16 counterpart of run_step,
// writing its result into the caller-provided fbank buffer instead of returning a float buffer - confirm.
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
|
||||
|
||||
/**
|
||||
* @brief Clean all state of mfcc handle
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Operations possible on a mfcc runner
|
||||
*/
|
||||
typedef struct {
|
||||
esp_mfcc_op_destroy_t destroy; // Un-initialize and free a mfcc runner
|
||||
esp_mfcc_op_create_t create; // Initialize parameters for a mfcc runner from the given options
|
||||
esp_mfcc_op_run_step_t run_step; // Run one mfcc iteration on a frame of samples, returning float values
|
||||
esp_mfcc_op_run_step_s16_t run_step_s16; // NOTE(review): presumably the int16 output variant of run_step - confirm
|
||||
esp_mfcc_op_clean_t clean; // Clean all state of the mfcc handle
|
||||
} esp_mfcc_iface_t;
|
||||
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include "esp_mfcc_iface.h"
|
||||
|
||||
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
|
||||
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9 & multinet5
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_wn9(void); // (void): explicit empty parameter list, so C compilers reject calls that pass arguments
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9s
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
|
||||
|
||||
/**
|
||||
* @brief Return basic opts for default kaldifeat
|
||||
*
|
||||
opts->psram_first = true;
|
||||
opts->use_power = true;
|
||||
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
|
||||
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
|
||||
opts->win_type = "povey";
|
||||
opts->low_freq = 20;
|
||||
opts->high_freq = 7600;
|
||||
opts->samp_freq = 16000;
|
||||
opts->nch = 1;
|
||||
opts->nfft = 512;
|
||||
opts->nfilter = 80;
|
||||
opts->numcep = 80;
|
||||
opts->preemph = 0.97;
|
||||
opts->append_energy = false;
|
||||
opts->winlen_ms = 25;
|
||||
opts->winstep_ms = 10;
|
||||
opts->remove_dc_offset = true;
|
||||
*
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void); // (void): explicit empty parameter list, so C compilers reject calls that pass arguments
|
||||
|
||||
/**
|
||||
* @brief Print mfcc opts
|
||||
**/
|
||||
void print_mfcc_opts(esp_mfcc_opts_t *opts);
|
||||
@@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ESP_MN_RESULT_MAX_NUM 5
|
||||
#define ESP_MN_MAX_PHRASE_NUM 400
|
||||
#define ESP_MN_MAX_PHRASE_LEN 63
|
||||
#define ESP_MN_MIN_PHRASE_LEN 2
|
||||
|
||||
#define ESP_MN_PREFIX "mn"
|
||||
#define ESP_MN_ENGLISH "en"
|
||||
#define ESP_MN_CHINESE "cn"
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_STATE_DETECTING = 0, // detecting
|
||||
ESP_MN_STATE_DETECTED = 1, // detected
|
||||
ESP_MN_STATE_TIMEOUT = 2, // time out
|
||||
} esp_mn_state_t;
|
||||
|
||||
//Set multinet loading mode
|
||||
//The memory consumption is decreased with increasing mode,
|
||||
//As a consequence also the CPU loading rate goes up
|
||||
typedef enum {
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with maximum memory consumption
|
||||
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and load the rest from FLASH (default)
|
||||
ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation
|
||||
} esp_mn_loader_mode_t;
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_GREEDY_SEARCH = 0, // greedy search
|
||||
ESP_MN_BEAM_SEARCH = 1, // beam search
|
||||
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
|
||||
} esp_mn_search_method_t;
|
||||
|
||||
typedef enum {
|
||||
CHINESE_ID = 1, // Chinese language
|
||||
ENGLISH_ID = 2, // English language
|
||||
} language_id_t;
|
||||
|
||||
// Return all possible recognition results
|
||||
typedef struct{
|
||||
esp_mn_state_t state;
|
||||
int num; // The number of phrase in list, num<=5. When num=0, no phrase is recognized.
|
||||
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
|
||||
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
|
||||
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
|
||||
char string[256]; // recognized string with commands graph
|
||||
char raw_string[256]; // recognized string without commands graph
|
||||
} esp_mn_results_t;
|
||||
|
||||
typedef struct {
|
||||
char *string; // command string
|
||||
char *phonemes; // command phonemes, if applicable
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
} esp_mn_phrase_t;
|
||||
|
||||
typedef struct _mn_node_ {
|
||||
esp_mn_phrase_t *phrase;
|
||||
struct _mn_node_ *next;
|
||||
} esp_mn_node_t;
|
||||
|
||||
typedef struct{
|
||||
int16_t num; // The number of error phrases, i.e. phrases which can not be added into the model
|
||||
esp_mn_phrase_t **phrases; // The array of pointers to the error phrases
|
||||
} esp_mn_error_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a model instance with specified model name.
|
||||
*
|
||||
* @param model_name The wakenet model name.
|
||||
* @param duration The duration (ms) to trigger the timeout
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
|
||||
|
||||
/**
|
||||
* @brief Switch multinet mode to change memory consumption and CPU loading
|
||||
*
|
||||
* @warning Just Support multinet6 or later versions
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param mode The multinet loader mode
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the number of frames recognized by the command word
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of the frames recognized by the command word
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the language of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
|
||||
*/
|
||||
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The state of multinet
|
||||
*/
|
||||
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech commands recognition model
|
||||
*
|
||||
* @param model The Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get recognition results
|
||||
*
|
||||
* @param model The Model object to query
|
||||
*
|
||||
* @return The current results.
|
||||
*/
|
||||
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Open the log print
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Clean all status of model
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Set the speech commands by mn_command_root
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
* @param mn_command_root The speech commands link.
|
||||
* @return The error phrase id info.
|
||||
*/
|
||||
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Check if input string can be tokenized
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
* @param str The input string
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
|
||||
|
||||
typedef struct {
|
||||
esp_mn_iface_op_create_t create; // Initialize a model instance from a model name and timeout duration (ms)
|
||||
esp_mn_iface_op_get_samp_rate_t get_samp_rate; // Get the sample rate (Hz) of the samples to feed to detect
|
||||
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize; // Get the number of 16-bit samples to pass to detect
|
||||
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum; // Get the number of frames recognized by the command word
|
||||
esp_mn_iface_op_set_det_threshold_t set_det_threshold; // Set the detection threshold
|
||||
esp_mn_iface_op_get_language_t get_language; // Get the language name string of the model
|
||||
esp_mn_iface_op_detect_t detect; // Feed audio samples and return the multinet state
|
||||
esp_mn_iface_op_destroy_t destroy; // Destroy the speech commands recognition model
|
||||
esp_mn_iface_op_get_results_t get_results; // Get the current recognition results
|
||||
esp_mn_iface_op_open_log_t open_log; // Open the log print
|
||||
esp_mn_iface_op_clean_t clean; // Clean all status of the model
|
||||
esp_wn_iface_op_set_speech_commands set_speech_commands; // Set the speech commands from a command list; returns error phrase info
|
||||
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode; // Switch the loader mode (multinet6 or later only)
|
||||
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands; // Print the current commands in fst
|
||||
esp_mn_iface_op_check_speech_command check_speech_command; // Check if an input string can be tokenized
|
||||
} esp_mn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
//a specific phrase or word.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
* @brief Get the multinet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of multinet
|
||||
*/
|
||||
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the multinet language from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The language of multinet
|
||||
*/
|
||||
char *esp_mn_language_from_name(char *model_name);
|
||||
|
||||
/*
|
||||
Configure wake word to use based on what's selected in menuconfig.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
|
||||
#include "multinet2_ch.h"
|
||||
#define MULTINET_COEFF get_coeff_multinet2_ch
|
||||
#define MULTINET_MODEL_NAME "mn2_cn"
|
||||
|
||||
#else
|
||||
#define MULTINET_COEFF "COEFF_NULL"
|
||||
#define MULTINET_MODEL_NAME "NULL"
|
||||
#endif
|
||||
|
||||
|
||||
/* example
|
||||
|
||||
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
|
||||
|
||||
//Initialize MultiNet model data
|
||||
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
|
||||
add_speech_commands(multinet, model_data);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=multinet->get_samp_chunksize(model_data);
|
||||
int frequency = multinet->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=multinet->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
multinet->destroy(model_data);
|
||||
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user