add some code
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
#include "esp_vad.h"
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
// /**
|
||||
// * @brief The state of vad
|
||||
// */
|
||||
// typedef enum {
|
||||
// VAD_NOISE = -1, // Noise
|
||||
// VADNET_STATE_SILENCE = 0, // Silence
|
||||
// VAD_SPEECH = 1 // Speech
|
||||
// } vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialze a model instance with a detection mode
|
||||
* and specified model name
|
||||
*
|
||||
* @param model_name The specified model name
|
||||
* @param mode The voice activity detection mode
|
||||
* @param channel_num The number of input audio channels
|
||||
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
|
||||
* speech
|
||||
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
|
||||
* noise
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
|
||||
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually abjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_treshold The threshold to trigger wake words, the range of
|
||||
* det_threshold is 0.5~0.9999
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the voice activity detection threshold
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used
|
||||
* can be queried by the get_samp_chunksize function.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a model object
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a voice
|
||||
* activity detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_vadn_iface_op_create_t create;
|
||||
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_vadn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_vadn_iface_op_detect_t detect;
|
||||
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_vadn_iface_op_clean_t clean;
|
||||
esp_vadn_iface_op_destroy_t destroy;
|
||||
} esp_vadn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user