add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
--- a/managed_components/espressif__esp-sr/include/esp32c6/esp_afe_config.h
+++ b/managed_components/espressif__esp-sr/include/esp32c6/esp_afe_config.h
@@ -0,0 +1,288 @@
+#pragma once
+#include "esp_aec.h"
+#include "esp_agc.h"
+#include "esp_nsn_models.h"
+#include "esp_vad.h"
+#include "esp_vadn_models.h"
+#include "esp_wn_iface.h"
+#include "esp_wn_models.h"
+#include "model_path.h"
+#include "stdbool.h"
+#include "stdint.h"
+#include "stdlib.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// AFE: Audio Front-End
+// SR:  Speech Recognition
+// VC:  Voice Communication
+
+// Legacy AFE_SR mode selector; every value is deprecated in favour of afe_mode_t
+typedef enum {
+    SR_MODE_LOW_COST = 0,  // Deprecated, please use afe_mode_t instead. AFE mode: low cost mode
+    SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t instead. AFE mode: high performance mode
+} afe_sr_mode_t;
+
+// AFE mode selector: low cost or high performance
+typedef enum {
+    AFE_MODE_LOW_COST = 0,  // AFE mode: low cost mode
+    AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
+} afe_mode_t;
+
+// AFE type selector: the application scenario the AFE is configured for
+typedef enum {
+    AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
+    AFE_TYPE_VC = 1, // Voice communication scenarios, 16 kHz input, including nonlinear noise suppression
+    AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8 kHz input; note that the input data must be 8 kHz
+} afe_type_t;
+
+typedef enum {
+    AFE_MEMORY_ALLOC_MORE_INTERNAL = 1,          // Allocate with more internal RAM
+    AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // Allocate with internal RAM and PSRAM in balance
+    AFE_MEMORY_ALLOC_MORE_PSRAM = 3              // Allocate with more PSRAM
+} afe_memory_alloc_mode_t;
+
+typedef enum {
+    AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
+    AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
+    AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
+    AFE_MN_PEAK_NO_AGC = 0,      // There is no agc gain
+} afe_mn_peak_agc_mode_t;
+
+typedef struct {
+    int total_ch_num; // total channel number, including microphone, playback and unknown channels
+    int mic_num;      // microphone channel number
+    uint8_t *mic_ids; // microphone channel indices
+    int ref_num;      // playback reference channel number
+    uint8_t *ref_ids; // playback reference channel indices
+    int sample_rate;  // sample rate of audio
+} afe_pcm_config_t;
+
+typedef enum {
+    AFE_NS_MODE_WEBRTC = 0, // WebRTC NS: please use the NS (SSP) model name "WEBRTC"
+    AFE_NS_MODE_NET = 1,    // NSNET: please use the model name of NSNET
+} afe_ns_mode_t;
+
+typedef enum {
+    AFE_AGC_MODE_WEBRTC = 0,  // WebRTC AGC
+    AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by the wakenet model if wakenet is activated
+} afe_agc_mode_t;
+
+/**
+ * @brief Callback type used to receive the debug audio data
+ *
+ * @param data        The debug audio data, which must not be modified. It should be copied away as soon as
+ * possible to avoid blocking for too long.
+ * @param data_size   The number of bytes of data.
+ * @return None
+ */
+typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
+
+typedef enum {
+    AFE_DEBUG_HOOK_MASE_TASK_IN = 0,  // To get the input data of the mase task
+    AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of the fetch task
+    AFE_DEBUG_HOOK_MAX = 2 // NOTE(review): looks like the number of hook types, not a hook itself - confirm
+} afe_debug_hook_type_t;
+
+typedef struct {
+    afe_debug_hook_type_t hook_type;         // which debug hook this entry describes
+    afe_debug_hook_callback_t hook_callback; // callback function which receives the debug audio data
+} afe_debug_hook_t;
+
+typedef struct {
+    /********** AEC(Acoustic Echo Cancellation) **********/
+    bool aec_init;         // Whether to init aec
+    aec_mode_t aec_mode;   // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
+    int aec_filter_length; // The filter length of aec
+
+    /********** SE(Speech Enhancement, microphone array processing) **********/
+    bool se_init; // Whether to init se
+
+    /********** NS(Noise Suppression) **********/
+    bool ns_init;              // Whether to init ns
+    char *ns_model_name;       // Model name of ns
+    afe_ns_mode_t afe_ns_mode; // Model mode of ns
+
+    /********** VAD(Voice Activity Detection) **********/
+    bool vad_init;          // Whether to init vad
+    vad_mode_t vad_mode;    // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
+    char *vad_model_name;   // The model name of vad. If it is null, WebRTC VAD will be used.
+    int vad_min_speech_ms;  // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
+    int vad_min_noise_ms;   // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
+                            // 1000 ms
+    int vad_delay_ms;       // The delay of the first speech frame in ms, default: 128 ms
+                            // If you find the vad cache can not cover all speech, please increase this value.
+    bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
+    bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
+
+    /********** WakeNet(Wake Word Engine) **********/
+    bool wakenet_init; // Whether to init wakenet
+    char *wakenet_model_name;   // The model name of wakenet 1
+    char *wakenet_model_name_2; // The model name of wakenet 2, if there is a wakenet 2
+    det_mode_t wakenet_mode;    // The mode of wakenet
+
+    /********** AGC(Automatic Gain Control) **********/
+    bool agc_init; // Whether to init agc
+    afe_agc_mode_t
+        agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after far linear gain.
+    int agc_compression_gain_db; // Compression gain in dB (default 9)
+    int agc_target_level_dbfs;   // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
+
+    /********** General AFE(Audio Front End) parameter **********/
+    afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
+    afe_mode_t afe_mode;         // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
+    afe_type_t afe_type;         // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
+    int afe_perferred_core;      // The preferred core of afe se task, which is created in afe_create function.
+    int afe_perferred_priority;  // The preferred priority of afe se task, which is created in afe_create function.
+    int afe_ringbuf_size;        // The ring buffer size: the number of frame data in ring buffer.
+    afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
+    float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
+                           // directly on the output amplitude: afe_linear_gain * amplitude.
+    bool debug_init; // Whether to init debug (NOTE(review): presumably the debug hook feature - confirm)
+    bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone;
+                              // otherwise, select channel number by wakenet
+} afe_config_t;
+
+/**
+ * @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
+ * on the chip target and input format. You can manually fine-tune it after creating the configuration.
+ *
+ * The input format uses one character per channel:
+ * M to represent the microphone channel
+ * R to represent the playback reference channel
+ * N to represent an unknown or unused channel
+ *
+ * For example, input_format="MMNR" indicates that the input data consists of four channels,
+ * which are the microphone channel, the microphone channel, an unused channel, and the playback channel.
+ *
+ * @param input_format     The input format
+ * @param models           Models from partition, which is configured by Kconfig
+ * @param type             The type of afe (see afe_type_t), e.g. AFE_TYPE_SR or AFE_TYPE_VC
+ * @param mode             The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
+ *
+ * @return afe_config_t*  The default config of afe
+ */
+afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
+
+/**
+ * @brief Check AFE configuration and make sure it is correct.
+ *
+ * @warning If there is a configuration conflict, this function will modify some parameters.
+ * The guiding principle behind these modifications is to maintain the highest performance of the output audio
+ * and results, and to remove the conflict between different algorithms.
+ *
+ * For example, if the input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
+ * If the SE(BSS) algorithm is deactivated, only the first microphone channel will be used.
+ *
+ * @param afe_config       Input AFE config
+ *
+ * @return afe_config_t*  The modified AFE config
+ */
+afe_config_t *afe_config_check(afe_config_t *afe_config);
+
+/**
+ * @brief Parse the input format string into a pcm config
+ *
+ * @param input_format The input format, same as for the afe_config_init() function
+ * @param pcm_config   The pcm config
+ *
+ * @return true if the input format is parsed successfully, otherwise false
+ */
+bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
+
+/**
+ * @brief Parse I2S input data into microphone data and playback reference data
+ *
+ * @param data         The input multi channel data
+ * @param frame_size   The frame size of input, which is also the size of single channel data
+ * @param mic_data     The output microphone data
+ * @param ref_data     The output playback reference data
+ * @param pcm_config   The pcm config
+ *
+ */
+void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
+
+/**
+ * @brief Parse input data, converting it from interleaved arrangement to contiguous arrangement
+ *
+ * @param data         The input multi channel data
+ * @param frame_size   The frame size of input, which is also the size of single channel data
+ * @param channel_num  The channel number of data
+ * @param out_data     The output data
+ *
+ */
+void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
+
+/**
+ * @brief Format input data, converting it from contiguous arrangement to interleaved arrangement
+ *
+ * @param data         The input multi channel data
+ * @param frame_size   The frame size of input, which is also the size of single channel data
+ * @param channel_num  The channel number of data
+ * @param out_data     The output data
+ *
+ */
+void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
+
+/**
+ * @brief Adjust the gain of input data
+ *
+ * @warning The input data will be modified in place.
+ *
+ * @param data         The input audio data
+ * @param frame_size   The frame size of input, which is also the size of single channel data
+ * @param factor       The gain factor
+ *
+ * @return int16_t*    The output audio data
+ */
+int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
+
+/**
+ * @brief Concatenate audio data (NOTE(review): presumed from the function name - confirm against the implementation)
+ *
+ * @warning The input data may be modified in place (NOTE(review): wording shared with afe_adjust_gain() - confirm).
+ *
+ * @param in_data         The input audio data
+ * @param in_frame_size   Frame size of the input data
+ * @param channel_num     The channel number of input data, which is the same as that of the output data
+ * @param out_data        The output audio data
+ * @param out_frame_size  Frame size of the output data
+ *
+ */
+void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
+
+/**
+ * @brief Copy the afe config from src_config to dst_config
+ *
+ * @param dst_config    The destination afe config
+ * @param src_config    The source afe config, which is not modified through this pointer
+ *
+ * @return   The destination afe config
+ */
+afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
+
+/**
+ * @brief Print the afe config
+ *
+ * @param afe_config    The afe config to print, which is not modified through this pointer
+ */
+void afe_config_print(const afe_config_t *afe_config);
+
+/**
+ * @brief Allocate an afe config. Takes no arguments; declared with (void) so that C checks calls against it.
+ *
+ * @return The afe config pointer (NOTE(review): presumably released with afe_config_free() - confirm)
+ */
+afe_config_t *afe_config_alloc(void);
+
+/**
+ * @brief Free an afe config
+ *
+ * @param afe_config  The afe config pointer to free
+ */
+void afe_config_free(afe_config_t *afe_config);
+
+#ifdef __cplusplus
+}
+#endif