Update to 2.0.0

This commit is contained in:
2025-09-13 23:40:38 +08:00
parent 5a929f5b06
commit 63e404d610
247 changed files with 13586 additions and 11497 deletions

View File

@@ -8,28 +8,28 @@
#define TAG "AudioCodec"
// Constructor with an empty body.
// NOTE(review): this text is a rendered diff, so the signature appears twice —
// the pre-change line followed by the reformatted post-change line.
AudioCodec::AudioCodec(){
AudioCodec::AudioCodec() {
}
// Destructor with an empty body.
// NOTE(review): rendered diff — the two signature lines are the old and new
// formatting of the same declaration.
AudioCodec::~AudioCodec(){
AudioCodec::~AudioCodec() {
}
// Hands the whole contents of `data` (buffer pointer and element count) to Write().
// The return value of Write(), if any, is not inspected here.
// NOTE(review): rendered diff — the two signature lines are the old and new
// formatting of the same declaration.
void AudioCodec::OutputData(std::vector<int16_t> &data){
void AudioCodec::OutputData(std::vector<int16_t>& data) {
Write(data.data(), data.size());
}
// Calls Read() with the buffer and current size of `data` and reports whether it
// produced anything: returns true only when Read() returns a positive count.
// The vector is not resized here, so the caller decides how many samples are requested.
// NOTE(review): rendered diff — paired lines below are the old and new formatting
// of the same statement.
bool AudioCodec::InputData(std::vector<int16_t> &data){
bool AudioCodec::InputData(std::vector<int16_t>& data) {
int samples = Read(data.data(), data.size());
if (samples > 0){
if (samples > 0) {
return true;
}
// Zero or negative result from Read() is reported as "no data".
return false;
}
// Starts the codec. Visible steps: load the stored output volume, enable the I2S TX
// and RX channels that exist, then log that the codec has started.
// NOTE(review): rendered diff — paired lines are the old and new formatting of the
// same statement, and the `@@` hunk headers mark places where lines of this function
// are elided from view; only what is visible is described here.
void AudioCodec::Start(){
void AudioCodec::Start() {
// Opens the "audio" settings namespace; the meaning of the `false` flag is not
// visible here — presumably read-only, TODO confirm against Settings.
Settings settings("audio", false);
// The current output_volume_ is passed as the second argument (presumably the fallback).
output_volume_ = settings.GetInt("output_volume", output_volume_);
// A stored volume of zero or below is replaced by 10, with a warning.
if (output_volume_ <= 0){
if (output_volume_ <= 0) {
ESP_LOGW(TAG, "Output volume value (%d) is too small, setting to default (10)", output_volume_);
output_volume_ = 10;
}
@@ -40,11 +40,11 @@ void AudioCodec::Start(){
ESP_LOGI(TAG, "Saved original output sample rate: %d Hz", original_output_sample_rate_);
}
// Enable each I2S channel only if its handle exists; the call is wrapped in
// ESP_ERROR_CHECK.
if (tx_handle_ != nullptr){
if (tx_handle_ != nullptr) {
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
}
if (rx_handle_ != nullptr){
if (rx_handle_ != nullptr) {
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
}
@@ -53,74 +53,73 @@ void AudioCodec::Start(){
ESP_LOGI(TAG, "Audio codec started");
}
// Stores `volume` in output_volume_, logs it, and writes it to the "output_volume"
// key of the "audio" settings namespace.
// NOTE(review): `volume` is stored exactly as given — no range check is applied in
// this function.
// NOTE(review): rendered diff — the two signature lines are the old and new
// formatting of the same declaration.
void AudioCodec::SetOutputVolume(int volume){
void AudioCodec::SetOutputVolume(int volume) {
output_volume_ = volume;
ESP_LOGI(TAG, "Set output volume to %d", output_volume_);
// Opened with `true` (presumably read-write — TODO confirm) so the value can be saved.
Settings settings("audio", true);
settings.SetInt("output_volume", output_volume_);
}
// Records the requested input-enable state in input_enabled_ and logs it.
// Does nothing (no log, no write) when the state is already `enable`.
// NOTE(review): rendered diff — each pair of near-identical lines is the old and new
// formatting of the same statement.
void AudioCodec::EnableInput(bool enable){
if (enable == input_enabled_){
void AudioCodec::EnableInput(bool enable) {
if (enable == input_enabled_) {
return;
}
input_enabled_ = enable;
ESP_LOGI(TAG, "Set input enable to %s", enable ? "true" : "false");
}
// Records the requested output-enable state in output_enabled_ and logs it.
// Does nothing (no log, no write) when the state is already `enable`.
// NOTE(review): rendered diff — each pair of near-identical lines is the old and new
// formatting of the same statement.
void AudioCodec::EnableOutput(bool enable){
if (enable == output_enabled_){
void AudioCodec::EnableOutput(bool enable) {
if (enable == output_enabled_) {
return;
}
output_enabled_ = enable;
ESP_LOGI(TAG, "Set output enable to %s", enable ? "true" : "false");
}
// Changes the output sample rate, reconfiguring the I2S TX clock when a TX channel exists.
//
// sample_rate: target rate in Hz, or -1 to request a reset to original_output_sample_rate_.
//
// Returns true when the requested rate is already in effect, when there is no TX handle
// and only output_sample_rate_ could be updated, or when i2s_channel_reconfig_std_clock()
// succeeds. Returns false when -1 is passed but no original rate was saved, when the rate
// is outside 1..192000, or when the clock reconfiguration fails.
//
// NOTE(review): rendered diff — each pair of near-identical lines is the old and new
// formatting of the same statement, and the `@@` hunk header inside the clock config
// marks lines that are elided from view.
bool AudioCodec::SetOutputSampleRate(int sample_rate){
bool AudioCodec::SetOutputSampleRate(int sample_rate) {
// Special case: passing -1 means "reset to the original sample rate".
if (sample_rate == -1){
if (original_output_sample_rate_ > 0){
if (sample_rate == -1) {
if (original_output_sample_rate_ > 0) {
sample_rate = original_output_sample_rate_;
ESP_LOGI(TAG, "Resetting to original output sample rate: %d Hz", sample_rate);
}else{
} else {
ESP_LOGW(TAG, "Original sample rate not available, cannot reset");
return false;
}
}
// Reject non-positive rates and anything above 192000 Hz.
if (sample_rate <= 0 || sample_rate > 192000){
if (sample_rate <= 0 || sample_rate > 192000) {
ESP_LOGE(TAG, "Invalid sample rate: %d", sample_rate);
return false;
}
// Nothing to do when the requested rate is already the current one.
if (output_sample_rate_ == sample_rate){
if (output_sample_rate_ == sample_rate) {
ESP_LOGI(TAG, "Sample rate already set to %d Hz", sample_rate);
return true;
}
// Without a TX channel there is no hardware to reconfigure; just remember the rate.
if (tx_handle_ == nullptr){
if (tx_handle_ == nullptr) {
ESP_LOGW(TAG, "TX handle is null, only updating sample rate variable");
output_sample_rate_ = sample_rate;
return true;
}
ESP_LOGI(TAG, "Changing output sample rate from %d to %d Hz", output_sample_rate_, sample_rate);
// First try to disable the I2S channel (if it is currently enabled).
// NOTE(review): was_enabled is assigned below but is not read anywhere in the visible
// lines; the channel is re-enabled unconditionally after the reconfiguration.
bool was_enabled = false;
esp_err_t disable_ret = i2s_channel_disable(tx_handle_);
if (disable_ret == ESP_OK){
if (disable_ret == ESP_OK) {
was_enabled = true;
ESP_LOGI(TAG, "Disabled I2S TX channel for reconfiguration");
}
else if (disable_ret == ESP_ERR_INVALID_STATE){
} else if (disable_ret == ESP_ERR_INVALID_STATE) {
// The channel may already be disabled; that is normal.
ESP_LOGI(TAG, "I2S TX channel was already disabled");
}else{
} else {
// Any other disable failure is only logged; reconfiguration is still attempted.
ESP_LOGW(TAG, "Failed to disable I2S TX channel: %s", esp_err_to_name(disable_ret));
}
// Reconfigure the I2S clock.
i2s_std_clk_config_t clk_cfg = {
.sample_rate_hz = (uint32_t)sample_rate,
@@ -130,23 +129,23 @@ bool AudioCodec::SetOutputSampleRate(int sample_rate){
.ext_clk_freq_hz = 0,
#endif
};
esp_err_t ret = i2s_channel_reconfig_std_clock(tx_handle_, &clk_cfg);
// Re-enable the channel (whatever its previous state, it must be enabled now so audio can play).
// A failed re-enable is only logged; the return value below depends solely on `ret`.
esp_err_t enable_ret = i2s_channel_enable(tx_handle_);
if (enable_ret != ESP_OK){
if (enable_ret != ESP_OK) {
ESP_LOGE(TAG, "Failed to enable I2S TX channel: %s", esp_err_to_name(enable_ret));
}else{
} else {
ESP_LOGI(TAG, "Enabled I2S TX channel");
}
// output_sample_rate_ is updated only when the clock reconfiguration succeeded.
if (ret == ESP_OK){
if (ret == ESP_OK) {
output_sample_rate_ = sample_rate;
ESP_LOGI(TAG, "Successfully changed output sample rate to %d Hz", sample_rate);
return true;
}else{
} else {
ESP_LOGE(TAG, "Failed to change sample rate to %d Hz: %s", sample_rate, esp_err_to_name(ret));
return false;
}
}
}

View File

@@ -5,13 +5,14 @@
#include <vector>
#include <functional>
#include <model_path.h>
#include "audio_codec.h"
class AudioProcessor {
public:
virtual ~AudioProcessor() = default;
virtual void Initialize(AudioCodec* codec, int frame_duration_ms) = 0;
virtual void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) = 0;
virtual void Feed(std::vector<int16_t>&& data) = 0;
virtual void Start() = 0;
virtual void Stop() = 0;

View File

@@ -111,7 +111,7 @@ void AudioService::Start() {
AudioService* audio_service = (AudioService*)arg;
audio_service->AudioOutputTask();
vTaskDelete(NULL);
}, "audio_output", 2048 * 2, this, 3, &audio_output_task_handle_);
}, "audio_output", 2048 * 2, this, 4, &audio_output_task_handle_);
#else
/* Start the audio input task */
xTaskCreate([](void* arg) {
@@ -125,7 +125,7 @@ void AudioService::Start() {
AudioService* audio_service = (AudioService*)arg;
audio_service->AudioOutputTask();
vTaskDelete(NULL);
}, "audio_output", 2048, this, 3, &audio_output_task_handle_);
}, "audio_output", 2048, this, 4, &audio_output_task_handle_);
#endif
/* Start the opus codec task */
@@ -479,7 +479,7 @@ void AudioService::EnableWakeWordDetection(bool enable) {
ESP_LOGD(TAG, "%s wake word detection", enable ? "Enabling" : "Disabling");
if (enable) {
if (!wake_word_initialized_) {
if (!wake_word_->Initialize(codec_)) {
if (!wake_word_->Initialize(codec_, models_list_)) {
ESP_LOGE(TAG, "Failed to initialize wake word");
return;
}
@@ -497,7 +497,7 @@ void AudioService::EnableVoiceProcessing(bool enable) {
ESP_LOGD(TAG, "%s voice processing", enable ? "Enabling" : "Disabling");
if (enable) {
if (!audio_processor_initialized_) {
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS);
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
audio_processor_initialized_ = true;
}
@@ -528,7 +528,7 @@ void AudioService::EnableAudioTesting(bool enable) {
void AudioService::EnableDeviceAec(bool enable) {
ESP_LOGI(TAG, "%s device AEC", enable ? "Enabling" : "Disabling");
if (!audio_processor_initialized_) {
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS);
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
audio_processor_initialized_ = true;
}
@@ -668,6 +668,10 @@ void AudioService::CheckAndUpdateAudioPowerState() {
}
}
// Remembers the model list pointer supplied by the caller in models_list_.
// Only the pointer is stored; nothing is copied or validated here.
void AudioService::SetModelsList(srmodel_list_t* models_list) {
    this->models_list_ = models_list;
}
// Stamps last_output_time_ with the current std::chrono::steady_clock reading.
void AudioService::UpdateOutputTimestamp() {
    const auto now = std::chrono::steady_clock::now();
    last_output_time_ = now;
}

View File

@@ -11,6 +11,7 @@
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <esp_timer.h>
#include <model_path.h>
#include <opus_encoder.h>
#include <opus_decoder.h>
@@ -106,6 +107,7 @@ public:
void PlaySound(const std::string_view& sound);
bool ReadAudioData(std::vector<int16_t>& data, int sample_rate, int samples);
void ResetDecoder();
void SetModelsList(srmodel_list_t* models_list);
void UpdateOutputTimestamp();
@@ -121,6 +123,7 @@ private:
OpusResampler reference_resampler_;
OpusResampler output_resampler_;
DebugStatistics debug_statistics_;
srmodel_list_t* models_list_ = nullptr;
EventGroupHandle_t event_group_;

View File

@@ -10,7 +10,7 @@ AfeAudioProcessor::AfeAudioProcessor()
event_group_ = xEventGroupCreate();
}
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) {
codec_ = codec;
frame_samples_ = frame_duration_ms * 16000 / 1000;
@@ -27,7 +27,13 @@ void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
input_format.push_back('R');
}
srmodel_list_t *models = esp_srmodel_init("model");
srmodel_list_t *models;
if (models_list == nullptr) {
models = esp_srmodel_init("model");
} else {
models = models_list;
}
char* ns_model_name = esp_srmodel_filter(models, ESP_NSNET_PREFIX, NULL);
char* vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);

View File

@@ -18,7 +18,7 @@ public:
AfeAudioProcessor();
~AfeAudioProcessor();
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
void Feed(std::vector<int16_t>&& data) override;
void Start() override;
void Stop() override;

View File

@@ -3,7 +3,7 @@
#define TAG "NoAudioProcessor"
// Stores the codec pointer and derives frame_samples_ from the frame duration using a
// fixed 16000 samples per second (frame_duration_ms * 16000 / 1000, integer arithmetic).
// The `models_list` parameter, present only in the post-change signature, is not used
// in this body.
// NOTE(review): rendered diff — the two signature lines are the old and new versions
// of the same declaration.
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) {
codec_ = codec;
frame_samples_ = frame_duration_ms * 16000 / 1000;
}

View File

@@ -12,7 +12,7 @@ public:
NoAudioProcessor() = default;
~NoAudioProcessor() = default;
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
void Feed(std::vector<int16_t>&& data) override;
void Start() override;
void Stop() override;

View File

@@ -5,13 +5,14 @@
#include <vector>
#include <functional>
#include <model_path.h>
#include "audio_codec.h"
class WakeWord {
public:
virtual ~WakeWord() = default;
virtual bool Initialize(AudioCodec* codec) = 0;
virtual bool Initialize(AudioCodec* codec, srmodel_list_t* models_list) = 0;
virtual void Feed(const std::vector<int16_t>& data) = 0;
virtual void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) = 0;
virtual void Start() = 0;

View File

@@ -36,11 +36,16 @@ AfeWakeWord::~AfeWakeWord() {
vEventGroupDelete(event_group_);
}
bool AfeWakeWord::Initialize(AudioCodec* codec) {
bool AfeWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
codec_ = codec;
int ref_num = codec_->input_reference() ? 1 : 0;
models_ = esp_srmodel_init("model");
if (models_list == nullptr) {
models_ = esp_srmodel_init("model");
} else {
models_ = models_list;
}
if (models_ == nullptr || models_->num == -1) {
ESP_LOGE(TAG, "Failed to initialize wakenet model");
return false;

View File

@@ -24,7 +24,7 @@ public:
AfeWakeWord();
~AfeWakeWord();
bool Initialize(AudioCodec* codec);
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();

View File

@@ -34,10 +34,15 @@ CustomWakeWord::~CustomWakeWord() {
}
}
bool CustomWakeWord::Initialize(AudioCodec* codec) {
bool CustomWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
codec_ = codec;
models_ = esp_srmodel_init("model");
if (models_list == nullptr) {
models_ = esp_srmodel_init("model");
} else {
models_ = models_list;
}
if (models_ == nullptr || models_->num == -1) {
ESP_LOGE(TAG, "Failed to initialize wakenet model");
return false;

View File

@@ -22,7 +22,7 @@ public:
CustomWakeWord();
~CustomWakeWord();
bool Initialize(AudioCodec* codec);
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();

View File

@@ -14,10 +14,15 @@ EspWakeWord::~EspWakeWord() {
}
}
bool EspWakeWord::Initialize(AudioCodec* codec) {
bool EspWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
codec_ = codec;
wakenet_model_ = esp_srmodel_init("model");
if (models_list == nullptr) {
wakenet_model_ = esp_srmodel_init("model");
} else {
wakenet_model_ = models_list;
}
if (wakenet_model_ == nullptr || wakenet_model_->num == -1) {
ESP_LOGE(TAG, "Failed to initialize wakenet model");
return false;

View File

@@ -18,7 +18,7 @@ public:
EspWakeWord();
~EspWakeWord();
bool Initialize(AudioCodec* codec);
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();