Files
xiaozhi-esp32/main/application.cc
2025-09-05 13:25:11 +08:00

869 lines
32 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "application.h"
#include "board.h"
#include "display.h"
#include "system_info.h"
#include "audio_codec.h"
#include "mqtt_protocol.h"
#include "websocket_protocol.h"
#include "assets/lang_config.h"
#include "mcp_server.h"
#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
#include <driver/gpio.h>
#include <arpa/inet.h>
#include <font_awesome.h>
#define TAG "Application"
static const char* const STATE_STRINGS[] = {
"unknown",
"starting",
"configuring",
"idle",
"connecting",
"listening",
"speaking",
"upgrading",
"activating",
"audio_testing",
"fatal_error",
"invalid_state"
};
Application::Application() {
event_group_ = xEventGroupCreate();
#if CONFIG_USE_DEVICE_AEC && CONFIG_USE_SERVER_AEC
#error "CONFIG_USE_DEVICE_AEC and CONFIG_USE_SERVER_AEC cannot be enabled at the same time"
#elif CONFIG_USE_DEVICE_AEC
aec_mode_ = kAecOnDeviceSide;
#elif CONFIG_USE_SERVER_AEC
aec_mode_ = kAecOnServerSide;
#else
aec_mode_ = kAecOff;
#endif
esp_timer_create_args_t clock_timer_args = {
.callback = [](void* arg) {
Application* app = (Application*)arg;
app->OnClockTimer();
},
.arg = this,
.dispatch_method = ESP_TIMER_TASK,
.name = "clock_timer",
.skip_unhandled_events = true
};
esp_timer_create(&clock_timer_args, &clock_timer_handle_);
}
Application::~Application() {
if (clock_timer_handle_ != nullptr) {
esp_timer_stop(clock_timer_handle_);
esp_timer_delete(clock_timer_handle_);
}
vEventGroupDelete(event_group_);
}
void Application::CheckNewVersion(Ota& ota) {
const int MAX_RETRY = 10;
int retry_count = 0;
int retry_delay = 10; // 初始重试延迟为10秒
auto& board = Board::GetInstance();
while (true) {
SetDeviceState(kDeviceStateActivating);
auto display = board.GetDisplay();
display->SetStatus(Lang::Strings::CHECKING_NEW_VERSION);
if (!ota.CheckVersion()) {
retry_count++;
if (retry_count >= MAX_RETRY) {
ESP_LOGE(TAG, "Too many retries, exit version check");
return;
}
char buffer[256];
snprintf(buffer, sizeof(buffer), Lang::Strings::CHECK_NEW_VERSION_FAILED, retry_delay, ota.GetCheckVersionUrl().c_str());
Alert(Lang::Strings::ERROR, buffer, "cloud_slash", Lang::Sounds::OGG_EXCLAMATION);
ESP_LOGW(TAG, "Check new version failed, retry in %d seconds (%d/%d)", retry_delay, retry_count, MAX_RETRY);
for (int i = 0; i < retry_delay; i++) {
vTaskDelay(pdMS_TO_TICKS(1000));
if (device_state_ == kDeviceStateIdle) {
break;
}
}
retry_delay *= 2; // 每次重试后延迟时间翻倍
continue;
}
retry_count = 0;
retry_delay = 10; // 重置重试延迟时间
if (ota.HasNewVersion()) {
Alert(Lang::Strings::OTA_UPGRADE, Lang::Strings::UPGRADING, "download", Lang::Sounds::OGG_UPGRADE);
vTaskDelay(pdMS_TO_TICKS(3000));
SetDeviceState(kDeviceStateUpgrading);
std::string message = std::string(Lang::Strings::NEW_VERSION) + ota.GetFirmwareVersion();
display->SetChatMessage("system", message.c_str());
board.SetPowerSaveMode(false);
audio_service_.Stop();
vTaskDelay(pdMS_TO_TICKS(1000));
bool upgrade_success = ota.StartUpgrade([display](int progress, size_t speed) {
std::thread([display, progress, speed]() {
char buffer[32];
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
display->SetChatMessage("system", buffer);
}).detach();
});
if (!upgrade_success) {
// Upgrade failed, restart audio service and continue running
ESP_LOGE(TAG, "Firmware upgrade failed, restarting audio service and continuing operation...");
audio_service_.Start(); // Restart audio service
board.SetPowerSaveMode(true); // Restore power save mode
Alert(Lang::Strings::ERROR, Lang::Strings::UPGRADE_FAILED, "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
vTaskDelay(pdMS_TO_TICKS(3000));
// Continue to normal operation (don't break, just fall through)
} else {
// Upgrade success, reboot immediately
ESP_LOGI(TAG, "Firmware upgrade successful, rebooting...");
display->SetChatMessage("system", "Upgrade successful, rebooting...");
vTaskDelay(pdMS_TO_TICKS(1000)); // Brief pause to show message
Reboot();
return; // This line will never be reached after reboot
}
}
// No new version, mark the current version as valid
ota.MarkCurrentVersionValid();
if (!ota.HasActivationCode() && !ota.HasActivationChallenge()) {
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
// Exit the loop if done checking new version
break;
}
display->SetStatus(Lang::Strings::ACTIVATION);
// Activation code is shown to the user and waiting for the user to input
if (ota.HasActivationCode()) {
ShowActivationCode(ota.GetActivationCode(), ota.GetActivationMessage());
}
// This will block the loop until the activation is done or timeout
for (int i = 0; i < 10; ++i) {
ESP_LOGI(TAG, "Activating... %d/%d", i + 1, 10);
esp_err_t err = ota.Activate();
if (err == ESP_OK) {
xEventGroupSetBits(event_group_, MAIN_EVENT_CHECK_NEW_VERSION_DONE);
break;
} else if (err == ESP_ERR_TIMEOUT) {
vTaskDelay(pdMS_TO_TICKS(3000));
} else {
vTaskDelay(pdMS_TO_TICKS(10000));
}
if (device_state_ == kDeviceStateIdle) {
break;
}
}
}
}
void Application::ShowActivationCode(const std::string& code, const std::string& message) {
struct digit_sound {
char digit;
const std::string_view& sound;
};
static const std::array<digit_sound, 10> digit_sounds{{
digit_sound{'0', Lang::Sounds::OGG_0},
digit_sound{'1', Lang::Sounds::OGG_1},
digit_sound{'2', Lang::Sounds::OGG_2},
digit_sound{'3', Lang::Sounds::OGG_3},
digit_sound{'4', Lang::Sounds::OGG_4},
digit_sound{'5', Lang::Sounds::OGG_5},
digit_sound{'6', Lang::Sounds::OGG_6},
digit_sound{'7', Lang::Sounds::OGG_7},
digit_sound{'8', Lang::Sounds::OGG_8},
digit_sound{'9', Lang::Sounds::OGG_9}
}};
// This sentence uses 9KB of SRAM, so we need to wait for it to finish
Alert(Lang::Strings::ACTIVATION, message.c_str(), "link", Lang::Sounds::OGG_ACTIVATION);
for (const auto& digit : code) {
auto it = std::find_if(digit_sounds.begin(), digit_sounds.end(),
[digit](const digit_sound& ds) { return ds.digit == digit; });
if (it != digit_sounds.end()) {
audio_service_.PlaySound(it->sound);
}
}
}
void Application::Alert(const char* status, const char* message, const char* emotion, const std::string_view& sound) {
ESP_LOGW(TAG, "Alert [%s] %s: %s", emotion, status, message);
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(status);
display->SetEmotion(emotion);
display->SetChatMessage("system", message);
if (!sound.empty()) {
audio_service_.PlaySound(sound);
}
}
void Application::DismissAlert() {
if (device_state_ == kDeviceStateIdle) {
auto display = Board::GetInstance().GetDisplay();
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
display->SetChatMessage("system", "");
}
}
void Application::ToggleChatState() {
if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
return;
} else if (device_state_ == kDeviceStateWifiConfiguring) {
audio_service_.EnableAudioTesting(true);
SetDeviceState(kDeviceStateAudioTesting);
return;
} else if (device_state_ == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring);
return;
}
if (!protocol_) {
ESP_LOGE(TAG, "Protocol not initialized");
return;
}
if (device_state_ == kDeviceStateIdle) {
Schedule([this]() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
return;
}
}
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (device_state_ == kDeviceStateListening) {
Schedule([this]() {
protocol_->CloseAudioChannel();
});
}
}
void Application::StartListening() {
if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
return;
} else if (device_state_ == kDeviceStateWifiConfiguring) {
audio_service_.EnableAudioTesting(true);
SetDeviceState(kDeviceStateAudioTesting);
return;
}
if (!protocol_) {
ESP_LOGE(TAG, "Protocol not initialized");
return;
}
if (device_state_ == kDeviceStateIdle) {
Schedule([this]() {
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
return;
}
}
SetListeningMode(kListeningModeManualStop);
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
SetListeningMode(kListeningModeManualStop);
});
}
}
void Application::StopListening() {
if (device_state_ == kDeviceStateAudioTesting) {
audio_service_.EnableAudioTesting(false);
SetDeviceState(kDeviceStateWifiConfiguring);
return;
}
const std::array<int, 3> valid_states = {
kDeviceStateListening,
kDeviceStateSpeaking,
kDeviceStateIdle,
};
// If not valid, do nothing
if (std::find(valid_states.begin(), valid_states.end(), device_state_) == valid_states.end()) {
return;
}
Schedule([this]() {
if (device_state_ == kDeviceStateListening) {
protocol_->SendStopListening();
SetDeviceState(kDeviceStateIdle);
}
});
}
void Application::Start() {
auto& board = Board::GetInstance();
SetDeviceState(kDeviceStateStarting);
/* Setup the display */
auto display = board.GetDisplay();
/* Setup the audio service */
auto codec = board.GetAudioCodec();
audio_service_.Initialize(codec);
audio_service_.Start();
AudioServiceCallbacks callbacks;
callbacks.on_send_queue_available = [this]() {
xEventGroupSetBits(event_group_, MAIN_EVENT_SEND_AUDIO);
};
callbacks.on_wake_word_detected = [this](const std::string& wake_word) {
xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
};
callbacks.on_vad_change = [this](bool speaking) {
xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
};
audio_service_.SetCallbacks(callbacks);
/* Start the clock timer to update the status bar */
esp_timer_start_periodic(clock_timer_handle_, 1000000);
/* Wait for the network to be ready */
board.StartNetwork();
// Update the status bar immediately to show the network state
display->UpdateStatusBar(true);
// Check for new firmware version or get the MQTT broker address
Ota ota;
CheckNewVersion(ota);
// Initialize the protocol
display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
// Add MCP common tools before initializing the protocol
McpServer::GetInstance().AddCommonTools();
if (ota.HasMqttConfig()) {
protocol_ = std::make_unique<MqttProtocol>();
} else if (ota.HasWebsocketConfig()) {
protocol_ = std::make_unique<WebsocketProtocol>();
} else {
ESP_LOGW(TAG, "No protocol specified in the OTA config, using MQTT");
protocol_ = std::make_unique<MqttProtocol>();
}
protocol_->OnConnected([this]() {
DismissAlert();
});
protocol_->OnNetworkError([this](const std::string& message) {
last_error_message_ = message;
xEventGroupSetBits(event_group_, MAIN_EVENT_ERROR);
});
protocol_->OnIncomingAudio([this](std::unique_ptr<AudioStreamPacket> packet) {
if (device_state_ == kDeviceStateSpeaking) {
audio_service_.PushPacketToDecodeQueue(std::move(packet));
}
});
protocol_->OnAudioChannelOpened([this, codec, &board]() {
board.SetPowerSaveMode(false);
if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
protocol_->server_sample_rate(), codec->output_sample_rate());
}
});
protocol_->OnAudioChannelClosed([this, &board]() {
board.SetPowerSaveMode(true);
Schedule([this]() {
auto display = Board::GetInstance().GetDisplay();
display->SetChatMessage("system", "");
SetDeviceState(kDeviceStateIdle);
});
});
protocol_->OnIncomingJson([this, display](const cJSON* root) {
// Parse JSON data
auto type = cJSON_GetObjectItem(root, "type");
if (strcmp(type->valuestring, "tts") == 0) {
auto state = cJSON_GetObjectItem(root, "state");
if (strcmp(state->valuestring, "start") == 0) {
Schedule([this]() {
aborted_ = false;
if (device_state_ == kDeviceStateIdle || device_state_ == kDeviceStateListening) {
SetDeviceState(kDeviceStateSpeaking);
}
});
} else if (strcmp(state->valuestring, "stop") == 0) {
Schedule([this]() {
if (device_state_ == kDeviceStateSpeaking) {
if (listening_mode_ == kListeningModeManualStop) {
SetDeviceState(kDeviceStateIdle);
} else {
SetDeviceState(kDeviceStateListening);
}
}
});
} else if (strcmp(state->valuestring, "sentence_start") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (cJSON_IsString(text)) {
ESP_LOGI(TAG, "<< %s", text->valuestring);
Schedule([this, display, message = std::string(text->valuestring)]() {
display->SetChatMessage("assistant", message.c_str());
});
}
}
} else if (strcmp(type->valuestring, "stt") == 0) {
auto text = cJSON_GetObjectItem(root, "text");
if (cJSON_IsString(text)) {
ESP_LOGI(TAG, ">> %s", text->valuestring);
Schedule([this, display, message = std::string(text->valuestring)]() {
display->SetChatMessage("user", message.c_str());
});
}
} else if (strcmp(type->valuestring, "llm") == 0) {
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(emotion)) {
Schedule([this, display, emotion_str = std::string(emotion->valuestring)]() {
display->SetEmotion(emotion_str.c_str());
});
}
} else if (strcmp(type->valuestring, "mcp") == 0) {
auto payload = cJSON_GetObjectItem(root, "payload");
if (cJSON_IsObject(payload)) {
McpServer::GetInstance().ParseMessage(payload);
}
} else if (strcmp(type->valuestring, "system") == 0) {
auto command = cJSON_GetObjectItem(root, "command");
if (cJSON_IsString(command)) {
ESP_LOGI(TAG, "System command: %s", command->valuestring);
if (strcmp(command->valuestring, "reboot") == 0) {
// Do a reboot if user requests a OTA update
Schedule([this]() {
Reboot();
});
} else {
ESP_LOGW(TAG, "Unknown system command: %s", command->valuestring);
}
}
} else if (strcmp(type->valuestring, "alert") == 0) {
auto status = cJSON_GetObjectItem(root, "status");
auto message = cJSON_GetObjectItem(root, "message");
auto emotion = cJSON_GetObjectItem(root, "emotion");
if (cJSON_IsString(status) && cJSON_IsString(message) && cJSON_IsString(emotion)) {
Alert(status->valuestring, message->valuestring, emotion->valuestring, Lang::Sounds::OGG_VIBRATION);
} else {
ESP_LOGW(TAG, "Alert command requires status, message and emotion");
}
#if CONFIG_RECEIVE_CUSTOM_MESSAGE
} else if (strcmp(type->valuestring, "custom") == 0) {
auto payload = cJSON_GetObjectItem(root, "payload");
ESP_LOGI(TAG, "Received custom message: %s", cJSON_PrintUnformatted(root));
if (cJSON_IsObject(payload)) {
Schedule([this, display, payload_str = std::string(cJSON_PrintUnformatted(payload))]() {
display->SetChatMessage("system", payload_str.c_str());
});
} else {
ESP_LOGW(TAG, "Invalid custom message format: missing payload");
}
#endif
} else {
ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring);
}
});
bool protocol_started = protocol_->Start();
SetDeviceState(kDeviceStateIdle);
has_server_time_ = ota.HasServerTime();
if (protocol_started) {
std::string message = std::string(Lang::Strings::VERSION) + ota.GetCurrentVersion();
display->ShowNotification(message.c_str());
display->SetChatMessage("system", "");
// Play the success sound to indicate the device is ready
audio_service_.PlaySound(Lang::Sounds::OGG_SUCCESS);
}
// Print heap stats
SystemInfo::PrintHeapStats();
}
void Application::OnClockTimer() {
clock_ticks_++;
auto display = Board::GetInstance().GetDisplay();
display->UpdateStatusBar();
// Print the debug info every 10 seconds
if (clock_ticks_ % 10 == 0) {
// SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
// SystemInfo::PrintTaskList();
SystemInfo::PrintHeapStats();
}
}
// Add a async task to MainLoop
void Application::Schedule(std::function<void()> callback) {
{
std::lock_guard<std::mutex> lock(mutex_);
main_tasks_.push_back(std::move(callback));
}
xEventGroupSetBits(event_group_, MAIN_EVENT_SCHEDULE);
}
// The Main Event Loop controls the chat state and websocket connection
// If other tasks need to access the websocket or chat state,
// they should use Schedule to call this function
void Application::MainEventLoop() {
// Raise the priority of the main event loop to avoid being interrupted by background tasks (which has priority 2)
vTaskPrioritySet(NULL, 3);
while (true) {
auto bits = xEventGroupWaitBits(event_group_, MAIN_EVENT_SCHEDULE |
MAIN_EVENT_SEND_AUDIO |
MAIN_EVENT_WAKE_WORD_DETECTED |
MAIN_EVENT_VAD_CHANGE |
MAIN_EVENT_ERROR, pdTRUE, pdFALSE, portMAX_DELAY);
if (bits & MAIN_EVENT_ERROR) {
SetDeviceState(kDeviceStateIdle);
Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
}
if (bits & MAIN_EVENT_SEND_AUDIO) {
while (auto packet = audio_service_.PopPacketFromSendQueue()) {
if (!protocol_->SendAudio(std::move(packet))) {
break;
}
}
}
if (bits & MAIN_EVENT_WAKE_WORD_DETECTED) {
OnWakeWordDetected();
}
if (bits & MAIN_EVENT_VAD_CHANGE) {
if (device_state_ == kDeviceStateListening) {
auto led = Board::GetInstance().GetLed();
led->OnStateChanged();
}
}
if (bits & MAIN_EVENT_SCHEDULE) {
std::unique_lock<std::mutex> lock(mutex_);
auto tasks = std::move(main_tasks_);
lock.unlock();
for (auto& task : tasks) {
task();
}
}
}
}
void Application::OnWakeWordDetected() {
if (!protocol_) {
return;
}
if (device_state_ == kDeviceStateIdle) {
audio_service_.EncodeWakeWord();
if (!protocol_->IsAudioChannelOpened()) {
SetDeviceState(kDeviceStateConnecting);
if (!protocol_->OpenAudioChannel()) {
audio_service_.EnableWakeWordDetection(true);
return;
}
}
auto wake_word = audio_service_.GetLastWakeWord();
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD
// Encode and send the wake word data to the server
while (auto packet = audio_service_.PopWakeWordPacket()) {
protocol_->SendAudio(std::move(packet));
}
// Set the chat state to wake word detected
protocol_->SendWakeWordDetected(wake_word);
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
#else
SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
// Play the pop up sound to indicate the wake word is detected
audio_service_.PlaySound(Lang::Sounds::OGG_POPUP);
#endif
} else if (device_state_ == kDeviceStateSpeaking) {
AbortSpeaking(kAbortReasonWakeWordDetected);
} else if (device_state_ == kDeviceStateActivating) {
SetDeviceState(kDeviceStateIdle);
}
}
void Application::AbortSpeaking(AbortReason reason) {
ESP_LOGI(TAG, "Abort speaking");
aborted_ = true;
protocol_->SendAbortSpeaking(reason);
}
void Application::SetListeningMode(ListeningMode mode) {
listening_mode_ = mode;
SetDeviceState(kDeviceStateListening);
}
void Application::SetDeviceState(DeviceState state) {
if (device_state_ == state) {
return;
}
clock_ticks_ = 0;
auto previous_state = device_state_;
device_state_ = state;
ESP_LOGI(TAG, "STATE: %s", STATE_STRINGS[device_state_]);
// Send the state change event
DeviceStateEventManager::GetInstance().PostStateChangeEvent(previous_state, state);
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
auto led = board.GetLed();
led->OnStateChanged();
// 当从idle状态变成其他任何状态时停止音乐播放
if (previous_state == kDeviceStateIdle && state != kDeviceStateIdle) {
auto music = board.GetMusic();
if (music) {
ESP_LOGI(TAG, "Stopping music streaming due to state change: %s -> %s",
STATE_STRINGS[previous_state], STATE_STRINGS[state]);
music->StopStreaming();
}
}
switch (state) {
case kDeviceStateUnknown:
case kDeviceStateIdle:
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
audio_service_.EnableVoiceProcessing(false);
audio_service_.EnableWakeWordDetection(true);
break;
case kDeviceStateConnecting:
display->SetStatus(Lang::Strings::CONNECTING);
display->SetEmotion("neutral");
display->SetChatMessage("system", "");
break;
case kDeviceStateListening:
display->SetStatus(Lang::Strings::LISTENING);
display->SetEmotion("neutral");
// Make sure the audio processor is running
if (!audio_service_.IsAudioProcessorRunning()) {
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
audio_service_.EnableVoiceProcessing(true);
audio_service_.EnableWakeWordDetection(false);
}
break;
case kDeviceStateSpeaking:
display->SetStatus(Lang::Strings::SPEAKING);
if (listening_mode_ != kListeningModeRealtime) {
audio_service_.EnableVoiceProcessing(false);
// Only AFE wake word can be detected in speaking mode
#if CONFIG_USE_AFE_WAKE_WORD
audio_service_.EnableWakeWordDetection(true);
#else
audio_service_.EnableWakeWordDetection(false);
#endif
}
audio_service_.ResetDecoder();
break;
default:
// Do nothing
break;
}
}
void Application::Reboot() {
ESP_LOGI(TAG, "Rebooting...");
esp_restart();
}
void Application::WakeWordInvoke(const std::string& wake_word) {
if (device_state_ == kDeviceStateIdle) {
ToggleChatState();
Schedule([this, wake_word]() {
if (protocol_) {
protocol_->SendWakeWordDetected(wake_word);
}
});
} else if (device_state_ == kDeviceStateSpeaking) {
Schedule([this]() {
AbortSpeaking(kAbortReasonNone);
});
} else if (device_state_ == kDeviceStateListening) {
Schedule([this]() {
if (protocol_) {
protocol_->CloseAudioChannel();
}
});
}
}
bool Application::CanEnterSleepMode() {
if (device_state_ != kDeviceStateIdle) {
return false;
}
if (protocol_ && protocol_->IsAudioChannelOpened()) {
return false;
}
if (!audio_service_.IsIdle()) {
return false;
}
// Now it is safe to enter sleep mode
return true;
}
void Application::SendMcpMessage(const std::string& payload) {
Schedule([this, payload]() {
if (protocol_) {
protocol_->SendMcpMessage(payload);
}
});
}
void Application::SetAecMode(AecMode mode) {
aec_mode_ = mode;
Schedule([this]() {
auto& board = Board::GetInstance();
auto display = board.GetDisplay();
switch (aec_mode_) {
case kAecOff:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_OFF);
break;
case kAecOnServerSide:
audio_service_.EnableDeviceAec(false);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
case kAecOnDeviceSide:
audio_service_.EnableDeviceAec(true);
display->ShowNotification(Lang::Strings::RTC_MODE_ON);
break;
}
// If the AEC mode is changed, close the audio channel
if (protocol_ && protocol_->IsAudioChannelOpened()) {
protocol_->CloseAudioChannel();
}
});
}
// 新增:接收外部音频数据(如音乐播放)
void Application::AddAudioData(AudioStreamPacket&& packet) {
auto codec = Board::GetInstance().GetAudioCodec();
if (device_state_ == kDeviceStateIdle && codec->output_enabled()) {
// packet.payload包含的是原始PCM数据int16_t
if (packet.payload.size() >= 2) {
size_t num_samples = packet.payload.size() / sizeof(int16_t);
std::vector<int16_t> pcm_data(num_samples);
memcpy(pcm_data.data(), packet.payload.data(), packet.payload.size());
// 检查采样率是否匹配,如果不匹配则进行简单重采样
if (packet.sample_rate != codec->output_sample_rate()) {
// ESP_LOGI(TAG, "Resampling music audio from %d to %d Hz",
// packet.sample_rate, codec->output_sample_rate());
// 验证采样率参数
if (packet.sample_rate <= 0 || codec->output_sample_rate() <= 0) {
ESP_LOGE(TAG, "Invalid sample rates: %d -> %d",
packet.sample_rate, codec->output_sample_rate());
return;
}
std::vector<int16_t> resampled;
if (packet.sample_rate > codec->output_sample_rate()) {
ESP_LOGI(TAG, "Music playback: Adjust the sampling rate from %d Hz switch to %d Hz",
codec->output_sample_rate(), packet.sample_rate);
// 尝试动态切换采样率
if (codec->SetOutputSampleRate(packet.sample_rate)) {
ESP_LOGI(TAG, "Successfully switched to music playback sampling rate: %d Hz", packet.sample_rate);
} else {
ESP_LOGW(TAG, "Unable to switch sampling rate, continue using current sampling rate: %d Hz", codec->output_sample_rate());
}
} else {
// 上采样:线性插值
float upsample_ratio = codec->output_sample_rate() / static_cast<float>(packet.sample_rate);
size_t expected_size = static_cast<size_t>(pcm_data.size() * upsample_ratio + 0.5f);
resampled.reserve(expected_size);
for (size_t i = 0; i < pcm_data.size(); ++i) {
// 添加原始样本
resampled.push_back(pcm_data[i]);
// 计算需要插值的样本数
int interpolation_count = static_cast<int>(upsample_ratio) - 1;
if (interpolation_count > 0 && i + 1 < pcm_data.size()) {
int16_t current = pcm_data[i];
int16_t next = pcm_data[i + 1];
for (int j = 1; j <= interpolation_count; ++j) {
float t = static_cast<float>(j) / (interpolation_count + 1);
int16_t interpolated = static_cast<int16_t>(current + (next - current) * t);
resampled.push_back(interpolated);
}
} else if (interpolation_count > 0) {
// 最后一个样本,直接重复
for (int j = 1; j <= interpolation_count; ++j) {
resampled.push_back(pcm_data[i]);
}
}
}
ESP_LOGI(TAG, "Upsampled %d -> %d samples (ratio: %.2f)",
pcm_data.size(), resampled.size(), upsample_ratio);
}
pcm_data = std::move(resampled);
}
// 确保音频输出已启用
if (!codec->output_enabled()) {
codec->EnableOutput(true);
}
// 发送PCM数据到音频编解码器
codec->OutputData(pcm_data);
audio_service_.UpdateOutputTimestamp();
}
}
}
void Application::PlaySound(const std::string_view& sound) {
audio_service_.PlaySound(sound);
}