Skip to content

Commit

Permalink
ES8311 + ES7210
Browse files Browse the repository at this point in the history
  • Loading branch information
78 committed Oct 24, 2024
1 parent 4c6da77 commit a2487f4
Show file tree
Hide file tree
Showing 17 changed files with 3,304 additions and 163 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# CMakeLists in this exact order for cmake to work correctly
cmake_minimum_required(VERSION 3.16)

set(PROJECT_VER "0.3.3")
set(PROJECT_VER "0.4.0")

include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(xiaozhi)
112 changes: 83 additions & 29 deletions main/Application.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,28 @@

Application::Application()
: boot_button_((gpio_num_t)CONFIG_BOOT_BUTTON_GPIO),
volume_up_button_((gpio_num_t)CONFIG_VOLUME_UP_BUTTON_GPIO)
volume_up_button_((gpio_num_t)CONFIG_VOLUME_UP_BUTTON_GPIO),
volume_down_button_((gpio_num_t)CONFIG_VOLUME_DOWN_BUTTON_GPIO),
#ifdef CONFIG_USE_DISPLAY
display_(CONFIG_DISPLAY_SDA_PIN, CONFIG_DISPLAY_SCL_PIN),
#endif
#ifdef CONFIG_USE_ML307
, ml307_at_modem_(CONFIG_ML307_TX_PIN, CONFIG_ML307_RX_PIN, 4096),
ml307_at_modem_(CONFIG_ML307_TX_PIN, CONFIG_ML307_RX_PIN, 4096),
http_(ml307_at_modem_),
firmware_upgrade_(http_)
#else
, http_(),
firmware_upgrade_(http_)
#endif
#ifdef CONFIG_USE_DISPLAY
, display_(CONFIG_DISPLAY_SDA_PIN, CONFIG_DISPLAY_SCL_PIN)
http_(),
#endif
firmware_upgrade_(http_)
{
event_group_ = xEventGroupCreate();
opus_encoder_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1);

opus_encoder_.Configure(16000, 1);
opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL);
if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
opus_resampler_.Configure(opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
output_resampler_.Configure(CONFIG_AUDIO_OUTPUT_SAMPLE_RATE, opus_decode_sample_rate_);
}
if (16000 != CONFIG_AUDIO_INPUT_SAMPLE_RATE) {
input_resampler_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 16000);
}

firmware_upgrade_.SetCheckVersionUrl(CONFIG_OTA_VERSION_URL);
Expand Down Expand Up @@ -185,29 +188,49 @@ void Application::Start() {
}
#endif

audio_device_.OnInputData([this](const int16_t* data, int size) {
audio_device_.Initialize();
audio_device_.OnInputData([this](std::vector<int16_t>&& data) {
if (16000 != CONFIG_AUDIO_INPUT_SAMPLE_RATE) {
if (audio_device_.input_channels() == 2) {
auto left_channel = std::vector<int16_t>(data.size() / 2);
auto right_channel = std::vector<int16_t>(data.size() / 2);
for (size_t i = 0, j = 0; i < left_channel.size(); ++i, j += 2) {
left_channel[i] = data[j];
right_channel[i] = data[j + 1];
}
auto resampled_left = std::vector<int16_t>(input_resampler_.GetOutputSamples(left_channel.size()));
auto resampled_right = std::vector<int16_t>(input_resampler_.GetOutputSamples(right_channel.size()));
input_resampler_.Process(left_channel.data(), left_channel.size(), resampled_left.data());
input_resampler_.Process(right_channel.data(), right_channel.size(), resampled_right.data());
data.resize(resampled_left.size() + resampled_right.size());
for (size_t i = 0, j = 0; i < resampled_left.size(); ++i, j += 2) {
data[j] = resampled_left[i];
data[j + 1] = resampled_right[i];
}
} else {
auto resampled = std::vector<int16_t>(input_resampler_.GetOutputSamples(data.size()));
input_resampler_.Process(data.data(), data.size(), resampled.data());
data = std::move(resampled);
}
}
#ifdef CONFIG_USE_AFE_SR
if (audio_processor_.IsRunning()) {
audio_processor_.Input(data, size);
audio_processor_.Input(data);
}
if (wake_word_detect_.IsDetectionRunning()) {
wake_word_detect_.Feed(data, size);
wake_word_detect_.Feed(data);
}
#else
std::vector<int16_t> pcm(data, data + size);
Schedule([this, pcm = std::move(pcm)]() {
Schedule([this, data = std::move(data)]() {
if (chat_state_ == kChatStateListening) {
std::lock_guard<std::mutex> lock(mutex_);
audio_encode_queue_.emplace_back(std::move(pcm));
audio_encode_queue_.emplace_back(std::move(data));
cv_.notify_all();
}
});
#endif
});

// Initialize the audio device
audio_device_.Start(CONFIG_AUDIO_INPUT_SAMPLE_RATE, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);

// OPUS encoder / decoder use a lot of stack memory
const size_t opus_stack_size = 4096 * 8;
audio_encode_task_stack_ = (StackType_t*)malloc(opus_stack_size);
Expand All @@ -221,9 +244,10 @@ void Application::Start() {
Application* app = (Application*)arg;
app->AudioPlayTask();
vTaskDelete(NULL);
}, "play_audio", 4096 * 4, this, 5, NULL);
}, "play_audio", 4096 * 4, this, 4, NULL);

#ifdef CONFIG_USE_AFE_SR
wake_word_detect_.Initialize(audio_device_.input_channels(), audio_device_.input_reference());
wake_word_detect_.OnVadStateChange([this](bool speaking) {
Schedule([this, speaking]() {
auto& builtin_led = BuiltinLed::GetInstance();
Expand Down Expand Up @@ -272,6 +296,7 @@ void Application::Start() {
});
wake_word_detect_.StartDetection();

audio_processor_.Initialize(audio_device_.input_channels(), audio_device_.input_reference());
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
Schedule([this, data = std::move(data)]() {
if (chat_state_ == kChatStateListening) {
Expand Down Expand Up @@ -317,7 +342,7 @@ void Application::Start() {
Schedule([this]() {
auto volume = audio_device_.output_volume() + 10;
if (volume > 100) {
volume = 0;
volume = 100;
}
audio_device_.SetOutputVolume(volume);
#ifdef CONFIG_USE_DISPLAY
Expand All @@ -327,6 +352,28 @@ void Application::Start() {
});

volume_up_button_.OnLongPress([this]() {
Schedule([this]() {
audio_device_.SetOutputVolume(100);
#ifdef CONFIG_USE_DISPLAY
display_.ShowNotification("Volume\n100");
#endif
});
});

volume_down_button_.OnClick([this]() {
Schedule([this]() {
auto volume = audio_device_.output_volume() - 10;
if (volume < 0) {
volume = 0;
}
audio_device_.SetOutputVolume(volume);
#ifdef CONFIG_USE_DISPLAY
display_.ShowNotification("Volume\n" + std::to_string(volume));
#endif
});
});

volume_down_button_.OnLongPress([this]() {
Schedule([this]() {
audio_device_.SetOutputVolume(0);
#ifdef CONFIG_USE_DISPLAY
Expand Down Expand Up @@ -449,10 +496,12 @@ BinaryProtocol* Application::AllocateBinaryProtocol(const uint8_t* payload, size

void Application::AudioEncodeTask() {
ESP_LOGI(TAG, "Audio encode task started");
const int max_audio_play_queue_size_ = 2;

while (true) {
std::unique_lock<std::mutex> lock(mutex_);
cv_.wait(lock, [this]() {
return !audio_encode_queue_.empty() || !audio_decode_queue_.empty();
return !audio_encode_queue_.empty() || (!audio_decode_queue_.empty() && audio_play_queue_.size() < max_audio_play_queue_size_);
});

if (!audio_encode_queue_.empty()) {
Expand Down Expand Up @@ -488,9 +537,9 @@ void Application::AudioEncodeTask() {
}

if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
int target_size = opus_resampler_.GetOutputSamples(frame_size);
int target_size = output_resampler_.GetOutputSamples(frame_size);
std::vector<int16_t> resampled(target_size);
opus_resampler_.Process(packet->pcm.data(), frame_size, resampled.data());
output_resampler_.Process(packet->pcm.data(), frame_size, resampled.data());
packet->pcm = std::move(resampled);
}

Expand All @@ -513,7 +562,6 @@ void Application::HandleAudioPacket(AudioPacket* packet) {
audio_device_.OutputData(packet->pcm);

if (break_speaking_) {
break_speaking_ = false;
skip_to_end_ = true;

// Play a silence and skip to the end
Expand All @@ -525,12 +573,13 @@ void Application::HandleAudioPacket(AudioPacket* packet) {
break;
}
case kAudioPacketTypeStart:
break_speaking_ = false;
skip_to_end_ = false;
Schedule([this]() {
SetChatState(kChatStateSpeaking);
});
break;
case kAudioPacketTypeStop:
skip_to_end_ = false;
Schedule([this]() {
SetChatState(kChatStateListening);
});
Expand Down Expand Up @@ -558,6 +607,7 @@ void Application::AudioPlayTask() {
});
auto packet = std::move(audio_play_queue_.front());
audio_play_queue_.pop_front();
cv_.notify_all();
lock.unlock();

HandleAudioPacket(packet);
Expand All @@ -574,7 +624,7 @@ void Application::SetDecodeSampleRate(int sample_rate) {
opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL);
if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
ESP_LOGI(TAG, "Resampling audio from %d to %d", opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
opus_resampler_.Configure(opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
output_resampler_.Configure(opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
}
}

Expand Down Expand Up @@ -607,7 +657,7 @@ void Application::StartWebSocketClient() {
std::string message = "{";
message += "\"type\":\"hello\",";
message += "\"audio_params\":{";
message += "\"format\":\"opus\", \"sample_rate\":" + std::to_string(CONFIG_AUDIO_INPUT_SAMPLE_RATE) + ", \"channels\":1";
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1";
message += "}}";
ws_client_->Send(message);
});
Expand Down Expand Up @@ -640,6 +690,10 @@ void Application::StartWebSocketClient() {
if (sample_rate != NULL) {
SetDecodeSampleRate(sample_rate->valueint);
}

// If the device is speaking, we need to break the speaking
break_speaking_ = true;
skip_to_end_ = true;
} else if (strcmp(state->valuestring, "stop") == 0) {
packet->type = kAudioPacketTypeStop;
} else if (strcmp(state->valuestring, "sentence_end") == 0) {
Expand Down
16 changes: 11 additions & 5 deletions main/Application.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#ifndef _APPLICATION_H_
#define _APPLICATION_H_

#include "AudioDevice.h"
#include <OpusEncoder.h>
#include <OpusResampler.h>
#include <WebSocket.h>
Expand All @@ -17,6 +16,7 @@
#include <list>
#include <condition_variable>

#include "BoxAudioDevice.h"
#include "Display.h"
#include "FirmwareUpgrade.h"

Expand Down Expand Up @@ -86,7 +86,15 @@ class Application {

Button boot_button_;
Button volume_up_button_;
Button volume_down_button_;
#ifdef CONFIG_AUDIO_CODEC_ES8311_ES7210
BoxAudioDevice audio_device_;
#else
AudioDevice audio_device_;
#endif
#ifdef CONFIG_USE_DISPLAY
Display display_;
#endif
#ifdef CONFIG_USE_AFE_SR
WakeWordDetect wake_word_detect_;
AudioProcessor audio_processor_;
Expand All @@ -98,9 +106,6 @@ class Application {
EspHttp http_;
#endif
FirmwareUpgrade firmware_upgrade_;
#ifdef CONFIG_USE_DISPLAY
Display display_;
#endif
std::mutex mutex_;
std::condition_variable_any cv_;
std::list<std::function<void()>> main_tasks_;
Expand All @@ -123,7 +128,8 @@ class Application {

int opus_duration_ms_ = 60;
int opus_decode_sample_rate_ = CONFIG_AUDIO_OUTPUT_SAMPLE_RATE;
OpusResampler opus_resampler_;
OpusResampler input_resampler_;
OpusResampler output_resampler_;

TaskHandle_t check_new_version_task_ = nullptr;
StaticTask_t check_new_version_task_buffer_;
Expand Down
Loading

0 comments on commit a2487f4

Please sign in to comment.