add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
--- a/managed_components/espressif__esp-sr/esp-tts/CMakeLists.txt
+++ b/managed_components/espressif__esp-sr/esp-tts/CMakeLists.txt
@@ -0,0 +1,31 @@
+
+# Public headers of the Chinese TTS engine.
+set(COMPONENT_ADD_INCLUDEDIRS
+    ./esp_tts_chinese/include
+    )
+
+# No sources are listed: the engine is shipped as prebuilt static archives.
+register_component()
+
+# The prebuilt archives are stored in one directory per chip
+# (esp_tts_chinese/<IDF_TARGET>/lib*.a) and their file names carry no target
+# suffix, so the search path is derived from IDF_TARGET instead of
+# hard-coding a library list for a few chips.
+set(esp_tts_lib_dir "${CMAKE_CURRENT_SOURCE_DIR}/esp_tts_chinese/${IDF_TARGET}")
+
+if(EXISTS "${esp_tts_lib_dir}/libesp_tts_chinese.a")
+    set(esp_tts_libs esp_tts_chinese)
+
+    # Voice sets are linked only when their archive is actually present for
+    # this target, so a missing optional voice set cannot break the link.
+    foreach(esp_tts_voice_lib voice_set_xiaole voice_set_template)
+        if(EXISTS "${esp_tts_lib_dir}/lib${esp_tts_voice_lib}.a")
+            list(APPEND esp_tts_libs ${esp_tts_voice_lib})
+        endif()
+    endforeach()
+
+    # "-L<dir>" is passed as one quoted item (no embedded space) so a path
+    # containing spaces stays a single linker argument.
+    target_link_libraries(${COMPONENT_TARGET} INTERFACE
+        "-L${esp_tts_lib_dir}"
+        ${esp_tts_libs}
+        )
+else()
+    message(WARNING
+        "esp-tts: no prebuilt TTS libraries found for target '${IDF_TARGET}' in ${esp_tts_lib_dir}")
+endif()
--- a/managed_components/espressif__esp-sr/esp-tts/README.md
+++ b/managed_components/espressif__esp-sr/esp-tts/README.md
@@ -0,0 +1,3 @@
+# ESP Chinese TTS
+
+The Espressif TTS model is a lightweight speech synthesis system designed for embedded systems. Currently, only Chinese is supported. For more information, see the [speech synthesis documentation](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_synthesis/readme.html).
--- a/managed_components/espressif__esp-sr/esp-tts/component.mk
+++ b/managed_components/espressif__esp-sr/esp-tts/component.mk
@@ -0,0 +1,10 @@
+# Public headers of the Chinese TTS engine.
+COMPONENT_ADD_INCLUDEDIRS := esp_tts_chinese/include
+
+# The prebuilt archives are stored in one sub-directory per chip
+# (esp_tts_chinese/<target>/lib*.a). Falls back to esp32 when IDF_TARGET is
+# not set -- assumed to be the default target of the Make-based build,
+# TODO confirm.
+ESP_TTS_LIB_DIR := $(COMPONENT_PATH)/esp_tts_chinese/$(if $(IDF_TARGET),$(IDF_TARGET),esp32)
+
+# $(wildcard) returns full paths and expands to nothing (without an error
+# message) when no archive exists; $(notdir) strips the directory part so the
+# lib%.a pattern below can match the bare file names.
+LIB_FILES := $(notdir $(wildcard $(ESP_TTS_LIB_DIR)/lib*.a))
+
+# libfoo.a -> -lfoo
+LIBS := $(patsubst lib%.a,-l%,$(LIB_FILES))
+
+COMPONENT_ADD_LDFLAGS +=  -L$(ESP_TTS_LIB_DIR) \
+						  $(LIBS)
+
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c3/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c3/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c3/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c3/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c5/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c5/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c5/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c5/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c6/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c6/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c6/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32c6/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32p4/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32p4/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32p4/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32p4/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s2/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s2/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s2/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s2/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s3/libesp_tts_chinese.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s3/libesp_tts_chinese.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s3/libvoice_set_xiaole.a
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp32s3/libvoice_set_xiaole.a
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaole.dat
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaole.dat
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin.dat
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin.dat
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin_custom.dat
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin_custom.dat
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin_small.dat
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin_small.dat
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts.h
@@ -0,0 +1,135 @@
+// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License
+#ifndef _ESP_TTS_H_
+#define _ESP_TTS_H_
+
+#include "stdlib.h"
+#include "stdio.h"
+#include "esp_tts_voice.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Selects the prompt played before the amount; used as the `mode`
+ *        argument of esp_tts_parse_money().
+ */
+typedef enum {
+	NONE_MODE      = 0,    // do not play any word before playing a specific number
+	ALI_PAY_MODE,          // play "zhi fu bao shou kuan" (pinyin prompt) before playing a specific number
+	WEIXIN_PAY_MODE        // play "wei xin shou kuan" (pinyin prompt) before playing a specific number
+} pay_mode_t;
+
+/** Handle to a TTS instance; an untyped pointer, so the underlying structure is not exposed by this header. */
+typedef void * esp_tts_handle_t;
+
+
+/**
+ * @brief Init an instance of the TTS voice set structure.
+ *
+ * @param voice_template  The const esp_tts_voice_template.
+ * @param data            The customized voice data
+ * @return
+ *         - NULL: Init failed
+ *         - Others: The instance of voice set
+ */
+esp_tts_voice_t *esp_tts_voice_set_init(const esp_tts_voice_t *voice_template, void *data);
+
+/**
+ * @brief Free an instance of the TTS voice set structure.
+ *
+ * @param voice      The voice set instance to free; presumably one obtained
+ *                   from esp_tts_voice_set_init() (TODO confirm).
+ */
+void esp_tts_voice_set_free(esp_tts_voice_t *voice);
+
+/**
+ * @brief Creates an instance of the TTS structure.
+ *
+ * The returned instance is freed with esp_tts_destroy().
+ *
+ * @param voice      Voice set containing all basic phonemes.
+ * @return
+ *         - NULL: Create failed
+ *         - Others: The instance of TTS structure
+ */
+esp_tts_handle_t esp_tts_create(esp_tts_voice_t *voice);
+
+/**
+ * @brief Parse money pronunciation.
+ *
+ * @param tts_handle   Instance of TTS
+ * @param yuan         The number of yuan
+ * @param jiao         The number of jiao
+ * @param fen          The number of fen
+ * @param mode         The pay mode: please refer to pay_mode_t
+ * @return
+ *        - 0: failed
+ *        - 1: succeeded
+ */
+int esp_tts_parse_money(esp_tts_handle_t tts_handle, int yuan, int jiao, int fen, pay_mode_t mode);
+
+/**
+ * @brief Parse Chinese PinYin pronunciation.
+ *
+ * @param tts_handle   Instance of TTS
+ * @param pinyin       PinYin string, like this "da4 jia1 hao3"
+ * @return
+ *         - 0: failed
+ *         - 1: succeeded
+ */
+int esp_tts_parse_pinyin(esp_tts_handle_t tts_handle, const char *pinyin);
+
+/**
+ * @brief Parse a Chinese string.
+ *
+ * @param tts_handle   Instance of TTS
+ * @param str          Chinese string, like this "大家好"
+ *                     (presumably UTF-8 encoded -- TODO confirm)
+ * @return
+ *         - 0: failed
+ *         - 1: succeeded
+ */
+int esp_tts_parse_chinese(esp_tts_handle_t tts_handle, const char *str);
+
+/**
+ * @brief Output TTS voice data by stream.
+ *
+ * @warning The output data should not be freed.
+ *          Once the output length is 0, all of the voice data has been output.
+ *
+ * @param tts_handle   Instance of TTS
+ * @param len          The length of output data
+ * @param speed        The speed of the synthesized speech,
+ *                     range:0~5, 0: the slowest speed, 5: the fastest speed
+ * @return
+ *        - voice raw data
+ */
+short* esp_tts_stream_play(esp_tts_handle_t tts_handle, int *len, unsigned int speed);
+
+/**
+ * @brief Reset the TTS stream and clean all cache of the TTS instance.
+ *
+ * @param tts_handle   Instance of TTS
+ */
+void esp_tts_stream_reset(esp_tts_handle_t tts_handle);
+
+/**
+ * @brief Free the TTS instance.
+ *
+ * @param tts_handle   The instance of TTS.
+ */
+void esp_tts_destroy(esp_tts_handle_t tts_handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_parser.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_parser.h
@@ -0,0 +1,31 @@
+#ifndef _ESP_TTS_PARSER_H_
+#define _ESP_TTS_PARSER_H_
+
+#include "stdlib.h"
+#include "esp_tts_voice.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * @brief Type returned by the esp_tts_parser_* functions and esp_tts_utt_alloc().
+ *
+ * NOTE(review): the field meanings below are inferred from the names only;
+ * confirm against the implementation.
+ */
+typedef struct {
+	int *syll_idx;           // presumably the syllable indices -- TODO confirm
+	int syll_num;            // presumably the number of syllables in use -- TODO confirm
+	int total_num;           // presumably the allocated capacity of syll_idx -- TODO confirm
+	esp_tts_voice_t *voice;  // voice set associated with this object
+}esp_tts_utt_t;
+
+/**
+ * @brief Parse a Chinese string with the given voice set.
+ *
+ * NOTE(review): the failure value and ownership of the result are not stated
+ * in this header; presumably the result is released with esp_tts_utt_free() -- confirm.
+ *
+ * @param str    Input string
+ * @param voice  Voice set
+ * @return Pointer to an esp_tts_utt_t
+ */
+esp_tts_utt_t* esp_tts_parser_chinese   (const char* str, esp_tts_voice_t *voice);
+
+/**
+ * @brief Parse an amount of money with the given voice set.
+ *
+ * NOTE(review): the failure value and ownership of the result are not stated
+ * in this header; presumably the result is released with esp_tts_utt_free() -- confirm.
+ *
+ * @param play_tag  Not documented here; presumably the prompt played before the amount -- TODO confirm
+ * @param yuan      The number of yuan
+ * @param jiao      The number of jiao
+ * @param fen       The number of fen
+ * @param voice     Voice set
+ * @return Pointer to an esp_tts_utt_t
+ */
+esp_tts_utt_t* esp_tts_parser_money(char *play_tag, int yuan, int jiao, int fen, esp_tts_voice_t *voice);
+
+/**
+ * @brief Parse a PinYin string with the given voice set.
+ *
+ * NOTE(review): `pinyin` is a non-const pointer; whether the buffer is
+ * modified is not stated in this header -- confirm before passing a string
+ * literal. Presumably the result is released with esp_tts_utt_free() -- confirm.
+ *
+ * @param pinyin  PinYin string
+ * @param voice   Voice set
+ * @return Pointer to an esp_tts_utt_t
+ */
+esp_tts_utt_t* esp_tts_parser_pinyin(char* pinyin, esp_tts_voice_t *voice);
+
+/**
+ * @brief Allocate an esp_tts_utt_t.
+ *
+ * @param syll_num  Presumably the number of syllables to reserve room for -- TODO confirm
+ * @param voice     Voice set
+ * @return Pointer to an esp_tts_utt_t; the failure value is not stated in this header
+ */
+esp_tts_utt_t* esp_tts_utt_alloc(int syll_num, esp_tts_voice_t *voice);
+
+/**
+ * @brief Free an esp_tts_utt_t.
+ *
+ * @param utt  The object to free
+ */
+void esp_tts_utt_free(esp_tts_utt_t *utt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_player.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_player.h
@@ -0,0 +1,67 @@
+// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License
+#ifndef _ESP_TTS_PLAYER_H_
+#define _ESP_TTS_PLAYER_H_
+
+#include "stdlib.h"
+#include "stdio.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** Handle to a TTS Player instance; an untyped pointer, so the underlying structure is not exposed by this header. */
+typedef void * esp_tts_player_handle_t;
+
+/**
+ * @brief Creates an instance of the TTS Player structure.
+ *
+ * The returned instance is freed with esp_tts_player_destroy().
+ *
+ * @param mode      mode of player, default:0
+ * @return
+ *         - NULL: Create failed
+ *         - Others: The instance of TTS Player
+ */
+esp_tts_player_handle_t esp_tts_player_create(int mode);
+
+
+
+/**
+ * @brief Concatenate audio files.
+ *
+ * @warning Only mono audio data is supported.
+ *
+ * NOTE(review): len, sample_rate and sample_width are pointers; presumably
+ * they are written by the function -- confirm against the implementation.
+ *
+ * @param player       The handle of TTS player
+ * @param file_list    The dir of files
+ * @param file_num     The number of files
+ * @param len          The length of the returned audio buffer
+ * @param sample_rate  The sample rate of input audio file
+ * @param sample_width The sample width of input audio file, sample_width=1:8-bit, sample_width=2:16-bit,...
+ * @return
+ *        - audio data buffer
+ */
+unsigned char* esp_tts_stream_play_by_concat(esp_tts_player_handle_t player, const char **file_list, int file_num, int *len, int *sample_rate, int *sample_width);
+
+
+/**
+ * @brief Free the TTS Player instance.
+ *
+ * @param player   The instance of TTS Player.
+ */
+void esp_tts_player_destroy(esp_tts_player_handle_t player);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_stretcher.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_stretcher.h
@@ -0,0 +1,48 @@
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.h
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// its tonal characteristics.
+
+#ifndef STRETCH_H
+#define STRETCH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void *StretchHandle;
+
+/*
+ * External functions.
+ * NOTE(review): parameters are not documented in this header; the notes below
+ * are inferred from names only -- confirm against the implementation.
+ */
+/* Create a stretcher instance; the period arguments presumably bound the pitch-period search -- TODO confirm. */
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
+/* Stretch num_samples input samples into output by ratio; the meaning of the int result is not stated here. */
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
+/* Flush into output; presumably emits samples still buffered in the instance -- TODO confirm. */
+int stretch_flush (StretchHandle handle, short *output);
+/* Release an instance created by stretch_init -- presumably; TODO confirm. */
+void stretch_deinit (StretchHandle handle);
+
+/*
+ * Internal functions.
+ * NOTE(review): semantics are not documented in this header; names suggest a
+ * buffered variant of the API above (push data, then stretch) -- confirm
+ * against the implementation before relying on it.
+ */
+StretchHandle stretcher_init_internal(int shortest_period, int longest_period, int buff_len);
+void stretcher_deinit (StretchHandle handle);
+int stretcher_is_empty(StretchHandle handle);
+int stretcher_is_full(StretchHandle handle, int num_samples);
+int stretcher_push_data(StretchHandle handle, short *samples, int num_samples);
+int stretcher_stretch_samples(StretchHandle handle, short *output, float ratio);
+/* Takes const period_data plus a start/end index pair; presumably reads source samples from period_data -- TODO confirm. */
+int stretcher_stretch_samples_flash(StretchHandle handle, short *output, float ratio, const short *period_data, 
+                                    int *start_idx, int end_idx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_voice.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_voice.h
@@ -0,0 +1,26 @@
+#ifndef _ESP_TTS_VOICE_H_
+#define _ESP_TTS_VOICE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Description of a TTS voice set.
+ */
+typedef struct {
+	char *voice_name;       // voice set name
+	char *format;           // the format of voice data, currently supports pcm and amrwb
+	int sample_rate;        // the sample rate of voice data, just for pcm format
+	int bit_width;          // the bit width of voice data, just for pcm format
+	int syll_num;           // the syllable number
+	char **sylls;           // the syllable names
+	int *syll_pos;          // the position of syllable in syllable audio data array
+	short *pinyin_idx;      // the index of pinyin
+	short *phrase_dict;     // the pinyin dictionary of common phrase
+	short *extern_idx;      // the idx of extern phrases
+	short *extern_dict;     // the extern phrase dictionary
+	unsigned char *data;    // the audio data of all syllables
+} esp_tts_voice_t;
+
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_voice_template.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_voice_template.h
@@ -0,0 +1,5 @@
+#pragma once
+
+
+#include "esp_tts.h"
+/* Constant voice template, defined outside this header; this is the template passed to esp_tts_voice_set_init(). */
+extern const esp_tts_voice_t  esp_tts_voice_template;
--- a/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_voice_xiaole.h
+++ b/managed_components/espressif__esp-sr/esp-tts/esp_tts_chinese/include/esp_tts_voice_xiaole.h
@@ -0,0 +1,5 @@
+#pragma once
+
+
+#include "esp_tts.h"
+/* Constant "xiaole" voice set, defined outside this header (presumably in the prebuilt voice_set_xiaole library -- TODO confirm). */
+extern const esp_tts_voice_t  esp_tts_voice_xiaole;
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed0.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed0.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed0_v0.4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed0_v0.4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed2.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed2.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed4_v0.4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S1_xiaole_speed4_v0.4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed0.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed0.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed0_v0.4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed0_v0.4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed2.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed2.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed4_v0.4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S2_xiaole_speed4_v0.4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed0.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed0.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed0_v0.4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed0_v0.4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed4_v0.4.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/S3_xiaole_speed4_v0.4.wav
--- a/managed_components/espressif__esp-sr/esp-tts/samples/xiaoxin_speed1.wav
+++ b/managed_components/espressif__esp-sr/esp-tts/samples/xiaoxin_speed1.wav