add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
# Public headers exported to every component that depends on this one.
set(COMPONENT_ADD_INCLUDEDIRS
    ./esp_tts_chinese/include
)

register_component()

# The TTS engine and the voice data are linked by name (-l style), so the
# directory next to this file is added to the linker search path first.
target_link_libraries(${COMPONENT_TARGET} INTERFACE "-L ${CMAKE_CURRENT_SOURCE_DIR}/esp_tts_chinese")

# Pick the libraries that belong to the active chip. IDF_TARGET can match
# at most one branch; for any other target nothing extra is linked.
if(IDF_TARGET STREQUAL "esp32")
    set(tts_prebuilt_libs
        esp_tts_chinese
        voice_set_xiaole
        voice_set_template
    )
elseif(IDF_TARGET STREQUAL "esp32s2")
    set(tts_prebuilt_libs
        esp_tts_chinese_esp32s2
        voice_set_xiaole_esp32s2
        voice_set_template_esp32s2
    )
elseif(IDF_TARGET STREQUAL "esp32s3")
    # No voice_set_template library is linked for esp32s3.
    set(tts_prebuilt_libs
        esp_tts_chinese_esp32s3
        voice_set_xiaole_esp32s3
    )
else()
    set(tts_prebuilt_libs "")
endif()

if(tts_prebuilt_libs)
    target_link_libraries(${COMPONENT_TARGET} INTERFACE ${tts_prebuilt_libs})
endif()

View File

@@ -0,0 +1,3 @@
# ESP Chinese TTS
The Espressif TTS speech synthesis model is a lightweight speech synthesis system designed for embedded systems. Currently, only the Chinese language is supported. For more details, see the [documentation](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_synthesis/readme.html).

View File

@@ -0,0 +1,10 @@
# Public headers exported to dependent components.
COMPONENT_ADD_INCLUDEDIRS := esp_tts_chinese/include

# Static archives found in esp_tts_chinese/ (full paths, as printed by ls).
LIB_FILES := $(shell ls $(COMPONENT_PATH)/esp_tts_chinese/lib*.a)

# Turn every libNAME.a into -lNAME. patsubst matches whole words only, so the
# directory part has to be stripped first: without $(notdir ...) the pattern
# "lib%.a" never matches a full path and the paths pass through unchanged.
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))

# Search the archive directory and link every library found there.
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/esp_tts_chinese \
                         $(LIBS)

View File

@@ -0,0 +1,135 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_TTS_H_
#define _ESP_TTS_H_
#include "stdlib.h"
#include "stdio.h"
#include "esp_tts_voice.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Prefix phrase played before a specific number
 *        (passed as the mode argument of esp_tts_parse_money()).
 */
typedef enum {
NONE_MODE = 0, // Do not play any phrase before the number
ALI_PAY_MODE, // Play "zhi fu bao shou kuan" before the number
WEIXIN_PAY_MODE // Play "wei xin shou kuan" before the number
} pay_mode_t;
/* Untyped handle to a TTS instance: returned by esp_tts_create() and passed to the other esp_tts_* calls. */
typedef void * esp_tts_handle_t;
/**
 * @brief Init an instance of the TTS voice set structure.
 *
 * @param voice_template The const voice template (e.g. esp_tts_voice_template).
 * @param data           The customized voice data.
 * @return
 *   - NULL: Init failed
 *   - Others: The instance of the voice set
 */
esp_tts_voice_t *esp_tts_voice_set_init(const esp_tts_voice_t *voice_template, void *data);
/**
 * @brief Free an instance of the TTS voice set structure.
 *
 * NOTE(review): the previous comment was a copy of the one on
 * esp_tts_voice_set_init() and described a return value, although this
 * function returns void.
 *
 * @param voice The voice set instance to free; presumably one returned by
 *              esp_tts_voice_set_init() (TODO confirm against the library).
 */
void esp_tts_voice_set_free(esp_tts_voice_t *voice);
/**
 * @brief Create an instance of the TTS structure.
 *
 * @param voice Voice set containing all basic phonemes.
 * @return
 *   - NULL: Create failed
 *   - Others: The instance of the TTS structure
 */
esp_tts_handle_t esp_tts_create(esp_tts_voice_t *voice);
/**
 * @brief Parse the pronunciation of an amount of money.
 *
 * @param tts_handle Instance of TTS
 * @param yuan       The number of yuan
 * @param jiao       The number of jiao
 * @param fen        The number of fen
 * @param mode       The pay mode; see pay_mode_t
 * @return
 *   - 0: failed
 *   - 1: succeeded
 */
int esp_tts_parse_money(esp_tts_handle_t tts_handle, int yuan, int jiao, int fen, pay_mode_t mode);
/**
 * @brief Parse the pronunciation of a Chinese PinYin string.
 *
 * @param tts_handle Instance of TTS
 * @param pinyin     PinYin string, for example "da4 jia1 hao3"
 * @return
 *   - 0: failed
 *   - 1: succeeded
 */
int esp_tts_parse_pinyin(esp_tts_handle_t tts_handle, const char *pinyin);
/**
 * @brief Parse a Chinese string.
 *
 * @param tts_handle Instance of TTS
 * @param str        Chinese string, for example "大家好"
 * @return
 *   - 0: failed
 *   - 1: succeeded
 */
int esp_tts_parse_chinese(esp_tts_handle_t tts_handle, const char *str);
/**
 * @brief Output TTS voice data as a stream.
 *
 * @warning The output data should not be freed.
 *          Once the output length is 0, all voice data has been output.
 *
 * @param tts_handle Instance of TTS
 * @param len        The length of the output data
 * @param speed      The speed of the synthesized speech,
 *                   range: 0~5, 0: the slowest speed, 5: the fastest speed
 * @return
 *   - voice raw data
 */
short* esp_tts_stream_play(esp_tts_handle_t tts_handle, int *len, unsigned int speed);
/**
 * @brief Reset the TTS stream and clean all cache of the TTS instance.
 *
 * @param tts_handle Instance of TTS
 */
void esp_tts_stream_reset(esp_tts_handle_t tts_handle);
/**
 * @brief Free the TTS instance.
 *
 * @param tts_handle The instance of TTS.
 */
void esp_tts_destroy(esp_tts_handle_t tts_handle);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,31 @@
#ifndef _ESP_TTS_PARSER_H_
#define _ESP_TTS_PARSER_H_
#include "stdlib.h"
#include "esp_tts_voice.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Utterance record returned by the esp_tts_parser_* functions and released
 * with esp_tts_utt_free().
 * NOTE(review): the field meanings below are read off the names only; the
 * implementation is not visible from this header, so confirm before relying
 * on them.
 */
typedef struct {
int *syll_idx; // presumably an array of syllable indices - TODO confirm
int syll_num; // presumably the number of syllables in use - TODO confirm
int total_num; // presumably the capacity of syll_idx - TODO confirm
esp_tts_voice_t *voice; // voice set associated with this utterance
}esp_tts_utt_t;
/*
 * Parser entry points. Each one takes its input plus a voice set and returns
 * an esp_tts_utt_t pointer.
 * NOTE(review): failure behaviour (e.g. a NULL return) and ownership of the
 * result are not documented here; presumably the caller releases the result
 * with esp_tts_utt_free() - confirm against the library.
 */

/* Parse a Chinese string. */
esp_tts_utt_t* esp_tts_parser_chinese (const char* str, esp_tts_voice_t *voice);
/* Parse an amount of money given as yuan/jiao/fen; play_tag presumably selects the phrase played first - TODO confirm. */
esp_tts_utt_t* esp_tts_parser_money(char *play_tag, int yuan, int jiao, int fen, esp_tts_voice_t *voice);
/* Parse a PinYin string. NOTE(review): pinyin is not const here - check whether the parser modifies it. */
esp_tts_utt_t* esp_tts_parser_pinyin(char* pinyin, esp_tts_voice_t *voice);
/* Allocate an utterance for syll_num syllables (presumably - TODO confirm) bound to the given voice set. */
esp_tts_utt_t* esp_tts_utt_alloc(int syll_num, esp_tts_voice_t *voice);
/* Free an utterance. */
void esp_tts_utt_free(esp_tts_utt_t *utt);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,67 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_TTS_PLAYER_H_
#define _ESP_TTS_PLAYER_H_
#include "stdlib.h"
#include "stdio.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Untyped handle to a TTS Player instance: returned by esp_tts_player_create(). */
typedef void * esp_tts_player_handle_t;
/**
 * @brief Create an instance of the TTS Player structure.
 *
 * @param mode Mode of the player, default: 0
 * @return
 *   - NULL: Create failed
 *   - Others: The instance of the TTS Player
 */
esp_tts_player_handle_t esp_tts_player_create(int mode);
/**
 * @brief Concatenate audio files.
 *
 * @warning Only mono audio data is supported.
 *
 * NOTE(review): len, sample_rate and sample_width are pointers, so they are
 * presumably filled in by the call - confirm against the library.
 *
 * @param player       The handle of the TTS player
 * @param file_list    The list of audio file paths (an array of strings)
 * @param file_num     The number of files
 * @param len          The length of the returned audio buffer
 * @param sample_rate  The sample rate of the input audio files
 * @param sample_width The sample width of the input audio files, sample_width=1:8-bit, sample_width=2:16-bit,...
 * @return
 *   - audio data buffer
 */
unsigned char* esp_tts_stream_play_by_concat(esp_tts_player_handle_t player, const char **file_list, int file_num, int *len, int *sample_rate, int *sample_width);
/**
 * @brief Free the TTS Player instance.
 *
 * @param player The instance of the TTS Player.
 */
void esp_tts_player_destroy(esp_tts_player_handle_t player);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,48 @@
////////////////////////////////////////////////////////////////////////////
// **** AUDIO-STRETCH **** //
// Time Domain Harmonic Scaler //
// Copyright (c) 2019 David Bryant //
// All Rights Reserved. //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
// stretch.h
// Time Domain Harmonic Compression and Expansion
//
// This library performs time domain harmonic scaling with pitch detection
// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
// 1/2 to 2 times its original length. This is done without altering any of
// its tonal characteristics.
#ifndef STRETCH_H
#define STRETCH_H
#ifdef __cplusplus
extern "C" {
#endif
/* Untyped handle to a stretcher instance. */
typedef void *StretchHandle;
/*
 * Public functions.
 * NOTE(review): this header does not document parameters or return values.
 * The remarks below are read off the names and the file header (16-bit PCM,
 * mono or stereo, 1/2 to 2 times the original length) - confirm against the
 * implementation.
 */
/* Create a stretcher; the two period arguments presumably bound the pitch-detection search - TODO confirm. */
StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
/* Process num_samples from samples into output; ratio is presumably the length scale factor - TODO confirm. */
int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
/* Presumably writes any samples still buffered to output - TODO confirm. */
int stretch_flush (StretchHandle handle, short *output);
/* Release a stretcher (presumably one created by stretch_init - TODO confirm). */
void stretch_deinit (StretchHandle handle);
/*
 * Internal functions.
 * NOTE(review): undocumented in this header; the remarks below are read off
 * the names only - confirm against the implementation before use.
 */
/* Create a stretcher with an explicit buffer length. */
StretchHandle stretcher_init_internal(int shortest_period, int longest_period, int buff_len);
/* Release a stretcher (presumably one created by stretcher_init_internal - TODO confirm). */
void stretcher_deinit (StretchHandle handle);
/* Presumably reports whether the internal buffer holds no data - TODO confirm. */
int stretcher_is_empty(StretchHandle handle);
/* Presumably reports whether num_samples more samples would not fit - TODO confirm. */
int stretcher_is_full(StretchHandle handle, int num_samples);
/* Presumably appends num_samples samples to the internal buffer - TODO confirm. */
int stretcher_push_data(StretchHandle handle, short *samples, int num_samples);
/* Presumably stretches buffered data into output by ratio - TODO confirm. */
int stretcher_stretch_samples(StretchHandle handle, short *output, float ratio);
/* Variant taking const period_data plus a start/end index; the "_flash" suffix presumably means the data is read from flash - TODO confirm. */
int stretcher_stretch_samples_flash(StretchHandle handle, short *output, float ratio, const short *period_data,
int *start_idx, int end_idx);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,26 @@
#ifndef _ESP_TTS_VOICE_H_
#define _ESP_TTS_VOICE_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Description of a TTS voice set: metadata, lookup tables and syllable audio data. */
typedef struct {
char *voice_name; // voice set name
char *format; // the format of the voice data; currently pcm and amrwb are supported
int sample_rate; // the sample rate of the voice data, only for pcm format
int bit_width; // the bit width of the voice data, only for pcm format
int syll_num; // the number of syllables
char **sylls; // the syllable names
int *syll_pos; // the position of each syllable in the syllable audio data array
short *pinyin_idx; // the index of pinyin
short *phrase_dict; // the pinyin dictionary of common phrases
short *extern_idx; // the index of external phrases
short *extern_dict; // the external phrase dictionary
unsigned char *data; // the audio data of all syllables
} esp_tts_voice_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,5 @@
#pragma once
#include "esp_tts.h"
/* Const voice template, defined outside this header; this is the kind of value esp_tts_voice_set_init() takes as voice_template. */
extern const esp_tts_voice_t esp_tts_voice_template;

View File

@@ -0,0 +1,5 @@
#pragma once
#include "esp_tts.h"
/* The "xiaole" voice set, defined outside this header. */
extern const esp_tts_voice_t esp_tts_voice_xiaole;