add some code
This commit is contained in:
31
managed_components/espressif__esp-sr/esp-tts/CMakeLists.txt
Normal file
31
managed_components/espressif__esp-sr/esp-tts/CMakeLists.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
# Build script for the esp-tts component.
#
# The component compiles no sources of its own: it exports the public headers
# under esp_tts_chinese/include and adds the prebuilt archives stored in
# esp_tts_chinese/ to the link line of whatever links against the component.
set(COMPONENT_ADD_INCLUDEDIRS
    ./esp_tts_chinese/include
)

register_component()

# Let the linker search the directory that holds the prebuilt archives.
set(esp_tts_lib_dir "${CMAKE_CURRENT_SOURCE_DIR}/esp_tts_chinese")
target_link_libraries(${COMPONENT_TARGET} INTERFACE "-L ${esp_tts_lib_dir}")

# Pick the archive names for the selected chip. The three cases compare the
# same variable against different literals, so at most one of them can match.
set(esp_tts_libs "")
if(IDF_TARGET STREQUAL "esp32")
    set(esp_tts_libs
        esp_tts_chinese
        voice_set_xiaole
        voice_set_template
    )
elseif(IDF_TARGET STREQUAL "esp32s2")
    set(esp_tts_libs
        esp_tts_chinese_esp32s2
        voice_set_xiaole_esp32s2
        voice_set_template_esp32s2
    )
elseif(IDF_TARGET STREQUAL "esp32s3")
    # NOTE(review): no voice_set_template archive is listed for esp32s3,
    # unlike the other two targets - confirm this is intentional.
    set(esp_tts_libs
        esp_tts_chinese_esp32s3
        voice_set_xiaole_esp32s3
    )
endif()

# Any other target links nothing beyond the search path added above.
if(esp_tts_libs)
    target_link_libraries(${COMPONENT_TARGET} INTERFACE ${esp_tts_libs})
endif()
|
||||
3
managed_components/espressif__esp-sr/esp-tts/README.md
Normal file
3
managed_components/espressif__esp-sr/esp-tts/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# ESP Chinese TTS
|
||||
|
||||
The Espressif TTS speech synthesis model is a lightweight speech synthesis system designed for embedded systems. Currently, only the Chinese language is supported. For more documentation, see [here](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_synthesis/readme.html).
|
||||
10
managed_components/espressif__esp-sr/esp-tts/component.mk
Normal file
10
managed_components/espressif__esp-sr/esp-tts/component.mk
Normal file
@@ -0,0 +1,10 @@
|
||||
# Make-based build description for the esp-tts component: exports the public
# headers and links every prebuilt archive found in esp_tts_chinese/.
COMPONENT_ADD_INCLUDEDIRS := esp_tts_chinese/include

# Paths of all prebuilt archives, prefixed with $(COMPONENT_PATH).
LIB_FILES := $(shell ls $(COMPONENT_PATH)/esp_tts_chinese/lib*.a)

# Turn each archive into a -l<name> flag. patsubst patterns must match the
# whole word, so the directory part is stripped first; otherwise "lib%.a"
# never matches and the paths would pass through unchanged.
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))

# -L makes the -l<name> flags above resolvable by the linker.
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/esp_tts_chinese \
                         $(LIBS)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,135 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_TTS_H_
|
||||
#define _ESP_TTS_H_
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include "esp_tts_voice.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
 * @brief Selects the phrase that is played before a specific number.
 *
 * Passed as the `mode` argument of esp_tts_parse_money().
 */
typedef enum {
    NONE_MODE = 0,      /* do not play any word before the number */
    ALI_PAY_MODE,       /* play "zhi fu bao shou kuan" before the number */
    WEIXIN_PAY_MODE     /* play "wei xin shou kuan" before the number */
} pay_mode_t;
|
||||
|
||||
typedef void * esp_tts_handle_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Init an instance of the TTS voice set structure.
|
||||
*
|
||||
* @param voice_template The const esp_tts_voice_template.
|
||||
* @param data The customized voice data
|
||||
* @return
|
||||
* - NULL: Init failed
|
||||
* - Others: The instance of voice set
|
||||
*/
|
||||
esp_tts_voice_t *esp_tts_voice_set_init(const esp_tts_voice_t *voice_template, void *data);
|
||||
|
||||
/**
|
||||
* @brief Free an instance of the TTS voice set structure.
*
* @param voice The voice set instance to free
|
||||
*/
|
||||
void esp_tts_voice_set_free(esp_tts_voice_t *voice);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the TTS structure.
|
||||
*
|
||||
* @param voice Voice set containing all basic phonemes.
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of TTS structure
|
||||
*/
|
||||
esp_tts_handle_t esp_tts_create(esp_tts_voice_t *voice);
|
||||
|
||||
/**
|
||||
* @brief Parse the pronunciation of an amount of money.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param yuan The number of yuan
|
||||
* @param jiao The number of jiao
|
||||
* @param fen The number of fen
|
||||
* @param mode The pay mode: please refer to pay_mode_t
|
||||
* @return
|
||||
* - 0: failed
|
||||
* - 1: succeeded
|
||||
*/
|
||||
int esp_tts_parse_money(esp_tts_handle_t tts_handle, int yuan, int jiao, int fen, pay_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Parse the pronunciation of a Chinese PinYin string.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param pinyin PinYin string, like this "da4 jia1 hao3"
|
||||
* @return
|
||||
* - 0: failed
|
||||
* - 1: succeeded
|
||||
*/
|
||||
int esp_tts_parse_pinyin(esp_tts_handle_t tts_handle, const char *pinyin);
|
||||
|
||||
/**
|
||||
* @brief parse Chinese string.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param str Chinese string, like this "大家好"
|
||||
* @return
|
||||
* - 0: failed
|
||||
* - 1: succeeded
|
||||
*/
|
||||
int esp_tts_parse_chinese(esp_tts_handle_t tts_handle, const char *str);
|
||||
|
||||
/**
|
||||
* @brief output TTS voice data by stream.
|
||||
*
|
||||
* @Warning The output data should not be freed.
|
||||
Once the output length is 0, all of the voice data has been output.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param len The length of output data
|
||||
* @param speed The speed of the synthesized speech,
|
||||
range: 0~5, 0: the slowest speed, 5: the fastest speed
|
||||
* @return
|
||||
* - voice raw data
|
||||
*/
|
||||
short* esp_tts_stream_play(esp_tts_handle_t tts_handle, int *len, unsigned int speed);
|
||||
|
||||
/**
|
||||
* @brief reset tts stream and clean all cache of TTS instance.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
*/
|
||||
void esp_tts_stream_reset(esp_tts_handle_t tts_handle);
|
||||
|
||||
/**
|
||||
* @brief Free the TTS instance
|
||||
*
|
||||
* @param tts_handle The instance of TTS.
|
||||
*/
|
||||
void esp_tts_destroy(esp_tts_handle_t tts_handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,31 @@
|
||||
#ifndef _ESP_TTS_PARSER_H_
|
||||
#define _ESP_TTS_PARSER_H_
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "esp_tts_voice.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef struct {
|
||||
int *syll_idx;
|
||||
int syll_num;
|
||||
int total_num;
|
||||
esp_tts_voice_t *voice;
|
||||
}esp_tts_utt_t;
|
||||
|
||||
esp_tts_utt_t* esp_tts_parser_chinese (const char* str, esp_tts_voice_t *voice);
|
||||
|
||||
esp_tts_utt_t* esp_tts_parser_money(char *play_tag, int yuan, int jiao, int fen, esp_tts_voice_t *voice);
|
||||
|
||||
esp_tts_utt_t* esp_tts_parser_pinyin(char* pinyin, esp_tts_voice_t *voice);
|
||||
|
||||
esp_tts_utt_t* esp_tts_utt_alloc(int syll_num, esp_tts_voice_t *voice);
|
||||
|
||||
void esp_tts_utt_free(esp_tts_utt_t *utt);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_TTS_PLAYER_H_
|
||||
#define _ESP_TTS_PLAYER_H_
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
typedef void * esp_tts_player_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the TTS Player structure.
|
||||
*
|
||||
* @param mode mode of player, default:0
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of TTS Player
|
||||
*/
|
||||
esp_tts_player_handle_t esp_tts_player_create(int mode);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate audio files.
|
||||
*
|
||||
* @Warning Only mono audio data is supported.
|
||||
*
|
||||
* @param player The handle of TTS player
|
||||
* @param file_list The dir of files
|
||||
* @param file_num The number of files
|
||||
* @param len The length of return audio buffer
|
||||
* @param sample_rate The sample rate of input audio file
|
||||
* @param sample_width The sample width of input audio file, sample_width=1:8-bit, sample_width=2:16-bit,...
|
||||
* @return
|
||||
* - audio data buffer
|
||||
*/
|
||||
unsigned char* esp_tts_stream_play_by_concat(esp_tts_player_handle_t player, const char **file_list, int file_num, int *len, int *sample_rate, int *sample_width);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free the TTS Player instance
|
||||
*
|
||||
* @param player The instance of TTS Player.
|
||||
*/
|
||||
void esp_tts_player_destroy(esp_tts_player_handle_t player);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// **** AUDIO-STRETCH **** //
|
||||
// Time Domain Harmonic Scaler //
|
||||
// Copyright (c) 2019 David Bryant //
|
||||
// All Rights Reserved. //
|
||||
// Distributed under the BSD Software License (see license.txt) //
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// stretch.h
|
||||
|
||||
// Time Domain Harmonic Compression and Expansion
|
||||
//
|
||||
// This library performs time domain harmonic scaling with pitch detection
|
||||
// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
|
||||
// 1/2 to 2 times its original length. This is done without altering any of
|
||||
// its tonal characteristics.
|
||||
|
||||
#ifndef STRETCH_H
|
||||
#define STRETCH_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void *StretchHandle;
|
||||
|
||||
/* extern function */
|
||||
StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
|
||||
int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
|
||||
int stretch_flush (StretchHandle handle, short *output);
|
||||
void stretch_deinit (StretchHandle handle);
|
||||
|
||||
/* internal functions */
|
||||
StretchHandle stretcher_init_internal(int shortest_period, int longest_period, int buff_len);
|
||||
void stretcher_deinit (StretchHandle handle);
|
||||
int stretcher_is_empty(StretchHandle handle);
|
||||
int stretcher_is_full(StretchHandle handle, int num_samples);
|
||||
int stretcher_push_data(StretchHandle handle, short *samples, int num_samples);
|
||||
int stretcher_stretch_samples(StretchHandle handle, short *output, float ratio);
|
||||
int stretcher_stretch_samples_flash(StretchHandle handle, short *output, float ratio, const short *period_data,
|
||||
int *start_idx, int end_idx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
#ifndef _ESP_TTS_VOICE_H_
|
||||
#define _ESP_TTS_VOICE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
 * Description of one TTS voice set: its metadata, syllable tables,
 * pronunciation dictionaries and the audio data of all syllables.
 */
typedef struct {
    char *voice_name;       /* voice set name */
    char *format;           /* format of the voice data; currently pcm and amrwb are supported */
    int sample_rate;        /* sample rate of the voice data, pcm format only */
    int bit_width;          /* bit width of the voice data, pcm format only */
    int syll_num;           /* number of syllables */
    char **sylls;           /* syllable names */
    int *syll_pos;          /* position of each syllable in the syllable audio data array */
    short *pinyin_idx;      /* index of pinyin */
    short *phrase_dict;     /* pinyin dictionary of common phrases */
    short *extern_idx;      /* index of the external phrases */
    short *extern_dict;     /* external phrase dictionary */
    unsigned char *data;    /* audio data of all syllables */
} esp_tts_voice_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "esp_tts.h"
|
||||
extern const esp_tts_voice_t esp_tts_voice_template;
|
||||
@@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "esp_tts.h"
|
||||
extern const esp_tts_voice_t esp_tts_voice_xiaole;
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user