add some code
This commit is contained in:
1
managed_components/espressif__esp-sr/.component_hash
Normal file
1
managed_components/espressif__esp-sr/.component_hash
Normal file
@@ -0,0 +1 @@
|
||||
12733d9b4aef5d5e295f35c4671835d605992d00583fcd2f8d21166f62c6b071
|
||||
34
managed_components/espressif__esp-sr/.gitignore
vendored
Normal file
34
managed_components/espressif__esp-sr/.gitignore
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# VS Code Settings
|
||||
.vscode/
|
||||
|
||||
include/config
|
||||
include/sdkconfig.h
|
||||
build/
|
||||
sdkconfig.old
|
||||
sdkconfig
|
||||
.DS_Store
|
||||
|
||||
*.pyc
|
||||
|
||||
# Doc build artifacts
|
||||
docs/_build/
|
||||
docs/*/_build/
|
||||
docs/*/doxygen-warning-log.txt
|
||||
docs/*/sphinx-warning-log.txt
|
||||
docs/*/sphinx-warning-log-sanitized.txt
|
||||
docs/*/xml/
|
||||
docs/*/xml_in/
|
||||
docs/*/man/
|
||||
docs/doxygen_sqlite3.db
|
||||
_build/*
|
||||
|
||||
# Downloaded font files
|
||||
docs/_static/DejaVuSans.ttf
|
||||
docs/_static/NotoSansSC-Regular.otf
|
||||
|
||||
# ci
|
||||
test_apps/*/dependencies.lock
|
||||
test_apps/*/managed_components
|
||||
test_apps/*/*/build_*
|
||||
pytest_log
|
||||
XUNIT_RESULT.xml
|
||||
209
managed_components/espressif__esp-sr/CHANGELOG.md
Normal file
209
managed_components/espressif__esp-sr/CHANGELOG.md
Normal file
@@ -0,0 +1,209 @@
|
||||
# Change log for esp-sr
|
||||
|
||||
## 2.1.5
|
||||
- Fix ringbuf bug
|
||||
- Add more wakenet9 models
|
||||
|
||||
## 2.1.4
|
||||
- Fix the bug of multinet duration overflow
|
||||
- Fix the memory leak bug in multinet7 and multinet6
|
||||
|
||||
## 2.1.3
|
||||
- Replace kissfft with dl_fft for wakenet9s
|
||||
- Replace esp-dsp fft with dl_fft for wakenet9
|
||||
- Add more wakenet9 models
|
||||
|
||||
## 2.1.2
|
||||
- Add debug mode
|
||||
- Update wakenet trigger to v4
|
||||
|
||||
## 2.1.1
|
||||
- Add 8KHz AEC for VoIP
|
||||
- Add more wakenet9 models
|
||||
|
||||
## 2.1.0
|
||||
- esp32c3 support wakenet9s and aec
|
||||
- esp32c5 support wakenet9s and aec
|
||||
- esp32c6 support wakenet9s and aec
|
||||
- esp32s2 support wakenet9s and aec
|
||||
- Add four wakenet9s model: hilexin,hiesp,nihaoxiaozhi,hijason
|
||||
|
||||
## 2.0.5
|
||||
- Fix fftr bug
|
||||
|
||||
## 2.0.4
|
||||
- Add DOA(Direction of Arrival) algorithm
|
||||
|
||||
## 2.0.3
|
||||
- Add wakenet9 support for ESP32
|
||||
|
||||
## 2.0.2
|
||||
- Accelerate the AEC of ESP32C5, reducing the CPU loading from 39% to 30%.
|
||||
|
||||
## 2.0.1
|
||||
- Add `afe_aec_create`, `afe_aec_destroy`, `afe_aec_process` interface
|
||||
|
||||
## 2.0.0
|
||||
- Add vadnet1_medium model
|
||||
- Refactor AFE interface. Note AFE v2.0 is not compatible with previous versions
|
||||
- Add esp32c5 AEC support
|
||||
- Add some new wake words
|
||||
|
||||
## 1.9.5
|
||||
- Add Hi,Jason; 小鸭小鸭; 璃奈板 wake word models
|
||||
|
||||
## 1.9.4
|
||||
- Fix bugs when using C++ to compile
|
||||
- Add more new wake words
|
||||
|
||||
## 1.9.3
|
||||
- Fix nsnet2 crash
|
||||
- Add esp32p4 ci test
|
||||
|
||||
## 1.9.2
|
||||
- Improve nsnet2 performance
|
||||
|
||||
## 1.9.1
|
||||
- Support esp32p4 for nsnet2
|
||||
- Add a method to load model from rodata
|
||||
|
||||
## 1.9.0
|
||||
- Support esp32p4 for WakeNet, MultiNet and AFE_SR
|
||||
|
||||
|
||||
## 1.8.0
|
||||
- Support esp-idf v5.3
|
||||
- Add more new wake words
|
||||
- Add setting "fixed_first_channel" in afe_config
|
||||
|
||||
## 1.7.1
|
||||
- Add 喵喵同学,Hi,joy, (Hi,Lily/Hi,莉莉) wakenet model
|
||||
|
||||
## 1.7.0
|
||||
- Add first Noise Suppression model: nsnet2
|
||||
- Add more wake word model trained by TTS sample
|
||||
|
||||
## 1.6.1
|
||||
- Add hey willow wakenet model: wn9_heywillow_tts
|
||||
- Update wn9_alexa from v3 to v4
|
||||
- Add model info into srmodels_list_t
|
||||
- parse wake words from model_info string
|
||||
|
||||
## 1.6.0
|
||||
- Add Chinese MultiNet7 models
|
||||
- Add first Noise Suppression model: nsnet1
|
||||
- Add wakenet model trained by TTS samples: Jarvis and Computer
|
||||
- fix some bugs
|
||||
|
||||
## 1.5.1
|
||||
- Reduce Internal RAM of multinet7
|
||||
- Update benchmark
|
||||
- Add ci build test for esp32
|
||||
- Fix some bugs
|
||||
|
||||
## 1.5.0
|
||||
- Add esp32c6 tts lib
|
||||
- Return the volume of wake word audio when one wake word is detected
|
||||
- Reduce MultiNet6 SRAM size from 48KB to 32 KB
|
||||
- Add "Hi M Five" wake word model from M5Stack
|
||||
- Remove all MultiNet4 models
|
||||
- Update MultiNet7_en v2.0
|
||||
- Fix some bugs in AFE
|
||||
- Add Flite G2P module
|
||||
|
||||
## 1.4.2
|
||||
- Reset timeout trigger of multinet6 when a new speech command is detected
|
||||
- Allocate all beams from PSRAM
|
||||
|
||||
## 1.4.1
|
||||
- Fix the wrong APIs of multinet2 when using ESP32 chip
|
||||
- VAD can work fine when WakeNet is disabled
|
||||
|
||||
## 1.4.0
|
||||
- Add ci tests to check the APIs of wakenet, multinet and AFE work fine
|
||||
- Support to load and run two wakenet9 models at the same time in AFE
|
||||
- Reduce the latency of multinet6
|
||||
|
||||
## 1.3.4
|
||||
- Fix the bug of multinet5q8 when adding new speech commands
|
||||
|
||||
## 1.3.3
|
||||
- Fix the crash of multinet5q8 when speech commands are triggered
|
||||
- Update esp_mn_commands_update() in esp_process_sdkconfig.c
|
||||
|
||||
## 1.3.2
|
||||
- Output text of ctc greedy search result when no command can be detected.
|
||||
- Modify the default Chinese commands
|
||||
- Remove __pycache__ in esp-sr/model folder
|
||||
|
||||
## 1.3.1
|
||||
- Bugfix: remove all cxx11:string
|
||||
- Bugfix: remove esp-partition for esp32s2 & esp32c3 on idf v4.4
|
||||
- Update multinet API to add/modify/check new commands in the code
|
||||
- Update documents to introduce how to use multinet API
|
||||
|
||||
## 1.3.0
|
||||
- Update the partition APIs to keep compatible with both IDF v4.4 and IDF v5.0
|
||||
- Add a new Chinese MultiNet6 model for air conditioner controller
|
||||
|
||||
## 1.2.1
|
||||
- Fix bugs in model loader
|
||||
- Read all parameters sequentially, which reduces about 5x in model loading time.
|
||||
- Use esp_partition_mmap to replace spiffs file system, which further reduces about 3x in model loading time
|
||||
- Add WakeNet API unity test
|
||||
- Add MultiNet API unity test
|
||||
|
||||
## 1.2.0
|
||||
- ESP-DSP dependency is now installed from the component registry
|
||||
- Add an English MultiNet6 model which is trained by RNNT and CTC
|
||||
- Add a Chinese MultiNet6 model which is trained by RNNT and CTC
|
||||
- Fixed CMake errors when esp-sr was installed from component registry
|
||||
- Fixed the list of supported chips displayed in the component registry
|
||||
|
||||
## 1.1.0
|
||||
- Support esp32c3 for Chinese TTS
|
||||
- Update document of ESP-SR
|
||||
- Add ESP-SR into Espressif component manager
|
||||
|
||||
## 1.0.0
|
||||
- Add wakenet8 & wakenet9
|
||||
- Add multinet5 to support English and Chinese speech command recognition
|
||||
- Remove wakenet7
|
||||
- Add AFE pipeline for voice communication
|
||||
|
||||
## 0.8.0
|
||||
- support ESP32S3 chip
|
||||
- add wakenet7 & update wakenet5 to support multi-channel detection
|
||||
- remove wakenet6
|
||||
- add AFE pipeline for speech recognition
|
||||
|
||||
## 0.7.0
|
||||
- add chinese tts
|
||||
- update noise suppression v2
|
||||
- update AEC v3
|
||||
|
||||
## 0.6.0
|
||||
- update multinet_cn_1.4 and add CONTINUOUS RECOGNITION mode
|
||||
- improve hilexin wakeNet5X3 model(v5)
|
||||
- support IDFv4.0 build system
|
||||
- replace MAP algorithm with MASE(Mic Array Speech Enhancement) algorithm v1.0
|
||||
|
||||
## 0.5.0
|
||||
- add multinet1 English model v1.0
|
||||
- update multinet1 Chinese model v2.0
|
||||
- add Mic Array Processing(MAP) algorithm
|
||||
- Fix the bug of parsing speech command
|
||||
- fix the bug of decoder
|
||||
|
||||
## 0.3.0
|
||||
- add wakenet6
|
||||
- support cmake
|
||||
- add free wake word: hi jeson
|
||||
- update wakenet5X3 wake word model(v2)
|
||||
|
||||
## 0.2.0
|
||||
- add acoustic algorithm, include AEC, AGC, VAD ,NS
|
||||
- add wakenet5X2 and wakenet5X3
|
||||
|
||||
## 0.1.0
|
||||
- Initial commit, include wakenet4,wakenet5 and multinet1_cni
|
||||
1
managed_components/espressif__esp-sr/CHECKSUMS.json
Normal file
1
managed_components/espressif__esp-sr/CHECKSUMS.json
Normal file
File diff suppressed because one or more lines are too long
101
managed_components/espressif__esp-sr/CMakeLists.txt
Normal file
101
managed_components/espressif__esp-sr/CMakeLists.txt
Normal file
@@ -0,0 +1,101 @@
|
||||
# Register the esp-sr component and link its prebuilt libraries.
# Nothing is registered for targets outside the list below.
if((${IDF_TARGET} STREQUAL "esp32")
   OR (${IDF_TARGET} STREQUAL "esp32s2")
   OR (${IDF_TARGET} STREQUAL "esp32s3")
   OR (${IDF_TARGET} STREQUAL "esp32p4")
   OR (${IDF_TARGET} STREQUAL "esp32c3")
   OR (${IDF_TARGET} STREQUAL "esp32c5")
   OR (${IDF_TARGET} STREQUAL "esp32c6"))

    set(include_dirs
        "esp-tts/esp_tts_chinese/include"
        "include/${IDF_TARGET}"
        "src/include")

    set(srcs
        "src/model_path.c"
        "src/esp_sr_debug.c"
        "src/esp_mn_speech_commands.c"
        "src/esp_process_sdkconfig.c")

    set(requires json spiffs)

    # esp_partition is only added as a requirement when IDF_VERSION_MAJOR > 4.
    if(IDF_VERSION_MAJOR GREATER 4)
        list(APPEND requires esp_partition)
    endif()

    idf_component_register(SRCS ${srcs}
                           INCLUDE_DIRS ${include_dirs}
                           REQUIRES ${requires}
                           PRIV_REQUIRES spi_flash)

    # Per-target directories holding the prebuilt archives.
    set(sr_lib_dir "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}")
    set(tts_lib_dir "${CMAKE_CURRENT_SOURCE_DIR}/esp-tts/esp_tts_chinese/${IDF_TARGET}")

    target_link_libraries(${COMPONENT_TARGET} "-L ${sr_lib_dir}")
    target_link_libraries(${COMPONENT_TARGET} "-L ${tts_lib_dir}")

    # Each name <x> below maps to the archive lib<x>.a in the matching directory.
    foreach(sr_lib
            dl_lib
            c_speech_features
            esp_audio_processor
            esp_audio_front_end
            fst
            flite_g2p
            multinet
            hufzip
            vadnet
            wakenet
            nsnet)
        add_prebuilt_library(${sr_lib} "${sr_lib_dir}/lib${sr_lib}.a" PRIV_REQUIRES ${COMPONENT_NAME})
    endforeach()

    foreach(tts_lib esp_tts_chinese voice_set_xiaole)
        add_prebuilt_library(${tts_lib} "${tts_lib_dir}/lib${tts_lib}.a" PRIV_REQUIRES ${COMPONENT_NAME})
    endforeach()

    idf_component_get_property(esp_dsp_lib espressif__esp-dsp COMPONENT_LIB)
    idf_component_get_property(dl_fft_lib espressif__dl_fft COMPONENT_LIB)

    set(sr_libs
        $<TARGET_FILE:${esp_dsp_lib}>
        $<TARGET_FILE:${dl_fft_lib}>
        dl_lib
        c_speech_features
        esp_audio_front_end
        esp_audio_processor
        esp_tts_chinese
        voice_set_xiaole
        fst
        flite_g2p
        hufzip
        multinet
        nsnet
        vadnet
        wakenet)

    # multinet2_ch is only added for the esp32 target.
    if(${IDF_TARGET} STREQUAL "esp32")
        add_prebuilt_library(multinet2_ch "${sr_lib_dir}/libmultinet2_ch.a" PRIV_REQUIRES ${COMPONENT_NAME})
        list(APPEND sr_libs multinet2_ch)
    endif()

    # All libraries are passed inside one linker group (--start-group/--end-group).
    target_link_libraries(${COMPONENT_TARGET}
                          "-Wl,--start-group"
                          ${sr_libs}
                          "-Wl,--end-group")

endif()
|
||||
|
||||
# Add model partition and flash srmodels.bin
|
||||
# When a custom partition table is in use, look up the "model" partition and,
# if it exists, build srmodels.bin and flash it to that partition.
if(CONFIG_PARTITION_TABLE_CUSTOM)
    partition_table_get_partition_info(size "--partition-name model" "size")
    partition_table_get_partition_info(offset "--partition-name model" "offset")

    if("${size}" AND "${offset}")
        set(MVMODEL_EXE ${COMPONENT_PATH}/model/movemodel.py)
        idf_build_get_property(build_dir BUILD_DIR)
        set(image_file ${build_dir}/srmodels/srmodels.bin)

        # movemodel.py is given the sdkconfig, the component path and the build dir;
        # the command is re-run whenever the sdkconfig file changes.
        add_custom_command(
            OUTPUT ${image_file}
            COMMENT "Move and Pack models..."
            COMMAND python ${MVMODEL_EXE} -d1 ${SDKCONFIG} -d2 ${COMPONENT_PATH} -d3 ${build_dir}
            DEPENDS ${SDKCONFIG}
            VERBATIM)

        add_custom_target(srmodels_bin ALL DEPENDS ${image_file})
        add_dependencies(flash srmodels_bin)
        esptool_py_flash_to_partition(flash "model" "${image_file}")
    else()
        # The original used set(message ...), which only defined a variable named
        # "message" and never showed this text to the user.
        message(WARNING
                "Failed to find model in partition table file. "
                "Please add a line(Name=model) to the partition file if you want to use esp-sr models.")
    endif()
endif()
|
||||
2323
managed_components/espressif__esp-sr/Kconfig.projbuild
Normal file
2323
managed_components/espressif__esp-sr/Kconfig.projbuild
Normal file
File diff suppressed because it is too large
Load Diff
20
managed_components/espressif__esp-sr/LICENSE
Normal file
20
managed_components/espressif__esp-sr/LICENSE
Normal file
@@ -0,0 +1,20 @@
|
||||
ESPRESSIF MIT License
|
||||
|
||||
Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
|
||||
|
||||
Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
|
||||
it is free of charge, to any person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the Software is furnished
|
||||
to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or
|
||||
substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
123
managed_components/espressif__esp-sr/README.md
Normal file
123
managed_components/espressif__esp-sr/README.md
Normal file
@@ -0,0 +1,123 @@
|
||||
# ESP-SR Speech Recognition Framework
|
||||
|
||||
[Documentation](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/index.html)
|
||||
[Component Registry](https://components.espressif.com/components/espressif/esp-sr)
|
||||
|
||||
Espressif [ESP-SR](https://github.com/espressif/esp-sr) helps users build AI speech solutions.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
ESP-SR framework includes the following modules:
|
||||
|
||||
* [Audio Front-end AFE](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/audio_front_end/README.html)
|
||||
* [Wake Word Engine WakeNet](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/wake_word_engine/README.html)
|
||||
* [VAD VADNet](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/vadnet/README.html)
|
||||
* [Speech Command Word Recognition MultiNet](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_command_recognition/README.html)
|
||||
* [Speech Synthesis](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_synthesis/readme.html)
|
||||
|
||||
These algorithms are provided in the form of a component, so they can be integrated into your projects with minimum effort.
|
||||
|
||||
|
||||
News
|
||||
----
|
||||
[21/4/2025]: We add a new model WakeNet9s, which can run on chips that do not have PSRAM and do not support SIMD, such as ESP32C3 and ESP32C5. [examples](https://github.com/espressif/esp-skainet/tree/master/examples/wake_word_detection)
|
||||
[17/4/2025]: We add a new DOA(Direction of Arrival) algorithm.
|
||||
[14/2/2025]: We release **ESP-SR V2.0**. [Migration from ESP-SR V1.* to ESP-SR V2.*](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/audio_front_end/migration_guide.html)
|
||||
[13/2/2025]: We release **VADNet**, a voice activity detection model. You can use it to replace the WebRTC VAD and improve the performance.
|
||||
|
||||
## Wake Word Engine
|
||||
|
||||
| Supported Targets | ESP32 | ESP32-S2 | ESP32-S3 | ESP32-P4 | ESP32-C3 | ESP32-C5 | ESP32-C6 |
|
||||
| ----------------- | -------- | -------- | -------- | -------- | -------- | -------- | -------- |
|
||||
|
||||
Espressif wake word engine **WakeNet** is specially designed to provide a high performance and low memory footprint wake word detection algorithm for users, which enables devices to always listen for wake words, such as “Alexa”, “Hi,lexin” and “Hi,ESP”. WakeNet9 and WakeNet9s models are supported. WakeNet9s is a cost-down version of WakeNet9, with fewer parameters and lower computational requirements.
|
||||
|
||||
Espressif offers two ways to customize the wake word, please refer to the following document to choose the one that meets your needs:
|
||||
[Espressif Speech Wake Words Customization Process](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/wake_word_engine/ESP_Wake_Words_Customization.html) or [Training Wake Words by TTS sample](https://github.com/espressif/esp-sr/issues/88).
|
||||
|
||||
The following wake words are supported in esp-sr:
|
||||
|
||||
|wake words | WakeNet9s | WakeNet9 |
|
||||
|:--------------- | :------------------------------:| :---------------------:|
|
||||
|Hi,乐鑫 | wn9s_hilexin | wn9_hilexin |
|
||||
|Hi,ESP | wn9s_hiesp | wn9_hiesp |
|
||||
|你好小智 | wn9s_nihaoxiaozhi | wn9_nihaoxiaozhi_tts |
|
||||
|Hi,Jason | wn9s_hijason_tts2 | wn9_hijason_tts2 |
|
||||
|你好喵伴 | | wn9_nihaomiaoban_tts2 |
|
||||
|小爱同学 | | wn9_xiaoaitongxue |
|
||||
|Hi,M Five | | wn9_himfive |
|
||||
|Alexa | | wn9_alexa |
|
||||
|Jarvis | | wn9_jarvis_tts |
|
||||
|Computer | | wn9_computer_tts |
|
||||
|Hey,Willow | | wn9_heywillow_tts |
|
||||
|Sophia | | wn9_sophia_tts |
|
||||
|Mycroft | | wn9_mycroft_tts |
|
||||
|Hey,Printer | | wn9_heyprinter_tts |
|
||||
|Hi,Joy | | wn9_hijoy_tts |
|
||||
|Hey,Wand | | wn9_heywanda_tts |
|
||||
|Astrolabe | | wn9_astrolabe_tts |
|
||||
|Hey,Ily | | wn9_heyily_tts2 |
|
||||
|Hi,Jolly | | wn9_hijolly_tts2 |
|
||||
|Hi,Fairy | | wn9_hifairy_tts2 |
|
||||
|Blue Chip | | wn9_bluechip_tts2 |
|
||||
|Hi,Wall E/Hi,瓦力| | wn9_hiwalle_tts2 |
|
||||
|你好小鑫 | | wn9_nihaoxiaoxin_tts |
|
||||
|小美同学 | | wn9_xiaomeitongxue_tts |
|
||||
|Hi,小星 | | wn9_hixiaoxing_tts |
|
||||
|小龙小龙 | | wn9_xiaolongxiaolong_tts |
|
||||
|喵喵同学 | | wn9_miaomiaotongxue_tts|
|
||||
|Hi,喵喵 | | wn9_himiaomiao_tts |
|
||||
|Hi,Lily/Hi,莉莉 | | wn9_hilili_tts |
|
||||
|Hi,Telly/Hi,泰力 | | wn9_hitelly_tts |
|
||||
|小滨小滨/小冰小冰| | wn9_xiaobinxiaobin_tts |
|
||||
|Hi,小巫 | | wn9_haixiaowu_tts |
|
||||
|小鸭小鸭 | | wn9_xiaoyaxiaoya_tts2 |
|
||||
|璃奈板 | | wn9_linaiban_tts2 |
|
||||
|小酥肉 | | wn9_xiaosurou_tts2 |
|
||||
|小宇同学 | | wn9_xiaoyutongxue_tts2 |
|
||||
|小明同学 | | wn9_xiaomingtongxue_tts2|
|
||||
|小康同学 | | wn9_xiaokangtongxue_tts2|
|
||||
|小箭小箭 | | wn9_xiaojianxiaojian_tts2|
|
||||
|小特小特 | | wn9_xiaotexiaote_tts2|
|
||||
|你好小益 | | wn9_nihaoxiaoyi_tts2|
|
||||
|你好百应 | | wn9_nihaobaiying_tts2|
|
||||
|小鹿小鹿 | | wn9_xiaoluxiaolu_tts2|
|
||||
|你好东东 | | wn9_nihaodongdong_tts2|
|
||||
|你好小安 | | wn9_nihaoxiaoan_tts2|
|
||||
|
||||
*NOTE:* `_tts` suffix means this WakeNet model is trained by TTS samples. `_tts2` suffix means this WakeNet model is trained by TTS Pipeline V2.
|
||||
|
||||
## Speech Command Recognition
|
||||
|
||||
| Supported Targets | ESP32 | ESP32-S3 | ESP32-P4 |
|
||||
| ----------------- | -------- | -------- | -------- |
|
||||
|
||||
Espressif's speech command recognition model **MultiNet** is specially designed to provide a flexible off-line speech command recognition model. With this model, you can easily add your own speech commands, eliminating the need to train model again.
|
||||
|
||||
Currently, Espressif **MultiNet** supports up to 300 Chinese or English speech commands, such as “打开空调” (Turn on the air conditioner) and “打开卧室灯” (Turn on the bedroom light).
|
||||
|
||||
The following MultiNet models are supported in esp-sr:
|
||||
|
||||
|language | ESP32 | ESP32-S3 | ESP32-P4 |
|
||||
|:--------------- | :-------------------------:| :----------------------------:| :----------------------------:|
|
||||
|Chinese | mn2_cn | mn5q8_cn, mn6_cn, mn7_cn | mn7_cn |
|
||||
|English | | mn5q8_en, mn6_en, mn7_en | mn7_en |
|
||||
|
||||
## Audio Front End
|
||||
|
||||
| Supported Targets | ESP32 | ESP32-S3 | ESP32-P4 |
|
||||
| ----------------- | -------- | -------- | -------- |
|
||||
|
||||
Espressif Audio Front-End **AFE** integrates AEC (Acoustic Echo Cancellation), VAD (Voice Activity Detection), BSS (Blind Source Separation), NS (Noise Suppression), NSNET (Deep Noise Suppression) and other functions. It is designed to be used with the ESP-SR library.
|
||||
|
||||
Our two-mic Audio Front-End (AFE) has been qualified as a “Software Audio Front-End Solution” for [Amazon Alexa Built-in devices](https://developer.amazon.com/en-US/alexa/solution-providers/alexa-connect-kit).
|
||||
|
||||
|
||||
## Documentation and Resources
|
||||
|
||||
ESP-SR Documentation: [ESP-SR Documentation](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/index.html)
|
||||
Migration Guide: [Migration from V1.* to V2.*](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/audio_front_end/migration_guide.html)
|
||||
Wake Word Training: [Wake Word Training by TTS Pipeline V2.0](https://github.com/espressif/esp-sr/issues/88)
|
||||
Examples: [esp-skainet/examples](https://github.com/espressif/esp-skainet)
|
||||
|
||||
18
managed_components/espressif__esp-sr/ci/utils.sh
Normal file
18
managed_components/espressif__esp-sr/ci/utils.sh
Normal file
@@ -0,0 +1,18 @@
|
||||
# Modified from https://gitlab.com/gitlab-org/gitlab/-/blob/master/scripts/utils.sh
|
||||
|
||||
# Install a base64-encoded private key as ~/.ssh/id_rsa.
#   $1 - base64 text of the private key
# The encoded text is first written to ~/.ssh/id_rsa_base64 and decoded from
# there; that staging file is left in place afterwards.
add_ssh_keys() {
  local encoded_key="${1}"

  mkdir -p ~/.ssh
  chmod 700 ~/.ssh

  echo -n "${encoded_key}" > ~/.ssh/id_rsa_base64
  base64 --decode --ignore-garbage ~/.ssh/id_rsa_base64 > ~/.ssh/id_rsa
  chmod 600 ~/.ssh/id_rsa
}
|
||||
|
||||
# Install the SSH key and append a Host entry for the documentation server
# to ~/.ssh/config.
#   $1 - base64 text of the private key (handed to add_ssh_keys)
#   $2 - server host name used in the "Host" line
#   $3 - login name used in the "User" line
# The appended entry sets "StrictHostKeyChecking no" for that host.
add_doc_server_ssh_keys() {
  local encoded_key="${1}"
  local doc_host="${2}"
  local doc_user="${3}"

  add_ssh_keys "${encoded_key}"
  echo -e "Host ${doc_host}\n\tStrictHostKeyChecking no\n\tUser ${doc_user}\n" >> ~/.ssh/config
}
|
||||
15
managed_components/espressif__esp-sr/component.mk
Normal file
15
managed_components/espressif__esp-sr/component.mk
Normal file
@@ -0,0 +1,15 @@
|
||||
# Include directories exported by this component.
COMPONENT_ADD_INCLUDEDIRS := include/$(IDF_TARGET)
COMPONENT_ADD_INCLUDEDIRS += esp-tts/esp_tts_chinese/include
COMPONENT_ADD_INCLUDEDIRS += src/include

COMPONENT_SRCDIRS := src

# Archives found by listing the per-target lib directory and the TTS directory.
# NOTE(review): the TTS directory is hard-coded to "esp32" instead of
# $(IDF_TARGET) - confirm this is intended.
LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib/$(IDF_TARGET)/lib*.a)
LIB_FILES += $(shell ls $(COMPONENT_PATH)/esp-tts/esp_tts_chinese/esp32/lib*.a)

# Words of the form lib<name>.a become -l<name>; any other word passes through
# unchanged.
LIBS := $(patsubst lib%.a,-l%,$(LIB_FILES))

COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/lib/$(IDF_TARGET)
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/esp-tts
COMPONENT_ADD_LDFLAGS += $(LIBS)
|
||||
215
managed_components/espressif__esp-sr/conftest.py
Normal file
215
managed_components/espressif__esp-sr/conftest.py
Normal file
@@ -0,0 +1,215 @@
|
||||
# SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
|
||||
import pytest
|
||||
from pytest import Config, FixtureRequest, Function, Session
|
||||
from pytest_embedded.plugin import multi_dut_argument, multi_dut_fixture
|
||||
|
||||
IDF_VERSION = os.environ.get('IDF_VERSION')
|
||||
PYTEST_ROOT_DIR = str(pathlib.Path(__file__).parent)
|
||||
logging.info(f'Pytest root dir: {PYTEST_ROOT_DIR}')
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
def idf_version() -> str:
    """Return the ESP-IDF version string used to locate build directories.

    Resolution order:
      1. the ``IDF_VERSION`` environment variable, returned as-is when non-empty;
      2. ``<IDF_PATH>/tools/cmake/version.cmake``, parsed for the
         ``IDF_VERSION_MAJOR`` / ``IDF_VERSION_MINOR`` values and returned as
         ``'<major>.<minor>'``.

    Returns an empty string (after logging a warning) when neither
    ``IDF_VERSION`` nor ``IDF_PATH`` is set.
    """
    from_env = os.environ.get('IDF_VERSION')
    if from_env:
        return from_env

    idf_root = os.environ.get('IDF_PATH')
    if not idf_root:
        logging.warning('Failed to get IDF_VERSION!')
        return ''

    # Captures the five-letter suffix (e.g. MAJOR, MINOR) and its numeric value.
    pattern = re.compile(r'^\s*set\s*\(\s*IDF_VERSION_([A-Z]{5})\s+(\d+)')
    cmake_file = os.path.join(idf_root, 'tools/cmake/version.cmake')
    with open(cmake_file) as fh:
        hits = (pattern.match(row) for row in fh)
        components = {hit.group(1): hit.group(2) for hit in hits if hit}

    return '{}.{}'.format(int(components['MAJOR']), int(components['MINOR']))
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
def session_tempdir() -> str:
    """Create and return a timestamped log directory for this session.

    The directory is ``pytest_log/<YYYY-mm-dd_HH-MM-SS>`` next to this file;
    it is created if it does not exist yet.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    log_dir = os.path.join(os.path.dirname(__file__), 'pytest_log', timestamp)
    os.makedirs(log_dir, exist_ok=True)
    return log_dir
|
||||
|
||||
|
||||
@pytest.fixture
@multi_dut_argument
def config(request: FixtureRequest) -> str:
    """Return the first argument of the test's ``config`` marker, or ``'default'`` when the test has none."""
    first_marker = next(iter(request.node.iter_markers(name='config')), None)
    if first_marker is None:
        return 'default'
    return first_marker.args[0]
|
||||
|
||||
|
||||
@pytest.fixture
@multi_dut_argument
def app_path(request: FixtureRequest, test_file_path: str) -> str:
    """Return the application path for the current test.

    The first argument of an ``app_path`` marker wins. Without a marker, the
    ``app_path`` command-line option is used, and if that is empty the
    directory containing the test file is returned.
    """
    first_marker = next(iter(request.node.iter_markers(name='app_path')), None)
    if first_marker is not None:
        return first_marker.args[0]

    # compatible with old pytest-embedded parametrize --app_path
    from_cli = request.config.getoption('app_path', None)
    return from_cli or os.path.dirname(test_file_path)
|
||||
|
||||
|
||||
@pytest.fixture
def test_case_name(request: FixtureRequest, target: str, config: str) -> str:
    """Build the ``<target>.<config>.<original test name>`` identifier.

    When ``target`` or ``config`` is not a plain string, its entries are sorted
    and joined with ``|`` (``target`` is de-duplicated first).
    """
    target_part = target if isinstance(target, str) else '|'.join(sorted(set(target)))
    config_part = config if isinstance(config, str) else '|'.join(sorted(config))
    return f'{target_part}.{config_part}.{request.node.originalname}'
|
||||
|
||||
|
||||
@pytest.fixture
@multi_dut_fixture
def build_dir(
    app_path: str,
    target: Optional[str],
    config: Optional[str],
    idf_version: str
) -> Optional[str]:
    """
    Pick the first existing build directory below ``app_path``.

    Candidates are tried in this order (the first two only when
    ``idf_version`` is non-empty):

    1. <app_path>/<idf_version>/build_<target>_<config>
    2. <app_path>/<idf_version>/build_<target>
    3. <app_path>/build_<target>_<config>
    4. <app_path>/build
    5. <app_path>

    Args:
        app_path: app path
        target: target (must be truthy)
        config: config (must be truthy)
        idf_version: IDF version string, may be empty

    Returns:
        The matching candidate relative to ``app_path`` (not the joined path).
        If no candidate exists, an error is logged and the process exits with
        status 1.
    """
    assert target
    assert config

    target_config_dir = f'build_{target}_{config}'

    candidates = []
    if idf_version:
        candidates += [
            os.path.join(idf_version, target_config_dir),
            os.path.join(idf_version, f'build_{target}'),
        ]
    candidates += [target_config_dir, 'build', '.']

    for candidate in candidates:
        binary_path = os.path.join(app_path, candidate)
        if not os.path.isdir(binary_path):
            logging.warning(
                f'checking binary path: {binary_path} ... missing ... try another place')
            continue

        logging.info(f'find valid binary path: {binary_path}')
        return candidate

    logging.error(
        f'no build dir. Please build the binary "python tools/build_apps.py {app_path}" and run pytest again')
    sys.exit(1)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
@multi_dut_fixture
def junit_properties(
    test_case_name: str,
    record_xml_attribute: Callable[[str, object], None],
) -> None:
    """
    Autouse fixture: records ``test_case_name`` (<target>.<config>.<case_name>)
    as the ``name`` attribute of the test case in the junit report.
    """
    record_xml_attribute('name', test_case_name)
|
||||
|
||||
|
||||
##################
|
||||
# Hook functions #
|
||||
##################
|
||||
# Stash key under which the IdfPytestEmbedded plugin instance is stored on the
# pytest config. StashKey must be instantiated: the bare subscript
# `StashKey[...]` is only a generic alias, not a unique key object.
_idf_pytest_embedded_key = pytest.StashKey['IdfPytestEmbedded']()
|
||||
|
||||
|
||||
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the ``--env`` command-line option in the ``idf`` option group."""
    idf_group = parser.getgroup('idf')
    idf_group.addoption('--env', help='only run tests matching the environment NAME.')
|
||||
|
||||
|
||||
def pytest_configure(config: Config) -> None:
    """Validate ``--target`` and register the IdfPytestEmbedded plugin.

    ``--target`` is required unless one of the informational flags
    (``--help``, ``--fixtures``, ``--markers``, ``--version``) was passed, in
    which case the placeholder ``'unneeded'`` is used.

    Raises:
        ValueError: when no target is given.
    """
    info_only_flags = ('--help', '--fixtures', '--markers', '--version')
    cli_args = config.invocation_params.args

    if any(flag in cli_args for flag in info_only_flags):
        target = 'unneeded'
    else:
        target = config.getoption('target')

    if not target:
        raise ValueError('Please specify one target marker via "--target [TARGET]"')

    plugin = IdfPytestEmbedded(
        target=target,
        env_name=config.getoption('env'),
    )
    config.stash[_idf_pytest_embedded_key] = plugin
    config.pluginmanager.register(config.stash[_idf_pytest_embedded_key])
|
||||
|
||||
|
||||
def pytest_unconfigure(config: Config) -> None:
    """Remove the IdfPytestEmbedded plugin from the stash and unregister it, if it was stored."""
    plugin = config.stash.get(_idf_pytest_embedded_key, None)
    if not plugin:
        return

    del config.stash[_idf_pytest_embedded_key]
    config.pluginmanager.unregister(plugin)
|
||||
|
||||
|
||||
class IdfPytestEmbedded:
    """Pytest plugin that filters collected tests by ``--target`` and ``--env``."""

    def __init__(
        self,
        target: Optional[str] = None,
        env_name: Optional[str] = None,
    ):
        # CLI options used to filter the test cases
        self.target = target
        self.env_name = env_name

        # (test_case_name, is_known_failure_cases, is_xfail)
        self._failed_cases: List[Tuple[str, bool, bool]] = []

    @staticmethod
    def _marker_first_args(item: Function, marker_name: str) -> list:
        """Return the first positional argument of every ``marker_name`` marker on ``item``."""
        return [marker.args[0] for marker in item.iter_markers(name=marker_name)]

    @pytest.hookimpl(tryfirst=True)
    def pytest_sessionstart(self, session: Session) -> None:
        """Lower-case the stored target and copy it into the session's ``target`` option."""
        if not self.target:
            return
        self.target = self.target.lower()
        session.config.option.target = self.target

    # @pytest.hookimpl(tryfirst=True)
    def pytest_collection_modifyitems(self, items: List[Function]) -> None:
        """Add a default timeout marker and filter ``items`` in place by target and env."""
        # Items without a 'timeout' keyword get a timeout marker of 500 * 60.
        # NOTE(review): the earlier comments said "10 minutes" and "5 mins",
        # neither of which matches this value - confirm the intended limit.
        for item in items:
            if 'timeout' not in item.keywords:
                item.add_marker(pytest.mark.timeout(500 * 60))

        # keep only the test cases whose 'target' markers include "--target"
        if self.target:
            items[:] = [
                item for item in items
                if self.target in self._marker_first_args(item, 'target')
            ]

        # keep only the test cases whose 'env' markers include "--env"
        if self.env_name:
            items[:] = [
                item for item in items
                if self.env_name in self._marker_first_args(item, 'env')
            ]
|
||||
31
managed_components/espressif__esp-sr/esp-tts/CMakeLists.txt
Normal file
31
managed_components/espressif__esp-sr/esp-tts/CMakeLists.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
set(COMPONENT_ADD_INCLUDEDIRS
|
||||
./esp_tts_chinese/include
|
||||
)
|
||||
|
||||
register_component()
|
||||
|
||||
target_link_libraries(${COMPONENT_TARGET} INTERFACE "-L ${CMAKE_CURRENT_SOURCE_DIR}/esp_tts_chinese")
|
||||
|
||||
if(IDF_TARGET STREQUAL "esp32")
|
||||
target_link_libraries(${COMPONENT_TARGET} INTERFACE
|
||||
esp_tts_chinese
|
||||
voice_set_xiaole
|
||||
voice_set_template
|
||||
)
|
||||
endif()
|
||||
|
||||
if(IDF_TARGET STREQUAL "esp32s2")
|
||||
target_link_libraries(${COMPONENT_TARGET} INTERFACE
|
||||
esp_tts_chinese_esp32s2
|
||||
voice_set_xiaole_esp32s2
|
||||
voice_set_template_esp32s2
|
||||
)
|
||||
endif()
|
||||
|
||||
if(IDF_TARGET STREQUAL "esp32s3")
|
||||
target_link_libraries(${COMPONENT_TARGET} INTERFACE
|
||||
esp_tts_chinese_esp32s3
|
||||
voice_set_xiaole_esp32s3
|
||||
)
|
||||
endif()
|
||||
3
managed_components/espressif__esp-sr/esp-tts/README.md
Normal file
3
managed_components/espressif__esp-sr/esp-tts/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# ESP Chinese TTS
|
||||
|
||||
Espressif TTS speech synthesis model is a lightweight speech synthesis system designed for embedded systems. Currently, only the Chinese language is supported. See more documentation [Here](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_synthesis/readme.html).
|
||||
10
managed_components/espressif__esp-sr/esp-tts/component.mk
Normal file
10
managed_components/espressif__esp-sr/esp-tts/component.mk
Normal file
@@ -0,0 +1,10 @@
|
||||
COMPONENT_ADD_INCLUDEDIRS := esp_tts_chinese/include
|
||||
|
||||
|
||||
LIB_FILES := $(shell ls $(COMPONENT_PATH)/esp_tts_chinese/lib*.a)
|
||||
|
||||
LIBS := $(patsubst lib%.a,-l%,$(LIB_FILES))
|
||||
|
||||
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/esp_tts_chinese \
|
||||
$(LIBS)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,135 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_TTS_H_
|
||||
#define _ESP_TTS_H_
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include "esp_tts_voice.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
NONE_MODE = 0, //do not play any word before playing a specific number
|
||||
ALI_PAY_MODE, //play zhi fu bao shou kuan before playing a specific number
|
||||
WEIXIN_PAY_MODE //play wei xin shou kuan before playing a specific number
|
||||
} pay_mode_t;
|
||||
|
||||
typedef void * esp_tts_handle_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Init an instance of the TTS voice set structure.
|
||||
*
|
||||
* @param voice_template The const esp_tts_voice_template.
|
||||
* @param data The customized voice data
|
||||
* @return
|
||||
* - NULL: Init failed
|
||||
* - Others: The instance of voice set
|
||||
*/
|
||||
esp_tts_voice_t *esp_tts_voice_set_init(const esp_tts_voice_t *voice_template, void *data);
|
||||
|
||||
/**
|
||||
* @brief Free an instance of the TTS voice set structure.
|
||||
*
|
||||
* @param voice The voice set instance to free
|
||||
*/
|
||||
void esp_tts_voice_set_free(esp_tts_voice_t *voice);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the TTS structure.
|
||||
*
|
||||
* @param voice Voice set containing all basic phonemes.
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of TTS structure
|
||||
*/
|
||||
esp_tts_handle_t esp_tts_create(esp_tts_voice_t *voice);
|
||||
|
||||
/**
|
||||
* @brief parse money pronunciation.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param yuan The number of yuan
|
||||
* @param jiao The number of jiao
|
||||
* @param fen The number of fen
|
||||
* @param mode The pay mode: please refer to pay_mode_t
|
||||
* @return
|
||||
* - 0: failed
|
||||
* - 1: succeeded
|
||||
*/
|
||||
int esp_tts_parse_money(esp_tts_handle_t tts_handle, int yuan, int jiao, int fen, pay_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief parse Chinese PinYin pronunciation.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param pinyin PinYin string, like this "da4 jia1 hao3"
|
||||
* @return
|
||||
* - 0: failed
|
||||
* - 1: succeeded
|
||||
*/
|
||||
int esp_tts_parse_pinyin(esp_tts_handle_t tts_handle, const char *pinyin);
|
||||
|
||||
/**
|
||||
* @brief parse Chinese string.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param str Chinese string, like this "大家好"
|
||||
* @return
|
||||
* - 0: failed
|
||||
* - 1: succeeded
|
||||
*/
|
||||
int esp_tts_parse_chinese(esp_tts_handle_t tts_handle, const char *str);
|
||||
|
||||
/**
|
||||
* @brief output TTS voice data by stream.
|
||||
*
|
||||
* @Warning The output data should not be freed.
|
||||
Once the output length is 0, all voice data has been output.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
* @param len The length of output data
|
||||
* @param speed The speed of the synthesized speech,
|
||||
range:0~5, 0: the slowest speed, 5: the fastest speed
|
||||
* @return
|
||||
* - voice raw data
|
||||
*/
|
||||
short* esp_tts_stream_play(esp_tts_handle_t tts_handle, int *len, unsigned int speed);
|
||||
|
||||
/**
|
||||
* @brief reset tts stream and clean all cache of TTS instance.
|
||||
*
|
||||
* @param tts_handle Instance of TTS
|
||||
*/
|
||||
void esp_tts_stream_reset(esp_tts_handle_t tts_handle);
|
||||
|
||||
/**
|
||||
* @brief Free the TTS instance
|
||||
*
|
||||
* @param tts_handle The instance of TTS.
|
||||
*/
|
||||
void esp_tts_destroy(esp_tts_handle_t tts_handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,31 @@
|
||||
#ifndef _ESP_TTS_PARSER_H_
|
||||
#define _ESP_TTS_PARSER_H_
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "esp_tts_voice.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef struct {
|
||||
int *syll_idx;
|
||||
int syll_num;
|
||||
int total_num;
|
||||
esp_tts_voice_t *voice;
|
||||
}esp_tts_utt_t;
|
||||
|
||||
esp_tts_utt_t* esp_tts_parser_chinese (const char* str, esp_tts_voice_t *voice);
|
||||
|
||||
esp_tts_utt_t* esp_tts_parser_money(char *play_tag, int yuan, int jiao, int fen, esp_tts_voice_t *voice);
|
||||
|
||||
esp_tts_utt_t* esp_tts_parser_pinyin(char* pinyin, esp_tts_voice_t *voice);
|
||||
|
||||
esp_tts_utt_t* esp_tts_utt_alloc(int syll_num, esp_tts_voice_t *voice);
|
||||
|
||||
void esp_tts_utt_free(esp_tts_utt_t *utt);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_TTS_PLAYER_H_
|
||||
#define _ESP_TTS_PLAYER_H_
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
typedef void * esp_tts_player_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the TTS Player structure.
|
||||
*
|
||||
* @param mode mode of player, default:0
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of TTS Player
|
||||
*/
|
||||
esp_tts_player_handle_t esp_tts_player_create(int mode);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate audio files.
|
||||
*
|
||||
* @Warning Just support mono audio data.
|
||||
*
|
||||
* @param player The handle of TTS player
|
||||
* @param file_list The dir of files
|
||||
* @param file_num The number of files
|
||||
* @param len The length of return audio buffer
|
||||
* @param sample_rate The sample rate of input audio file
|
||||
* @param sample_width The sample width of input audio file, sample_width=1:8-bit, sample_width=2:16-bit,...
|
||||
* @return
|
||||
* - audio data buffer
|
||||
*/
|
||||
unsigned char* esp_tts_stream_play_by_concat(esp_tts_player_handle_t player, const char **file_list, int file_num, int *len, int *sample_rate, int *sample_width);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free the TTS Player instance
|
||||
*
|
||||
* @param player The instance of TTS Player.
|
||||
*/
|
||||
void esp_tts_player_destroy(esp_tts_player_handle_t player);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// **** AUDIO-STRETCH **** //
|
||||
// Time Domain Harmonic Scaler //
|
||||
// Copyright (c) 2019 David Bryant //
|
||||
// All Rights Reserved. //
|
||||
// Distributed under the BSD Software License (see license.txt) //
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// stretch.h
|
||||
|
||||
// Time Domain Harmonic Compression and Expansion
|
||||
//
|
||||
// This library performs time domain harmonic scaling with pitch detection
|
||||
// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
|
||||
// 1/2 to 2 times its original length. This is done without altering any of
|
||||
// its tonal characteristics.
|
||||
|
||||
#ifndef STRETCH_H
|
||||
#define STRETCH_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void *StretchHandle;
|
||||
|
||||
/* extern function */
|
||||
StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
|
||||
int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
|
||||
int stretch_flush (StretchHandle handle, short *output);
|
||||
void stretch_deinit (StretchHandle handle);
|
||||
|
||||
/* internal function */
|
||||
StretchHandle stretcher_init_internal(int shortest_period, int longest_period, int buff_len);
|
||||
void stretcher_deinit (StretchHandle handle);
|
||||
int stretcher_is_empty(StretchHandle handle);
|
||||
int stretcher_is_full(StretchHandle handle, int num_samples);
|
||||
int stretcher_push_data(StretchHandle handle, short *samples, int num_samples);
|
||||
int stretcher_stretch_samples(StretchHandle handle, short *output, float ratio);
|
||||
int stretcher_stretch_samples_flash(StretchHandle handle, short *output, float ratio, const short *period_data,
|
||||
int *start_idx, int end_idx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
#ifndef _ESP_TTS_VOICE_H_
|
||||
#define _ESP_TTS_VOICE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
char *voice_name; // voice set name
|
||||
char *format; // the format of voice data, currently supports pcm and amrwb
|
||||
int sample_rate; // the sample rate of voice data, just for pcm format
|
||||
int bit_width; // the bit width of voice data, just for pcm format
|
||||
int syll_num; // the number of syllables
|
||||
char **sylls; // the syllable names
|
||||
int *syll_pos; // the position of each syllable in the syllable audio data array
|
||||
short *pinyin_idx; // the index of pinyin
|
||||
short *phrase_dict; // the pinyin dictionary of common phrases
|
||||
short *extern_idx; // the index of extern phrases
|
||||
short *extern_dict; // the extern phrase dictionary
|
||||
unsigned char *data; // the audio data of all syllables
|
||||
} esp_tts_voice_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "esp_tts.h"
|
||||
extern const esp_tts_voice_t esp_tts_voice_template;
|
||||
@@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "esp_tts.h"
|
||||
extern const esp_tts_voice_t esp_tts_voice_xiaole;
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
16
managed_components/espressif__esp-sr/idf_component.yml
Normal file
16
managed_components/espressif__esp-sr/idf_component.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
dependencies:
|
||||
espressif/dl_fft: '>=0.2.0'
|
||||
espressif/esp-dsp: 1.6.0
|
||||
idf: '>=5.0'
|
||||
description: esp_sr provides basic algorithms for Speech Recognition applications
|
||||
files:
|
||||
exclude:
|
||||
- .github
|
||||
- docs/**/*
|
||||
- test_apps/**/*
|
||||
repository: git://github.com/espressif/esp-sr.git
|
||||
repository_info:
|
||||
commit_sha: dec03a644fc1b4b412851f425f2a91f1083d1e0b
|
||||
path: .
|
||||
url: https://github.com/espressif/esp-sr
|
||||
version: 2.1.5
|
||||
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/* #undef ENABLE_DOUBLE */
|
||||
|
||||
#ifdef ENABLE_DOUBLE
|
||||
# define csf_float double
|
||||
# define csf_ceil ceil
|
||||
# define csf_floor floor
|
||||
# define csf_sin sin
|
||||
# define csf_log log
|
||||
# define csf_log10 log10
|
||||
# define csf_pow pow
|
||||
# define csf_sqrt sqrt
|
||||
# define csf_abs fabs
|
||||
# define csf_float_min DBL_MIN
|
||||
#else
|
||||
# define csf_float float
|
||||
# define csf_ceil ceilf
|
||||
# define csf_floor floorf
|
||||
# define csf_sin sinf
|
||||
# define csf_log logf
|
||||
# define csf_log10 log10f
|
||||
# define csf_pow powf
|
||||
# define csf_sqrt sqrtf
|
||||
# define csf_abs fabsf
|
||||
# define csf_float_min FLT_MIN
|
||||
#endif
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_customized_word_wn5;
|
||||
418
managed_components/espressif__esp-sr/include/esp32/dl_lib.h
Normal file
418
managed_components/espressif__esp-sr/include/esp32/dl_lib.h
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_H
|
||||
#define DL_LIB_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "sdkconfig.h"
|
||||
#define DL_SPIRAM_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
#include "esp32s3/rom/cache.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int padding_state;
|
||||
|
||||
// /**
|
||||
// * @brief Allocate a chunk of memory which has the given capabilities.
|
||||
// * Equivalent semantics to libc malloc(), for capability-aware memory.
|
||||
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
|
||||
// *
|
||||
// * @param size In bytes, of the amount of memory to allocate
|
||||
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
|
||||
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
|
||||
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
|
||||
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
|
||||
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
|
||||
// * @return Pointer to currently allocated heap memory
|
||||
// **/
|
||||
// void *heap_caps_malloc(size_t size, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Allocate aligned memory from internal memory or external memory.
|
||||
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
|
||||
* else, allocate memory from PSRAM
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently allocated heap memory
|
||||
*/
|
||||
void *dl_lib_calloc(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Always allocate aligned memory from external memory.
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently aligned heap memory
|
||||
*/
|
||||
void *dl_lib_calloc_psram(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
||||
*
|
||||
* @param ptr Pointer to free
|
||||
*/
|
||||
void dl_lib_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
|
||||
* Should be good til an input of 5 or so with a steps factor of 8.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
fptp_t fast_exp(double x, int steps);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
double fast_exp_pro(double x);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a quantized matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
|
||||
fptp_t dl_sigmoid_op(fptp_t in);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a floating point number
|
||||
*
|
||||
* @param v Floating point input number
|
||||
* @return Tanh value
|
||||
*/
|
||||
fptp_t dl_tanh_op(fptp_t v);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fully connected layer operation
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
|
||||
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
|
||||
*/
|
||||
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
|
||||
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
|
||||
* this matrix only needs to be calculated once. This function does that.
|
||||
*
|
||||
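* @param variance Variance matrix
|
||||
* @param epsilon Epsilon value
|
||||
* @param out Output matrix that receives the pre-calculated sqrtvari values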
|
||||
*/
|
||||
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
|
||||
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, partial quantized version.
|
||||
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons, need to be quantised
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a fully-connected layer pass, fully-quantized version.
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @param out Output matrix. Output values of the neurons are placed here.
|
||||
*/
|
||||
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, fully-quantized version
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix, fully-quantized version
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
|
||||
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
|
||||
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
int dl_sigmoid_op_q(const int in);
|
||||
int16_t dl_sigmoid_op_q8(const int16_t in);
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return tanh output
|
||||
*/
|
||||
int dl_tanh_op_q(int v);
|
||||
int16_t dl_tanh_op_q8(int16_t v);
|
||||
|
||||
void load_mat_psram_mn4(void);
|
||||
void load_mat_psram_mn3(void);
|
||||
void free_mat_psram_mn4(void);
|
||||
void free_mat_psram_mn3(void);
|
||||
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
|
||||
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
|
||||
|
||||
int16_t dl_table_tanh_op(int16_t in, int exponent);
|
||||
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
|
||||
|
||||
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, float version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
|
||||
* @param bias Bias for the CNN layer.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1.
|
||||
* @return The result of CNN layer.
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass, quantised version.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
|
||||
* @param bias Bias of the neurons.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1
|
||||
* @return The result of CNN layer
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_COEFGETTER_IF_H
|
||||
#define DL_LIB_COEFGETTER_IF_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "cJSON.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
|
||||
//dl_batch_normalize_get_sqrtvar first.
|
||||
#define COEF_GETTER_HINT_BNVAR (1<<0)
|
||||
|
||||
/*
|
||||
This struct describes the basic information of model data:
|
||||
word_num: the number of wake words or speech commands
|
||||
word_list: the name list of wake words or speech commands
|
||||
thresh_list: the threshold list of wake words or speech commands
|
||||
info_str: the string used to reflect the version and information of model data
|
||||
which consist of the architecture of network, the version of model data, wake words and their threshold
|
||||
*/
|
||||
typedef struct {
|
||||
int word_num; /**< the number of wake words or speech commands */
|
||||
char **word_list; /**< the name list of wake words or speech commands */
|
||||
int *win_list; /**< NOTE(review): not described anywhere in this header -- confirm meaning with the implementation */
|
||||
float *thresh_list; /**< the threshold list of wake words or speech commands */
|
||||
char *info_str; /**< string reflecting the version and information of the model data */
|
||||
} model_info_t;
|
||||
|
||||
/*
|
||||
Alphabet struct describes the basic grapheme or phoneme.
|
||||
item_num: the number of basic items (graphemes or phonemes)
|
||||
items: the list of basic item
|
||||
*/
|
||||
typedef struct {
|
||||
int item_num; /**< the number of basic items in the alphabet */
|
||||
char **items; /**< the list of basic items */
|
||||
}alphabet_t;
|
||||
|
||||
/*
|
||||
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
|
||||
For the matrix getters (getter_f, getter_q and getter_q8), the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
|
||||
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
|
||||
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
|
||||
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q/free_q8 functions to release the
|
||||
memory for the returned matrices, when applicable.
|
||||
*/
|
||||
typedef struct {
|
||||
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); /**< get the dl_matrix2d_t coefficient matrix called name */
|
||||
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); /**< get the dl_matrix2dq_t coefficient matrix called name */
|
||||
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint); /**< get the dl_matrix2dq8_t coefficient matrix called name */
|
||||
void (*free_f)(const dl_matrix2d_t *m); /**< release a matrix returned by getter_f, when applicable */
|
||||
void (*free_q)(const dl_matrix2dq_t *m); /**< release a matrix returned by getter_q, when applicable */
|
||||
void (*free_q8)(const dl_matrix2dq8_t *m); /**< release a matrix returned by getter_q8, when applicable */
|
||||
const model_info_t* (*getter_info)(void *arg); /**< get the model_info_t describing the model data */
|
||||
const alphabet_t* (*getter_alphabet)(void *arg); /**< get the alphabet_t item list */
|
||||
const cJSON* (*getter_config)(void *arg); /**< get a cJSON object; NOTE(review): presumably the model configuration -- confirm */
|
||||
} model_coeff_getter_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,180 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONV_QUEUE_H
|
||||
#define DL_LIB_CONV_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
//Flags for matrices
|
||||
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
//Float convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /**< the length of queue */
|
||||
int c; /**< the channel number of queue element */
|
||||
int front; /**< the front(top) position of queue */
|
||||
int flag; /**< not used */
|
||||
fptp_t *item; /**< Pointer to item array */
|
||||
} dl_conv_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue from psram
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Free a convolution queue
|
||||
*
|
||||
* @param cq The convolution queue to free
|
||||
*/
|
||||
void dl_conv_queue_free(dl_conv_queue_t *cq);
|
||||
|
||||
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param item The new element
|
||||
*/
|
||||
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
|
||||
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
|
||||
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
|
||||
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
/**
|
||||
* @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
|
||||
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
|
||||
|
||||
|
||||
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,303 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ8_QUEUE_H
|
||||
#define DL_LIB_CONVQ8_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//[nch, n, c]
|
||||
/** 8-bit fixed-point convolution queue (items are q8tp_t). */
typedef struct {
|
||||
int n; /**< the length of queue */
|
||||
int c; /**< the number of queue element */
|
||||
int front; /**< the front(top) position of queue */
|
||||
int nch; /**< the channel of queue */
|
||||
int exponent; /**< The values in items should be multiplied by pow(2,exponent)
to get the real values */
|
||||
q8tp_t *itemq; /**< Pointer to item array */
|
||||
} dl_convq8_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate an 8-bit fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue whose itemq is set to 0
|
||||
*/
|
||||
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
|
||||
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel Kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None; the function is declared void
|
||||
*/
|
||||
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
* @return None; the function is declared void
|
||||
*/
|
||||
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
|
||||
|
||||
|
||||
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
|
||||
|
||||
int8_t dl_sigmoid_lutq8(int in);
|
||||
/**
|
||||
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel number
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
|
||||
*
|
||||
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param nch The channel number
|
||||
*/
|
||||
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* Usually, this layer is used as first layer for 8-bit network.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated.
|
||||
* Input is a 16-bit queue point, Output is an 8-bit queue point.
|
||||
*
|
||||
* @param in Input 16bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8bit fixed-point convolution queue array
|
||||
* @param out Output 8bit fixed-point convolution queue array
* @param nch The channel number
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param out_exponent Exponent of output
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
|
||||
int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
|
||||
int out_exponent, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of 8-bit dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input 8-bit fixed-point convolution queue
|
||||
* @param out Output 8-bit fixed-point convolution queue
* @param nch The channel number
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param offset Offset used to calculate the beginning of input conv queue
|
||||
* @param prenum The num to control the parameter size of preload operation
|
||||
*/
|
||||
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
|
||||
int offset, int prenum);
|
||||
|
||||
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
|
||||
|
||||
|
||||
|
||||
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
|
||||
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
|
||||
|
||||
void print_convq8(dl_convq8_queue_t *cq, int offset);
|
||||
void print_convq(dl_convq_queue_t *cq, int offset);
|
||||
void dl_relu_convq8(dl_convq8_queue_t *cq);
|
||||
|
||||
void lstmq8_free(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,382 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONVQ_QUEUE_H
|
||||
#define DL_LIB_CONVQ_QUEUE_H
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_conv_queue.h"
|
||||
#include "dl_lib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//fixed-point convolution FIFO queue.
|
||||
//[nch, n, c]
|
||||
typedef struct {
|
||||
int n; /**< the length of queue */
|
||||
int c; /**< the number of queue element */
|
||||
int front; /**< the front(top) position of queue */
|
||||
int nch; /**< the multiple of queue */
|
||||
int exponent; /**< The values in items should be multiplied by pow(2,exponent)
to get the real values */
|
||||
qtp_t *itemq; /**< Pointer to item array */
|
||||
} dl_convq_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The number of elements in the queue
|
||||
* @param nch The channel of conv queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
|
||||
|
||||
|
||||
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq_queue_free(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Set itemq of convolution queue to 0
|
||||
*
|
||||
* @param cq The fixed-point convolution queue point
|
||||
*/
|
||||
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the First(oldest) element become the last(newest) element,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
|
||||
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param a The new element
* @param shift NOTE(review): not documented in this header -- confirm meaning with the implementation
|
||||
*/
|
||||
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param last_num Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param ch Channel index of convolution queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return None; the function is declared void
|
||||
*/
|
||||
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on the one of element in multi channel convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation through this pointer; nothing is returned
|
||||
*
|
||||
* @param cqm Input fixed-point multi channel convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param nch The channel number of cqm
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
|
||||
/**
|
||||
* @brief Does a relu operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* relu operation through this pointer; nothing is returned
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
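* @param clip Clip value applied by the relu (presumably an upper bound; confirm against the implementation)
* @param last_num Offset from the front of the queue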
|
||||
* @return None (the function is declared void)
|
||||
*/
|
||||
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on the one of element in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, input data
|
||||
* stays as it is. Results are saved into the *out* array.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param out Old array to re-use. Passing NULL will allocate a new array.
|
||||
* @return softmax results
|
||||
*/
|
||||
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int offset, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift, int prenum);
|
||||
|
||||
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
|
||||
|
||||
|
||||
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
|
||||
*
|
||||
* @param cq1 First fixed-point convolution queue
|
||||
* @param cq2 Second fixed-point convolution queue
|
||||
* @return The result of float-point convolution queue
|
||||
*/
|
||||
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
|
||||
*
|
||||
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
|
||||
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
|
||||
* the kernel is parameters *h_weight*.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param in_weight the LSTM kernel needed by first part
|
||||
* @param h_weight the LSTM kernel needed by second part
|
||||
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
|
||||
* @param in_shift Shift ratio used in first part
|
||||
* @param h_shift Shift ratio used in second part
|
||||
* @return The result of LSTM layer
|
||||
*/
|
||||
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
|
||||
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
|
||||
|
||||
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
|
||||
dl_matrix2dq_t *bias, int prenum);
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @param nch The channel number of convolution queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point multi channel convolution queue
|
||||
*
|
||||
* @param cqm The fixed-point convolution queue to free
|
||||
* @param nch The channel number of cqm
|
||||
*/
|
||||
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
|
||||
|
||||
/**
|
||||
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @param offset the offset to calculate input convq
|
||||
* @param prenum the preload size, 0: do not use preload function
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* kernel,
|
||||
dl_matrix2dq_t* bias,
|
||||
int shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
/**
|
||||
* @brief Fast implement of dilation layer as follows for multi channel input
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is last element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param nch The channel number of input
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @param offset The offset to calculate input convq
|
||||
* @param prenum The preload size, 0: do not use preload function
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
|
||||
dl_convq_queue_t **out,
|
||||
int nch,
|
||||
int rate,
|
||||
int size,
|
||||
dl_matrix2dq_t* filter_kernel,
|
||||
dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel,
|
||||
dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift,
|
||||
int gate_shift,
|
||||
int offset,
|
||||
int prenum);
|
||||
|
||||
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
|
||||
void test_lstm_convq(int size, int in_dim, int lstm_cell);
|
||||
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
|
||||
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
|
||||
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,257 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIX_H
|
||||
#define DL_LIB_MATRIX_H
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
extern multi_heap_handle_t gst_heap;
|
||||
#endif
|
||||
|
||||
//Flags for matrices
|
||||
#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */
|
||||
#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */
|
||||
|
||||
//'Normal' float matrix
|
||||
typedef struct {
|
||||
int w; /**< Width */
|
||||
int h; /**< Height */
|
||||
int stride; /**< Row stride, essentially how many items to skip to get to the same position in the next row */
|
||||
int flags; /**< Flags. OR of DL_MF_* values */
|
||||
fptp_t *item; /**< Pointer to item array */
|
||||
} dl_matrix2d_t;
|
||||
|
||||
//Macro to quickly access the raw items in a matrix
|
||||
//Expands to an lvalue for the item at column x, row y of the matrix pointed to by m.
//Every argument and the whole expansion are parenthesized so that expression
//arguments (e.g. &mat or p+1) and neighbouring operators bind as expected.
//Note that m is evaluated twice; do not pass an expression with side effects.
#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->item space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrix_free(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrix_zero(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @Warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix from existing floating-point data
|
||||
*
|
||||
* @param w Width of resulting matrix
|
||||
* @param h Height of resulting matrix
|
||||
* @param data Data to populate matrix with
|
||||
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two matrices : res=a.b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Added data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Subtract a matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Subtracted data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrix(const dl_matrix2d_t *a);
|
||||
|
||||
/**
|
||||
* @brief Return the average square error given a correct and a test matrix.
|
||||
*
|
||||
* ...Well, more or less. If anything, it gives an indication of the error between
|
||||
* the two. Check the code for the exact implementation.
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return value indicating the relative difference between matrices
|
||||
*/
|
||||
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get a specific item from the matrix
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
static inline fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
    //Plain indexed read through DL_ITM; no bounds checking is performed here,
    //so x and y must already lie inside the matrix.
    return DL_ITM(m, x, y);
}
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the matrix to the given value
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
    //Plain indexed write through DL_ITM; no bounds checking is performed here,
    //so x and y must already lie inside the matrix.
    DL_ITM(m, x, y) = val;
}
|
||||
|
||||
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,387 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ_H
|
||||
#define DL_LIB_MATRIXQ_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int16_t qtp_t;
|
||||
|
||||
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
|
||||
//for easy use as a multiplicand without stressing out the flash cache too much.
|
||||
typedef struct {
|
||||
int w; //Width of the matrix
|
||||
int h; //Height of the matrix
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags; //Flags; presumably an OR of DL_MF_* values as in dl_matrix2d_t - TODO confirm
|
||||
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
|
||||
qtp_t *itemq; //Pointer to the quantized (qtp_t) item array; DL_ITMQ indexes it as itemq[y + x*stride]
|
||||
} dl_matrix2dq_t;
|
||||
|
||||
#define DL_QTP_SHIFT 15
|
||||
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
|
||||
//Expands to an lvalue for item (x, y) of the quantized matrix pointed to by m.
//The index is y + x*stride, i.e. the x/y roles are swapped relative to DL_ITM.
//Every argument and the whole expansion are parenthesized so that expression
//arguments (e.g. &mat or p+1) and neighbouring operators bind as expected.
//Note that m is evaluated twice; do not pass an expression with side effects.
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
|
||||
|
||||
#define DL_SHIFT_AUTO 32
|
||||
|
||||
/**
|
||||
* @info About quantized matrices and shift values
|
||||
*
|
||||
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
|
||||
* time. Quantized matrices can speed up your operations, but come with some quirks, and
|
||||
* it's good to understand how they work before using them.
|
||||
*
|
||||
* The data in the quantized matrix type is stored similarly to floating-point types:
|
||||
* when storing a real value, the value is stored as a mantissa (base number) and an
|
||||
* exponent. The 'real' value that can be re-derived from those two numbers is something
|
||||
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
|
||||
* the standard floating point implementations like e.g. IEEE-754.
|
||||
*
|
||||
* The difference with respect to quantized matrices is that for a quantized matrix, it is
|
||||
* assumed all values stored have more-or-less the same order of magnitude. This allows the
|
||||
* matrix to only store all the mantissas, while the exponents are shared; there is only one
|
||||
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
|
||||
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
|
||||
* integer arithmetic. It also nets us some memory savings - while normally a floating point
|
||||
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
|
||||
* memory requirements.
|
||||
*
|
||||
* While most of the details of handling the intricacies of the quantized matrixes are done
|
||||
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
|
||||
* specifically in places where addition/subtraction/division happens.
|
||||
*
|
||||
* The problem is that the routines do not know what the size of the resulting operation is. For
|
||||
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
|
||||
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
|
||||
* assume the mantissas needs to be scaled back, we may lose precision.
|
||||
*
|
||||
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
|
||||
* the argument is zero, the routine will be conservative, that is, increase the exponent of
|
||||
* the result to such an extent it's mathematically impossible a value in the result will exceed
|
||||
* the maximum value that can be stored. However, when this argument is larger than zero, the
|
||||
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
|
||||
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
|
||||
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
|
||||
* value possible. (Neural networks usually are okay with this happening for a limited amount
|
||||
* of matrix indices).
|
||||
*
|
||||
* For deciding on these shift values, it is recommended to start with a shift value of one, then
|
||||
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
|
||||
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
|
||||
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
|
||||
* else.
|
||||
*
|
||||
* For neural networks and other noise-tolerant applications, note that even when
|
||||
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
|
||||
* to slightly improved precision. Feel free to experiment.
|
||||
**/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
|
||||
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
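* @brief Convert a floating-point matrix to a quantized matrix, presumably using the Qm.f format given by m_bit and f_bit.
* TODO: description inferred from the signature only; confirm against the implementation.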
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Convert a quantized matrix to a floating-point one.
|
||||
*
|
||||
* @param m Quantized matrix to convert
|
||||
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The floating-point version of the quantized matrix
|
||||
**/
|
||||
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq_free(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrixq_zero(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy the matrix into psram
|
||||
* Copy the matrix from flash or iram/psram into psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a fixed-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
|
||||
* much slower than dl_matrixq_dot .
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a floating-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
|
||||
* much slower than dl_matrixq_dot_matrix_out.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand; float matrix
|
||||
* @param b Second multiplicand; quantized matrix
|
||||
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrixq(const dl_matrix2dq_t *a);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Add a pair of quantized matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Added data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @warning In contrast to the floating point equivalent of this function, the fixed-point version
|
||||
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
|
||||
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
|
||||
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
|
||||
* garbage after modifying the data in one of the slices.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Subtracted data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
|
||||
*/
|
||||
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
|
||||
* rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two quantized matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated quantized matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the quantized matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check the sanity of a quantized matrix
|
||||
*
|
||||
* Due to the nature of quantized matrices, depending on the calculations a quantized
|
||||
* matrix is the result of and the shift values chosen in those calculations, a quantized
|
||||
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
|
||||
* most significant mantissa bits are unused, or because a fair amount of mantissas are
|
||||
* clipped. This function checks if this is the case and will report a message to stdout
|
||||
* if significant loss of precision is detected.
|
||||
*
|
||||
* @param m The quantized matrix to check
|
||||
* @param name A string to be displayed in the message if the sanity check fails
|
||||
* @return True if matrix is sane, false otherwise
|
||||
**/
|
||||
|
||||
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
|
||||
|
||||
/**
|
||||
* @brief re-adjust the exponent of the matrix to fit the mantissa better
|
||||
*
|
||||
* This function will shift up all the data in the mantissas so there are no
|
||||
* most-significant bits that are unused in all mantissas. It will also adjust
|
||||
* the exponent to keep the actual values in the matrix the same.
|
||||
*
|
||||
* Some operations done on a matrix, especially operations that re-use the
|
||||
* result of earlier operations done in the same way, can lead to the loss of
|
||||
* data because the exponent of the quantized matrix is never re-adjusted. You
|
||||
* can do that explicitly by calling this function.
|
||||
*
|
||||
* @param m The matrix to re-adjust
|
||||
**/
|
||||
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the floating-point value of a specific item from the quantized matrix
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the quantized matrix to the given
|
||||
* floating-point value
|
||||
*
|
||||
* @warning If the given value is more than the exponent in the quantized matrix
|
||||
* allows for, all mantissas in the matrix will be shifted down to make the value
|
||||
* 'fit'. If, however, the exponent is such that the value would result in a
|
||||
* quantized mantissa of 0, nothing is done.
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_MATRIXQ8_H
|
||||
#define DL_LIB_MATRIXQ8_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int8_t q8tp_t;
|
||||
|
||||
// 2D matrix of 8-bit quantized items (q8tp_t) sharing a single exponent.
// Item (x, y) is addressed as itemq[y + x * stride] (see DL_ITMQ8).
typedef struct {
|
||||
int w; // Width of the matrix
|
||||
int h; // Height of the matrix
|
||||
int stride; //Normally equals h, not w!
|
||||
int flags; // Matrix flags; presumably the DL_MF_* values (e.g. DL_MF_FOREIGNDATA) - TODO confirm
|
||||
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
|
||||
q8tp_t *itemq; // Quantized item storage
|
||||
} dl_matrix2dq8_t;
|
||||
|
||||
#define DL_Q8TP_SHIFT 7
|
||||
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
|
||||
// Access (as an lvalue) the item at column x, row y of the 8-bit quantized matrix pointed to by m.
// m is parenthesized so that any pointer expression (e.g. *pp or cond ? a : b) can be passed safely.
#define DL_ITMQ8(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq8_free(dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Copy a quantized matrix
|
||||
* Copy a quantized matrix from flash or iram/psram
|
||||
*
|
||||
* @param m Matrix to copy
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
|
||||
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
105
managed_components/espressif__esp-sr/include/esp32/esp_aec.h
Normal file
105
managed_components/espressif__esp-sr/include/esp32/esp_aec.h
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AEC_H_
|
||||
#define _ESP_AEC_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
#define AEC_FRAME_LENGTH_MS 32
|
||||
|
||||
typedef struct aec_handle_t aec_handle_t;
|
||||
// Operating mode of the AEC. Note that the value 2 is not assigned to any mode.
typedef enum {
|
||||
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
|
||||
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
|
||||
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
|
||||
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
|
||||
} aec_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
* Please get frame size by aec_get_chunksize() function
|
||||
*
|
||||
* @param sample_rate The Sampling frequency (Hz) must be 16000.
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure, same with aec_create().
|
||||
*
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and the frame from the mic.
|
||||
*
|
||||
* @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc().
|
||||
*
|
||||
* @param handel The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
|
||||
* @param indata An array of 16-bit signed audio samples from mic.
|
||||
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
|
||||
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int aec_get_chunksize(const aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Get AEC mode string
|
||||
*
|
||||
* @param aec_mode The mode of AEC.
|
||||
*
|
||||
* @return AEC mode string
|
||||
*/
|
||||
char * aec_get_mode_string(aec_mode_t aec_mode);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_destroy(aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AEC_H_
|
||||
@@ -0,0 +1,81 @@
|
||||
|
||||
#ifndef _ESP_AFE_AEC_H_
|
||||
#define _ESP_AFE_AEC_H_
|
||||
|
||||
#include "esp_aec.h"
|
||||
#include "esp_afe_config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
aec_handle_t *handle;
|
||||
aec_mode_t mode;
|
||||
afe_pcm_config_t pcm_config;
|
||||
int frame_size;
|
||||
int16_t *data;
|
||||
} afe_aec_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @warning Currently only supports 1 microphone channel and 1 playback channel.
|
||||
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
|
||||
* channel will be selected.
|
||||
*
|
||||
* The input format, same as afe config:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
|
||||
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
|
||||
* esp32c5.
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_aec_handle_t* The created AEC instance
|
||||
*/
|
||||
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on a frame, based on the audio sent to the speaker and the frame from the mic.
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
* @param indata Input audio data, format is defined by input_format.
|
||||
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned.
|
||||
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory
|
||||
|
||||
* @return The bytes of outdata.
|
||||
*/
|
||||
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int afe_aec_get_chunksize(afe_aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param handel The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void afe_aec_destroy(afe_aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_AFE_AEC_H_
|
||||
@@ -0,0 +1,288 @@
|
||||
#pragma once
|
||||
#include "esp_aec.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_nsn_models.h"
|
||||
#include "esp_vad.h"
|
||||
#include "esp_vadn_models.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "model_path.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// VC: Voice Communication
|
||||
|
||||
// Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode
|
||||
SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode
|
||||
} afe_sr_mode_t;
|
||||
|
||||
// Set AFE mode
|
||||
typedef enum {
|
||||
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
|
||||
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
|
||||
} afe_mode_t;
|
||||
|
||||
// Set AFE type
|
||||
typedef enum {
|
||||
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
|
||||
AFE_TYPE_VC = 1, // Voice communication scenarios, 16KHz input, including nonlinear noise suppression
|
||||
AFE_TYPE_VC_8K = 2, // Voice communication scenarios, 8KHz input, note that the input data must be 8KHz
|
||||
} afe_type_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
|
||||
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
|
||||
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
// Peak amplitude of the fetched audio; each enumerator's value is the peak level in dB.
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
|
||||
int mic_num; // microphone channel number
|
||||
uint8_t *mic_ids; // microphone channel indices
|
||||
int ref_num; // playback reference channel number
|
||||
uint8_t *ref_ids; // playback reference channel indices
|
||||
int sample_rate; // sample rate of audio
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
|
||||
AFE_NS_MODE_NET = 1, // please use model name of NSNET
|
||||
} afe_ns_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
|
||||
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
|
||||
} afe_agc_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Function to get the debug audio data
|
||||
*
|
||||
* @param data The debug audio data, which must not be modified. It should be copied away as soon as possible to
|
||||
* avoid blocking for too long.
|
||||
* @param data_size The number of bytes of data.
|
||||
* @returns
|
||||
*/
|
||||
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
|
||||
|
||||
typedef enum {
|
||||
AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task
|
||||
AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task
|
||||
AFE_DEBUG_HOOK_MAX = 2
|
||||
} afe_debug_hook_type_t;
|
||||
|
||||
typedef struct {
|
||||
afe_debug_hook_type_t hook_type; // debug type of hook
|
||||
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
|
||||
} afe_debug_hook_t;
|
||||
|
||||
// AFE configuration: per-algorithm enable switches and parameters (AEC, SE, NS, VAD, WakeNet, AGC)
// followed by the general AFE settings.
typedef struct {
|
||||
/********** AEC(Acoustic Echo Cancellation) **********/
|
||||
bool aec_init; // Whether to init aec
|
||||
aec_mode_t aec_mode; // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
|
||||
int aec_filter_length; // The filter length of aec
|
||||
|
||||
/********** SE(Speech Enhancement, microphone array processing) **********/
|
||||
bool se_init; // Whether to init se
|
||||
|
||||
/********** NS(Noise Suppression) **********/
|
||||
bool ns_init; // Whether to init ns
|
||||
char *ns_model_name; // Model name of ns
|
||||
afe_ns_mode_t afe_ns_mode; // Model mode of ns
|
||||
|
||||
/********** VAD(Voice Activity Detection) **********/
|
||||
bool vad_init; // Whether to init vad
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
char *vad_model_name; // The model name of vad, If it is null, WebRTC VAD will be used.
|
||||
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
|
||||
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default:
|
||||
// 1000 ms
|
||||
int vad_delay_ms; // The delay of the first speech frame in ms, default: 128 ms
|
||||
// If you find vad cache can not cover all speech, please increase this value.
|
||||
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
|
||||
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
|
||||
|
||||
/********** WakeNet(Wake Word Engine) **********/
|
||||
bool wakenet_init; // Whether to init wakenet
|
||||
char *wakenet_model_name; // The model name of wakenet 1
|
||||
char *wakenet_model_name_2; // The model name of wakenet 2 if has wakenet 2
|
||||
det_mode_t wakenet_mode; // The mode of wakenet
|
||||
|
||||
/********** AGC(Automatic Gain Control) **********/
|
||||
bool agc_init; // Whether to init agc
|
||||
afe_agc_mode_t
|
||||
agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after far linear gain. NOTE(review): "far" is presumably a typo for "afe" (afe_linear_gain) - confirm
|
||||
int agc_compression_gain_db; // Compression gain in dB (default 9)
|
||||
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS)
|
||||
|
||||
/********** General AFE(Audio Front End) parameter **********/
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR, AFE_TYPE_VC or AFE_TYPE_VC_8K
|
||||
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
|
||||
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
|
||||
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
|
||||
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
|
||||
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts
|
||||
// directly on the output amplitude: afe_linear_gain * amplitude.
|
||||
bool debug_init; // Whether to init debug mode
|
||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||
// otherwise, select channel number by wakenet
|
||||
} afe_config_t;
|
||||
|
||||
/**
|
||||
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based
|
||||
* on the chip target and input format. You can manually fine-tune it after creating the configuration
|
||||
*
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param models Models from partition, which is configured by Kconfig
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_config_t* The default config of afe
|
||||
*/
|
||||
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Check AFE configuration and make sure it is correct.
|
||||
*
|
||||
* @warning If there is a configuration conflict, this function will modify some parameters.
|
||||
* The guiding principle behind these modifications is to maintain the highest performance of the output audio and results,
|
||||
* And remove the conflict between different algorithms.
|
||||
*
|
||||
* For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
|
||||
* If SE(BSS) algorithm is deactivated, will only use the first microphone channel.
|
||||
*
|
||||
* @param afe_config Input AFE config
|
||||
*
|
||||
* @return afe_config_t* The modified AFE config
|
||||
*/
|
||||
afe_config_t *afe_config_check(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input format
|
||||
*
|
||||
* @param input_format The input format, same with afe_config_init() function
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
* @return true if the input format is parsed successfully, otherwise false
|
||||
*/
|
||||
bool afe_parse_input_format(const char *input_format, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse I2S input data
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param mic_data The output microphone data
|
||||
* @param ref_data The output playback reference data
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
*/
|
||||
void afe_parse_input(int16_t *data, int frame_size, int16_t *mic_data, int16_t *ref_data, afe_pcm_config_t *pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Format input data, from contiguous arrangement to interleaved arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param data The input audio data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param factor The gain factor
|
||||
*
|
||||
* @return int16_t* The output audio data
|
||||
*/
|
||||
int16_t *afe_adjust_gain(int16_t *data, int frame_size, float factor);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param in_data The input audio data
|
||||
* @param in_frame_size Frame size of the input data
|
||||
* @param channel_num The channel number of input data, which is same as output data
|
||||
* @param out_data The output audio data
|
||||
* @param out_frame_size Frame size of the output data
|
||||
*
|
||||
*/
|
||||
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t *out_data, int out_frame_size);
|
||||
|
||||
/**
|
||||
* @brief Copy the afe config
|
||||
*
|
||||
* @param dst_config The destination afe config
|
||||
* @param src_config The source afe config
|
||||
*
|
||||
* @return The destination afe config
|
||||
*/
|
||||
afe_config_t *afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
|
||||
|
||||
/**
|
||||
* @brief Print the afe config
|
||||
*
|
||||
* @param afe_config The afe config
|
||||
*/
|
||||
void afe_config_print(const afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Allocate afe config
|
||||
*
|
||||
* Takes no arguments. The result is presumably released with afe_config_free() - TODO confirm.
|
||||
*
|
||||
* @return The afe config pointer
|
||||
*/
|
||||
afe_config_t *afe_config_alloc(void);
|
||||
|
||||
/**
|
||||
* @brief Free afe config
|
||||
*
|
||||
* @param afe_config The afe config pointer
|
||||
*/
|
||||
void afe_config_free(afe_config_t *afe_config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _ESP_AFE_DOA_H_
|
||||
#define _ESP_AFE_DOA_H_
|
||||
|
||||
#include "esp_doa.h"
|
||||
#include "esp_afe_config.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
doa_handle_t *doa_handle;
|
||||
afe_pcm_config_t pcm_config;
|
||||
int16_t *leftdata;
|
||||
int16_t *rightdata;
|
||||
int frame_size;
|
||||
} afe_doa_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param input_format The input format
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized afe_doa_handle_t object pointer. Using the example configuration above is recommended for better performance
|
||||
*/
|
||||
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param handle afe_doa_handle_t instance pointer
|
||||
* @param indata Input audio data, format is defined by input_format.
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param handle afe_doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void afe_doa_destroy(afe_doa_handle_t *handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_AFE_DOA_H_ */
|
||||
@@ -0,0 +1,237 @@
|
||||
#pragma once
|
||||
#include "esp_afe_config.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// AFE: Audio Front-End
|
||||
// SR: Speech Recognition
|
||||
// afe_sr/AFE_SR: the audio front-end for speech recognition
|
||||
|
||||
// Opaque AFE_SR data container
|
||||
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of vad
|
||||
*/
|
||||
typedef enum {
|
||||
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
|
||||
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
|
||||
} afe_vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief The result returned by the fetch / fetch_with_delay functions
|
||||
*/
|
||||
typedef struct afe_fetch_result_t {
|
||||
int16_t *data; // the target channel data of audio.
|
||||
int data_size; // the size of data. The unit is byte.
|
||||
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the
|
||||
// audio that was truncated.
|
||||
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc.
|
||||
// (note: invalid in vc). If wakenet is enabled, the window length is the receptive field of
|
||||
// wakenet (about 1.5s), otherwise it is the frame length.
|
||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||
int wake_word_index; // if the wake word is detected, this stores the wake word index, which starts from 1.
|
||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index
|
||||
// starts from 1.
|
||||
vad_state_t vad_state; // the value is vad_state_t
|
||||
int trigger_channel_id; // the channel index of output
|
||||
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||
int ret_value; // the return state of fetch function
|
||||
int16_t *raw_data; // the multi-channel output data of audio.
|
||||
int raw_data_channels; // the channel number of raw data
|
||||
float ringbuff_free_pct; // the percent of ringbuff free size. if the value is larger than 0.5, it means the ringbuff is busy. NOTE(review): "free" above 0.5 meaning "busy" reads inverted -- confirm against the implementation.
|
||||
void *reserved; // reserved for future use
|
||||
} afe_fetch_result_t;
|
||||
|
||||
/**
|
||||
* @brief Function to initialize an AFE_SR instance
|
||||
*
|
||||
* @param afe_config The config of AFE_SR
|
||||
* @returns Handle to the AFE_SR data
|
||||
*/
|
||||
typedef esp_afe_sr_data_t *(*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of each channel samples per frame that need to be passed to the function
|
||||
*
|
||||
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of samples to feed the fetch function
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of total channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the function
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the AFE_SR
|
||||
*
|
||||
* @Warning The input data should be arranged in the format of channel interleaving.
|
||||
* The last channel is reference signal if it has reference data.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*
|
||||
* @param in The input microphone signal, only support signed 16-bit @ 16 KHZ. The frame size can be queried by the
|
||||
* `get_feed_chunksize`.
|
||||
* @return The size of input
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t *in);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR
|
||||
*
|
||||
* @Warning The output is single channel data, no matter how many channels the input is.
|
||||
* Timeout is 2000 ms. If you want to adjust timeout, please refer to the definition of `fetch_with_delay`.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR, same with the function `fetch`
|
||||
*
|
||||
* @Warning The output is single channel data, no matter how many channels the input is.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output
|
||||
* audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t *(*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
|
||||
|
||||
/**
|
||||
* @brief reset ringbuf of AFE.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Set wakenet detection threshold
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @param threshold The wakenet detection threshold, the value is between 0.4 and 0.9999.
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_set_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index, float threshold);
|
||||
|
||||
/**
|
||||
* @brief Reset wakenet detection threshold to initial state
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param index The wakenet index, just support 1: wakenet1 or 2: wakenet2
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_wakenet_threshold_t)(esp_afe_sr_data_t *afe, int index);
|
||||
|
||||
/**
|
||||
* @brief Reset one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 1: success
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_reset_op_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Disable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Print all functions/modules/algorithms pipeline.
|
||||
* The pipeline is the order of the functions/modules/algorithms.
|
||||
* The format like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Destroy a AFE_SR instance
|
||||
*
|
||||
* @param afe AFE_SR object to destroy
|
||||
*/
|
||||
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a AFE_SR.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_afe_sr_iface_op_create_from_config_t create_from_config;
|
||||
esp_afe_sr_iface_op_feed_t feed;
|
||||
esp_afe_sr_iface_op_fetch_t fetch;
|
||||
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
|
||||
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_afe_sr_iface_op_set_wakenet_threshold_t set_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_reset_wakenet_threshold_t reset_wakenet_threshold;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_aec;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_aec;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_se;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_se;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_vad;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_vad;
|
||||
esp_afe_sr_iface_op_reset_op_t reset_vad;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_ns;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_ns;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_agc;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_agc;
|
||||
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
|
||||
esp_afe_sr_iface_op_destroy_t destroy;
|
||||
} esp_afe_sr_iface_t;
|
||||
|
||||
// struct is used to store the AFE handle and data for the AFE task
|
||||
typedef struct {
|
||||
esp_afe_sr_data_t *afe_data;
|
||||
esp_afe_sr_iface_t *afe_handle;
|
||||
TaskHandle_t feed_task;
|
||||
TaskHandle_t fetch_task;
|
||||
} afe_task_into_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "esp_afe_sr_iface.h"
|
||||
|
||||
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
47
managed_components/espressif__esp-sr/include/esp32/esp_agc.h
Normal file
47
managed_components/espressif__esp-sr/include/esp32/esp_agc.h
Normal file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AGC_H_
|
||||
#define _ESP_AGC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
////all non-negative values are valid, negative values are errors
|
||||
typedef enum {
|
||||
ESP_AGC_SUCCESS = 0, ////success
|
||||
ESP_AGC_FAIL = -1, ////agc fail
|
||||
ESP_AGC_SAMPLE_RATE_ERROR = -2, ///sample rate can be only 8khz, 16khz, 32khz
|
||||
ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size
|
||||
} ESP_AGE_ERR;
|
||||
|
||||
typedef enum {
|
||||
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
|
||||
AGC_MODE_0 = 0, // Only saturation protection
|
||||
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
|
||||
} agc_mode_t;
|
||||
|
||||
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
|
||||
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
|
||||
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
|
||||
void esp_agc_close(void *agc_handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _ESP_AGC_H_
|
||||
41
managed_components/espressif__esp-sr/include/esp32/esp_doa.h
Normal file
41
managed_components/espressif__esp-sr/include/esp32/esp_doa.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef _ESP_DOA_H_
|
||||
#define _ESP_DOA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct doa_handle_t doa_handle_t;
|
||||
/**
|
||||
* @brief Initialize SRP-PHAT processor
|
||||
* @param fs Sampling rate (Hz), e.g., 16000
|
||||
* @param resolution Angular search resolution (degrees), e.g., 20
|
||||
* @param d_mics Microphone spacing (meters), e.g., 0.06
|
||||
* @param input_timedate_samples input timedate samples, e.g., 1024
|
||||
* @return Initialized doa_handle_t object pointer, Recommend using the above configuration for better performance
|
||||
*/
|
||||
doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples);
|
||||
|
||||
/**
|
||||
* @brief Release all allocated resources
|
||||
* @param doa doa_handle_t instance pointer to be freed
|
||||
*/
|
||||
void esp_doa_destroy(doa_handle_t *doa);
|
||||
|
||||
/**
|
||||
* @brief Process audio frame for direction estimation
|
||||
* @param doa doa_handle_t instance pointer
|
||||
* @param left Left channel 16-bit PCM data
|
||||
* @param right Right channel 16-bit PCM data
|
||||
* @return Estimated sound direction in degrees, e.g., 0-180
|
||||
*/
|
||||
float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ESP_DOA_H_ */
|
||||
@@ -0,0 +1,93 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_MASE_H_
|
||||
#define _ESP_MASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
|
||||
#define MASE_FRAME_SIZE 16 // Supports 16ms only
|
||||
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
|
||||
|
||||
/**
|
||||
* @brief Sets mic-array type, currently 2-mic line array and 3-mic circular array
|
||||
* are supported.
|
||||
*/
|
||||
typedef enum {
|
||||
TWO_MIC_LINE = 0,
|
||||
THREE_MIC_CIRCLE = 1
|
||||
} mase_mic_array_type_t;
|
||||
|
||||
/**
|
||||
* @brief Sets operating mode, supporting normal mode and wake-up enhancement mode
|
||||
*/
|
||||
typedef enum {
|
||||
NORMAL_ENHANCEMENT_MODE = 0,
|
||||
WAKE_UP_ENHANCEMENT_MODE = 1
|
||||
} mase_op_mode_t;
|
||||
|
||||
typedef void* mase_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the MASE structure.
|
||||
*
|
||||
* @param fs The sampling frequency (Hz) must be 16000.
|
||||
*
|
||||
* @param frame_size The length of the audio processing must be 16ms.
|
||||
*
|
||||
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array.
|
||||
*
|
||||
* @param mic_distance The distance between neighboring microphones in mm.
|
||||
*
|
||||
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode.
|
||||
*
|
||||
* @param filter_strength Strength of the mic-array speech enhancement, must be 0, 1, 2 or 3.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: An instance of MASE
|
||||
*/
|
||||
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
|
||||
|
||||
/**
|
||||
* @brief Performs mic array processing for one frame.
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @param in An array of 16-bit signed audio samples from mic.
|
||||
*
|
||||
* @param dsp_out Returns enhanced signal.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
|
||||
|
||||
/**
|
||||
* @brief Free the MASE instance
|
||||
*
|
||||
* @param st The instance of MASE.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_destory(mase_handle_t st);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,89 @@
|
||||
#pragma once
|
||||
#include "esp_speech_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
|
||||
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
|
||||
multiple implementations can be used.
|
||||
*/
|
||||
|
||||
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
|
||||
|
||||
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
|
||||
// please refer to its documentation for details.
|
||||
typedef struct {
|
||||
int winstep_ms; // The step between successive windows in ms. (10)
|
||||
int winlen_ms; // The length of the analysis window in ms. (25)
|
||||
int nch; // The number of input channel
|
||||
int numcep; // The number of cepstrum to return
|
||||
int nfilter; // The number of filters in the filterbank
|
||||
int nfft; // The FFT size
|
||||
int samp_freq; // The sample-rate of the signal.
|
||||
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
|
||||
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
|
||||
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
|
||||
char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey"
|
||||
bool append_energy; // If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
|
||||
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
|
||||
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
|
||||
float log_epsilon; // log epsilon. (e.g. 1e-7)
|
||||
bool psram_first; // Alloc memory from PSRAM first
|
||||
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
|
||||
} esp_mfcc_opts_t;
|
||||
|
||||
/**
|
||||
* @brief Un-initialize and free a mfcc runner
|
||||
*
|
||||
* Function to free a previously allocated mfcc runner.
|
||||
*
|
||||
* @param r Runner object to destroy
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Initialize parameters for a mfcc runner.
|
||||
*
|
||||
* After creation, a mfcc runner needs to be initialized first; this is usually done
|
||||
* in the initialization routine of a speech recognition algorithm. This provides
|
||||
* a pointer to do this for a specific mfcc runner.
|
||||
*
|
||||
* @param opt Options for the mfcc process
|
||||
* @return Pointer to the created mfcc runner data.
|
||||
*/
|
||||
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
|
||||
|
||||
/**
|
||||
* @brief Run a mfcc iteration on frame by frame
|
||||
*
|
||||
* This will take a set of samples and return a cepstrum. Note that this may be pipelined:
|
||||
* an initial call to this function may return NULL and subsequent calls may return the
|
||||
* cepstrum of previous calls.
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
* @param samp An array of signed 16-bit samples. The amount of samples should be samp_freq * winstep_ms / 1000.
|
||||
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
|
||||
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
|
||||
* to this function is done.
|
||||
*/
|
||||
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
|
||||
|
||||
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
|
||||
|
||||
/**
|
||||
* @brief Clean all state of mfcc handle
|
||||
*
|
||||
* @param r The mfcc runner
|
||||
*/
|
||||
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
|
||||
|
||||
/**
|
||||
* @brief Operations possible on a mfcc runner
|
||||
*/
|
||||
typedef struct {
|
||||
esp_mfcc_op_destroy_t destroy;
|
||||
esp_mfcc_op_create_t create;
|
||||
esp_mfcc_op_run_step_t run_step;
|
||||
esp_mfcc_op_run_step_s16_t run_step_s16;
|
||||
esp_mfcc_op_clean_t clean;
|
||||
} esp_mfcc_iface_t;
|
||||
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include "esp_mfcc_iface.h"
|
||||
|
||||
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
|
||||
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9 & multinet5
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_wn9(void); // (void) makes this a true prototype: calls passing arguments are rejected at compile time
|
||||
|
||||
/**
|
||||
* @brief Return basic opts used in wakenet9s
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts(const char *win_type, bool use_power, int winstep_ms, int winlen_ms, int nfilter);
|
||||
|
||||
/**
|
||||
* @brief Return basic opts for default kaldifeat
|
||||
*
|
||||
opts->psram_first = true;
|
||||
opts->use_power = true;
|
||||
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
|
||||
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
|
||||
opts->win_type = "povey";
|
||||
opts->low_freq = 20;
|
||||
opts->high_freq = 7600;
|
||||
opts->samp_freq = 16000;
|
||||
opts->nch = 1;
|
||||
opts->nfft = 512;
|
||||
opts->nfilter = 80;
|
||||
opts->numcep = 80;
|
||||
opts->preemph = 0.97;
|
||||
opts->append_energy = false;
|
||||
opts->winlen_ms = 25;
|
||||
opts->winstep_ms = 10;
|
||||
opts->remove_dc_offset = true;
|
||||
*
|
||||
**/
|
||||
esp_mfcc_opts_t *get_mfcc_opts_kaldi(void); // (void) makes this a true prototype: calls passing arguments are rejected at compile time
|
||||
|
||||
/**
|
||||
* @brief Print mfcc opts
|
||||
**/
|
||||
void print_mfcc_opts(esp_mfcc_opts_t *opts);
|
||||
@@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ESP_MN_RESULT_MAX_NUM 5
|
||||
#define ESP_MN_MAX_PHRASE_NUM 400
|
||||
#define ESP_MN_MAX_PHRASE_LEN 63
|
||||
#define ESP_MN_MIN_PHRASE_LEN 2
|
||||
|
||||
#define ESP_MN_PREFIX "mn"
|
||||
#define ESP_MN_ENGLISH "en"
|
||||
#define ESP_MN_CHINESE "cn"
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_STATE_DETECTING = 0, // detecting
|
||||
ESP_MN_STATE_DETECTED = 1, // detected
|
||||
ESP_MN_STATE_TIMEOUT = 2, // time out
|
||||
} esp_mn_state_t;
|
||||
|
||||
// Multinet loading mode.
|
||||
// Memory consumption decreases as the mode value increases;
|
||||
// as a consequence, the CPU load goes up.
|
||||
typedef enum {
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with maximum memory consumption
|
||||
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and load the rest from FLASH (default)
|
||||
ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation
|
||||
} esp_mn_loader_mode_t;
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_GREEDY_SEARCH = 0, // greedy search
|
||||
ESP_MN_BEAM_SEARCH = 1, // beam search
|
||||
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
|
||||
} esp_mn_search_method_t;
|
||||
|
||||
typedef enum {
|
||||
CHINESE_ID = 1, // Chinese language
|
||||
ENGLISH_ID = 2, // English language
|
||||
} language_id_t;
|
||||
|
||||
// Return all possible recognition results
|
||||
typedef struct{
|
||||
esp_mn_state_t state;
|
||||
int num; // The number of phrase in list, num<=5. When num=0, no phrase is recognized.
|
||||
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
|
||||
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
|
||||
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
|
||||
char string[256]; // recognized string with commands graph
|
||||
char raw_string[256]; // recognized string without commands graph
|
||||
} esp_mn_results_t;
|
||||
|
||||
typedef struct {
|
||||
char *string; // command string
|
||||
char *phonemes; // command phonemes, if applicable
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
} esp_mn_phrase_t;
|
||||
|
||||
typedef struct _mn_node_ {
|
||||
esp_mn_phrase_t *phrase;
|
||||
struct _mn_node_ *next;
|
||||
} esp_mn_node_t;
|
||||
|
||||
typedef struct{
|
||||
int16_t num; // The number of error phrases, i.e. phrases that could not be added to the model
|
||||
esp_mn_phrase_t **phrases; // The array of pointers to the error phrases
|
||||
} esp_mn_error_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a model instance with specified model name.
|
||||
*
|
||||
* @param model_name The multinet model name.
|
||||
* @param duration The duration (ms) to trigger the timeout
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
|
||||
|
||||
/**
|
||||
* @brief Switch multinet mode to change memory consumption and CPU loading
|
||||
*
|
||||
* @warning Just Support multinet6 or later versions
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param mode The multinet loader mode
|
||||
*
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the number of frames recognized by the command word
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of the frames recognized by the command word
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the language of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
|
||||
*/
|
||||
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The state of multinet
|
||||
*/
|
||||
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech commands recognition model
|
||||
*
|
||||
* @param model The Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get recognition results
|
||||
*
|
||||
* @param model The Model object to query
|
||||
*
|
||||
* @return The current results.
|
||||
*/
|
||||
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Open the log print
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Clean all status of model
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
*
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Set the speech commands by mn_command_root
|
||||
*
|
||||
* @param model_data The model object to query.
|
||||
* @param mn_command_root The speech commands link.
|
||||
* @return The error phrase id info.
|
||||
*/
|
||||
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Check if input string can be tokenized
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
* @param str The input string
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, const char *str);
|
||||
|
||||
typedef struct {
|
||||
esp_mn_iface_op_create_t create;
|
||||
esp_mn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
|
||||
esp_mn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_mn_iface_op_get_language_t get_language;
|
||||
esp_mn_iface_op_detect_t detect;
|
||||
esp_mn_iface_op_destroy_t destroy;
|
||||
esp_mn_iface_op_get_results_t get_results;
|
||||
esp_mn_iface_op_open_log_t open_log;
|
||||
esp_mn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_set_speech_commands set_speech_commands;
|
||||
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode;
|
||||
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands;
|
||||
esp_mn_iface_op_check_speech_command check_speech_command;
|
||||
} esp_mn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
//a specific phrase or word.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
* @brief Get the multinet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of multinet
|
||||
*/
|
||||
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the multinet language from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The language of multinet
|
||||
*/
|
||||
char *esp_mn_language_from_name(char *model_name);
|
||||
|
||||
/*
|
||||
Configure the multinet model to use based on what's selected in menuconfig.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
|
||||
#include "multinet2_ch.h"
|
||||
#define MULTINET_COEFF get_coeff_multinet2_ch
|
||||
#define MULTINET_MODEL_NAME "mn2_cn"
|
||||
|
||||
#else
|
||||
#define MULTINET_COEFF "COEFF_NULL"
|
||||
#define MULTINET_MODEL_NAME "NULL"
|
||||
#endif
|
||||
|
||||
|
||||
/* example
|
||||
|
||||
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
|
||||
|
||||
//Initialize MultiNet model data
|
||||
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
|
||||
add_speech_commands(multinet, model_data);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=multinet->get_samp_chunksize(model_data);
|
||||
int frequency = multinet->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=multinet->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
multinet->destroy(model_data);
|
||||
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
86
managed_components/espressif__esp-sr/include/esp32/esp_ns.h
Normal file
86
managed_components/espressif__esp-sr/include/esp32/esp_ns.h
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_NS_H_
|
||||
#define _ESP_NS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NS_USE_SPIARM 0
|
||||
#define NS_FRAME_LENGTH_MS 10 //Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* The Sampling frequency (Hz) must be 16000Hz
|
||||
*/
|
||||
|
||||
typedef void* ns_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the NS structure.
|
||||
*
|
||||
* @param frame_length The length of the audio processing can be 10ms, 20ms, 30ms.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_create(int frame_length);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of the more powerful noise suppression algorithm.
|
||||
*
|
||||
* @warning frame_length only supports be 10 ms.
|
||||
*
|
||||
* @param frame_length The length of the audio processing can only be 10ms.
|
||||
* @param mode 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_pro_create(int frame_length, int mode, int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the NS and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @param outdata An array of 16-bit signed audio samples after noise suppression.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Free the NS instance
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_destroy(ns_handle_t inst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_NS_H_
|
||||
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct esp_nsn_data_t esp_nsn_data_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance
|
||||
*
|
||||
* @param model_name The name of the model instance
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the process function
|
||||
*
|
||||
* Every noise suppression model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the process function
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the noise suppression model and get data after process.
|
||||
*
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param in_data An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @param out_data An array of 16-bit signed audio samples after process.
|
||||
* @return The state of return.
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the process function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a noise suppression model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a noise suppression model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_nsn_iface_op_create_t create;
|
||||
esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_nsn_iface_op_process_t process;
|
||||
esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_nsn_iface_op_destroy_t destroy;
|
||||
} esp_nsn_iface_t;
|
||||
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include "esp_nsn_iface.h"
|
||||
|
||||
/*
|
||||
The prefix of nsnet
|
||||
Now there are nsnet1 and nsnet2
|
||||
*/
|
||||
#define ESP_NSNET_PREFIX "nsnet"
|
||||
|
||||
/**
|
||||
* @brief Get the nsnet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of nsnet
|
||||
*/
|
||||
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
|
||||
@@ -0,0 +1,62 @@
|
||||
#pragma once
|
||||
#include "c_speech_features_config.h"
|
||||
#include "stdlib.h"
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifndef M_2PI
|
||||
#define M_2PI 6.283185307179586476925286766559005
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
float *coeff;
|
||||
int *bank_pos;
|
||||
int nfilter;
|
||||
} esp_mel_filter_t;
|
||||
|
||||
float *esp_mfcc_malloc(size_t size, bool from_psram);
|
||||
|
||||
void esp_mfcc_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Initialize FFT table
|
||||
* @warning For ESP-PLATFORM, use esp-dsp fft
|
||||
* For Other platform, use kiss fft
|
||||
*
|
||||
* @param nfft The input samples number
|
||||
* @return fft-table
|
||||
**/
|
||||
void *esp_fft_init(int nfft);
|
||||
|
||||
/**
|
||||
* @brief Free FFT table
|
||||
* @warning For ESP-PLATFORM, use esp-dsp fft
|
||||
* For Other platform, use kiss fft
|
||||
*
|
||||
* @param fft_table The fft table initialized by esp_fft_init
|
||||
* @param nfft The input samples number
|
||||
* @return None
|
||||
**/
|
||||
void esp_fft_deinit(void *fft_table, int nfft);
|
||||
|
||||
/**
|
||||
* @brief Initialize window function
|
||||
* Currently support hanning, hamming, sine, povey, rectangular,
|
||||
* wn9(512-hanning to get wakenet9& multinet5 compatible)
|
||||
**/
|
||||
float *esp_win_func_init(char *win_type, float *window_data, int frame_length);
|
||||
|
||||
float *esp_fftr(float *x, int nfft, void *fft_table);
|
||||
|
||||
float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_handle);
|
||||
|
||||
void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc);
|
||||
|
||||
float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last);
|
||||
|
||||
esp_mel_filter_t *esp_mel_filter_init(
|
||||
int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram);
|
||||
|
||||
void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter);
|
||||
|
||||
float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon);
|
||||
@@ -0,0 +1,84 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_WEBRTC_H_
|
||||
#define _ESP_WEBRTC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "esp_agc.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_ns.h"
|
||||
#include "sr_ringbuf.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#include "esp_heap_caps.h"
|
||||
|
||||
typedef struct {
|
||||
void *ns_handle;
|
||||
void *agc_handle;
|
||||
int frame_size;
|
||||
int sample_rate;
|
||||
int16_t *buff;
|
||||
int16_t *out_data;
|
||||
sr_ringbuf_handle_t rb;
|
||||
} webrtc_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of webrtc.
|
||||
*
|
||||
* @warning frame_length_ms supports 10 ms, 20 ms, 30 ms and 32 ms.
|
||||
*
|
||||
* @param frame_length_ms The length of the audio processing
|
||||
* @param ns_mode The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param agc_mode The mode of AGC
|
||||
* @param agc_gain The gain of AGC. default is 9
|
||||
* @param agc_target_level The target level of AGC. default is -3 dbfs
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of webrtc
|
||||
*/
|
||||
webrtc_handle_t *webrtc_create(
|
||||
int frame_length_ms, int ns_mode, agc_mode_t agc_mode, int agc_gain, int agc_target_level, int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the webrtc and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
* @param size The sample size of output data
|
||||
* @param enable_ns Enable noise suppression
|
||||
* @param enable_agc Enable automatic gain control
|
||||
*
|
||||
* @return data after noise suppression
|
||||
*/
|
||||
int16_t *webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
|
||||
|
||||
/**
|
||||
* @brief Free the webrtc instance
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void webrtc_destroy(webrtc_handle_t *handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_WEBRTC_H_
|
||||
178
managed_components/espressif__esp-sr/include/esp32/esp_vad.h
Normal file
178
managed_components/espressif__esp-sr/include/esp32/esp_vad.h
Normal file
@@ -0,0 +1,178 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_VAD_H_
|
||||
#define _ESP_VAD_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SAMPLE_RATE_HZ 16000 // Supports 32000, 16000, 8000
|
||||
#define VAD_FRAME_LENGTH_MS 30 // Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
||||
* restrictive in reporting speech. If you want to detect more speech, please select a lower mode.
|
||||
*/
|
||||
typedef enum {
|
||||
VAD_MODE_0 = 0, // Normal
|
||||
VAD_MODE_1, // Aggressive
|
||||
VAD_MODE_2, // Very Aggressive
|
||||
VAD_MODE_3, // Very Very Aggressive
|
||||
VAD_MODE_4 // Very Very Very Aggressive
|
||||
} vad_mode_t;
|
||||
|
||||
typedef enum {
|
||||
VAD_SILENCE = 0,
|
||||
VAD_SPEECH = 1,
|
||||
} vad_state_t;
|
||||
|
||||
typedef struct vad_trigger_tag {
|
||||
vad_state_t state;
|
||||
unsigned int min_speech_len;
|
||||
unsigned int noise_len;
|
||||
unsigned int min_noise_len;
|
||||
unsigned int speech_len;
|
||||
} vad_trigger_t;
|
||||
|
||||
// Maximum length value, INT32_MAX - 1. The replacement list is parenthesized so
// the macro stays a single operand inside larger expressions
// (e.g. vad_MAX_LEN / 2 or -vad_MAX_LEN).
#define vad_MAX_LEN (INT32_MAX - 1)
|
||||
/**
|
||||
* @brief Allocate VAD trigger
|
||||
*
|
||||
* @param min_speech_len Minimum frame number of speech duration
|
||||
* @param min_noise_len Minimum frame number of noise duration
|
||||
*
|
||||
* @return Trigger pointer
|
||||
**/
|
||||
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
|
||||
|
||||
/**
|
||||
* @brief Free VAD trigger
|
||||
**/
|
||||
void vad_trigger_free(vad_trigger_t *trigger);
|
||||
|
||||
/**
|
||||
* @brief Reset VAD trigger
|
||||
**/
|
||||
void vad_trigger_reset(vad_trigger_t *trigger);
|
||||
|
||||
/**
|
||||
* @brief Detect voice activity by trigger
|
||||
**/
|
||||
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||
|
||||
typedef struct {
|
||||
vad_trigger_t *trigger;
|
||||
void *vad_inst;
|
||||
int sample_rate;
|
||||
int frame_size;
|
||||
} vad_handle_with_trigger_t;
|
||||
|
||||
typedef vad_handle_with_trigger_t *vad_handle_t;
|
||||
|
||||
// typedef vad_handle_tag * vad_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
* @param sample_rate Sample rate in Hz
|
||||
* @param one_frame_ms Length of the audio chunksize, can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @param min_speech_ms Minimum speech duration, unit is ms
|
||||
* @param min_noise_ms Minimum noise duration, unit is ms
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create_with_param(
|
||||
vad_mode_t vad_mode, int sample_rate, int one_frame_ms, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process(vad_handle_t handle, int16_t *data, int sample_rate_hz, int one_frame_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process_with_trigger(vad_handle_t handle, int16_t *data);
|
||||
|
||||
/**
|
||||
* @brief Reset trigger state as Silence
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
*/
|
||||
void vad_reset_trigger(vad_handle_t handle);
|
||||
|
||||
/**
|
||||
* @brief Free the VAD instance
|
||||
*
|
||||
* @param inst The instance of VAD.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void vad_destroy(vad_handle_t inst);
|
||||
|
||||
/*
|
||||
* Programming Guide:
|
||||
*
|
||||
* @code{c}
|
||||
* vad_handle_t vad_inst = vad_create(VAD_MODE_3); // Creates an instance to
|
||||
* the VAD structure.
|
||||
*
|
||||
* while (1) {
|
||||
* //Use buffer to receive the audio data from MIC.
|
||||
* vad_state_t vad_state = vad_process(vad_inst, buffer, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Feed samples to the VAD process and get the result.
|
||||
* }
|
||||
*
|
||||
* vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
|
||||
*
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_VAD_H_
|
||||
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
#include "esp_vad.h"
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
// /**
|
||||
// * @brief The state of vad
|
||||
// */
|
||||
// typedef enum {
|
||||
// VAD_NOISE = -1, // Noise
|
||||
// VADNET_STATE_SILENCE = 0, // Silence
|
||||
// VAD_SPEECH = 1 // Speech
|
||||
// } vad_state_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialze a model instance with a detection mode
|
||||
* and specified model name
|
||||
*
|
||||
* @param model_name The specified model name
|
||||
* @param mode The voice activity detection mode
|
||||
* @param channel_num The number of input audio channels
|
||||
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
|
||||
* speech
|
||||
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
|
||||
* noise
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
|
||||
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect
|
||||
* function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the
|
||||
* same time. This function can be used to query that amount. Note that the
|
||||
* returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of channels
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger speech, the range of
|
||||
* det_threshold is 0.5~0.9999
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||
|
||||
/**
|
||||
* @brief Get the voice activity detection threshold
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used
|
||||
* can be queried by the get_samp_chunksize function.
|
||||
* @return The vad state of the input samples: VAD_SILENCE if no voice is
|
||||
* detected, VAD_SPEECH if voice is detected.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The vad state of the input MFCC: VAD_SILENCE if no voice is
|
||||
* detected, VAD_SPEECH if voice is detected.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a model object
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a voice
|
||||
* activity detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_vadn_iface_op_create_t create;
|
||||
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_vadn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_vadn_iface_op_detect_t detect;
|
||||
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_vadn_iface_op_clean_t clean;
|
||||
esp_vadn_iface_op_destroy_t destroy;
|
||||
} esp_vadn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
#include "esp_vadn_iface.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The prefix of vadnet model name is used to filter all vadnet from available models.
|
||||
#define ESP_VADN_PREFIX "vadnet"
|
||||
|
||||
/**
|
||||
* @brief Get the vadnet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of vadnet
|
||||
*/
|
||||
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_WEBRTC_H_
|
||||
#define _ESP_WEBRTC_H_
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include "sr_ringbuf.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_ns.h"
|
||||
|
||||
#include "esp_heap_caps.h"
|
||||
|
||||
typedef struct {
|
||||
void* ns_handle;
|
||||
void* agc_handle;
|
||||
int frame_size;
|
||||
int sample_rate;
|
||||
int16_t *buff;
|
||||
int16_t *out_data;
|
||||
sr_ringbuf_handle_t rb;
|
||||
}webrtc_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of webrtc.
|
||||
*
|
||||
* @warning frame_length_ms supports 10 ms, 20 ms, 30 ms and 32 ms.
|
||||
*
|
||||
* @param frame_length_ms The length of the audio processing
|
||||
* @param ns_mode The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
|
||||
* @param agc_mode The mode of AGC
|
||||
* @param agc_gain The gain of AGC. default is 9
|
||||
* @param agc_target_level The target level of AGC. default is -3 dbfs
|
||||
* @param sample_rate The sample rate of the audio.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of webrtc
|
||||
*/
|
||||
webrtc_handle_t* webrtc_create(
|
||||
int frame_length_ms,
|
||||
int ns_mode,
|
||||
agc_mode_t agc_mode,
|
||||
int agc_gain,
|
||||
int agc_target_level,
|
||||
int sample_rate);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the webrtc and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
* @param size The sample size of output data
|
||||
* @param enable_ns Enable noise suppression
|
||||
* @param enable_agc Enable automatic gain control
|
||||
*
|
||||
* @return data after noise suppression
|
||||
*/
|
||||
int16_t* webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);
|
||||
|
||||
/**
|
||||
* @brief Free the webrtc instance
|
||||
*
|
||||
* @param handle The instance of webrtc.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void webrtc_destroy(webrtc_handle_t *handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ESP_WEBRTC_H_
|
||||
@@ -0,0 +1,226 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
/**
|
||||
* @brief The state of wakeup
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
WAKENET_NO_DETECT = 0, // wake word is not detected
|
||||
WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
|
||||
WAKENET_DETECTED = 1 // wake word is detected
|
||||
} wakenet_state_t;
|
||||
|
||||
//Set wake words recognition operating mode
|
||||
//The probability of being wake words is increased with increasing mode,
|
||||
//As a consequence also the false alarm rate goes up
|
||||
typedef enum {
|
||||
DET_MODE_90 = 0, // Normal
|
||||
DET_MODE_95 = 1, // Aggressive
|
||||
DET_MODE_2CH_90 = 2,
|
||||
DET_MODE_2CH_95 = 3,
|
||||
DET_MODE_3CH_90 = 4,
|
||||
DET_MODE_3CH_95 = 5,
|
||||
DET_MODE_90_COPY_PARAMS = 6, // NOTE(review): was labelled "Aggressive" like DET_MODE_95; presumably the 90 mode with copied parameters - confirm
|
||||
} det_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int wake_word_num; //The number of all wake words
|
||||
char **wake_word_list; //The name list of wake words
|
||||
} wake_word_info_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance with a detection mode and specified wake word coefficient
|
||||
*
|
||||
* @param model_name The specified wake word model coefficient
|
||||
* @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
|
||||
|
||||
/**
|
||||
* @brief Get the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the channel number of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of channels of the samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the start point of wake word when one wake word is detected.
|
||||
*
|
||||
* @Warning: This function should be called when the channel index is verified.
|
||||
* The returned value is the number of samples from start point of wake word to detected point.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The number of samples from start point to detected point (end point)
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the number of wake words
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the number of wake words
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the name of wake word by index
|
||||
*
|
||||
* @Warning The index of wake word starts with 1
|
||||
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the name of the wake word
|
||||
*/
|
||||
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger wake words, the range of det_threshold is 0.4~0.9999
|
||||
* @param word_index The index of wake word
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Reset the threshold to its initial state
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_reset_det_threshold_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the wake word detection threshold of different modes
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
|
||||
*
|
||||
* @Warning The index of wake word starts with 1; 0 means no wake word is detected.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Get the volume gain
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param target_db The target dB to calculate volume gain
|
||||
* @returns the volume gain
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The channel index
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Clean all states of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech recognition model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a wake word detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_wn_iface_op_create_t create;
|
||||
esp_wn_iface_op_get_start_point_t get_start_point;
|
||||
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_wn_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_wn_iface_op_get_word_num_t get_word_num;
|
||||
esp_wn_iface_op_get_word_name_t get_word_name;
|
||||
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_wn_iface_op_reset_det_threshold_t reset_det_threshold;
|
||||
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
|
||||
esp_wn_iface_op_detect_t detect;
|
||||
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_wn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_destroy_t destroy;
|
||||
} esp_wn_iface_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
#include "esp_wn_iface.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The prefix of wakenet model name is used to filter all wakenet models from the available models.
|
||||
#define ESP_WN_PREFIX "wn"
|
||||
|
||||
/**
|
||||
* @brief Get the wakenet handle from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of wakenet
|
||||
*/
|
||||
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
|
||||
|
||||
/**
|
||||
* @brief Get the wake word name from model name
|
||||
*
|
||||
* @param model_name The name of model
|
||||
* @returns The wake word name, like "alexa","hilexin","xiaoaitongxue"
|
||||
*/
|
||||
char *esp_wn_wakeword_from_name(const char *model_name);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
static const esp_wn_iface_t *model = esp_wn_handle_from_name(model_name);
|
||||
|
||||
//Initialize wakeNet model data
|
||||
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=model->get_samp_chunksize(model_data);
|
||||
int frequency = model->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=model->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
model->destroy(model_data);
|
||||
|
||||
*/
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef __FLITE_G2P_H__
|
||||
#define __FLITE_G2P_H__
|
||||
|
||||
typedef struct {
|
||||
int num_phonemes;
|
||||
int phoneme_size;
|
||||
char **phonemes;
|
||||
} flite_g2p_result;
|
||||
|
||||
void flite_g2p_result_free(flite_g2p_result *result);
|
||||
|
||||
flite_g2p_result *flite_g2p_get_result(const char *grapheme);
|
||||
|
||||
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p(const char *graphemes, int map_phonemes);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5X2;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5X3;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_multinet2_ch;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaoxin_wn5X3;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X2;
|
||||
@@ -0,0 +1,9 @@
|
||||
//Generated by mkmodel_py
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X3;
|
||||
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/* #undef ENABLE_DOUBLE */
|
||||
|
||||
#ifdef ENABLE_DOUBLE
|
||||
# define csf_float double
|
||||
# define csf_ceil ceil
|
||||
# define csf_floor floor
|
||||
# define csf_sin sin
|
||||
# define csf_log log
|
||||
# define csf_log10 log10
|
||||
# define csf_pow pow
|
||||
# define csf_sqrt sqrt
|
||||
# define csf_abs fabs
|
||||
# define csf_float_min DBL_MIN
|
||||
#else
|
||||
# define csf_float float
|
||||
# define csf_ceil ceilf
|
||||
# define csf_floor floorf
|
||||
# define csf_sin sinf
|
||||
# define csf_log logf
|
||||
# define csf_log10 log10f
|
||||
# define csf_pow powf
|
||||
# define csf_sqrt sqrtf
|
||||
# define csf_abs fabsf
|
||||
# define csf_float_min FLT_MIN
|
||||
#endif
|
||||
418
managed_components/espressif__esp-sr/include/esp32c3/dl_lib.h
Normal file
418
managed_components/espressif__esp-sr/include/esp32c3/dl_lib.h
Normal file
@@ -0,0 +1,418 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_H
|
||||
#define DL_LIB_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "esp_system.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "sdkconfig.h"
|
||||
#define DL_SPIRAM_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
#include "esp32s3/rom/cache.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int padding_state;
|
||||
|
||||
// /**
|
||||
// * @brief Allocate a chunk of memory which has the given capabilities.
|
||||
// * Equivalent semantics to libc malloc(), for capability-aware memory.
|
||||
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
|
||||
// *
|
||||
// * @param size In bytes, of the amount of memory to allocate
|
||||
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
|
||||
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
|
||||
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
|
||||
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
|
||||
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
|
||||
// * @return Pointer to currently allocated heap memory
|
||||
// **/
|
||||
// void *heap_caps_malloc(size_t size, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Allocate aligned memory from internal memory or external memory.
|
||||
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
|
||||
* else, allocate memory from PSRAM
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently allocated heap memory
|
||||
*/
|
||||
void *dl_lib_calloc(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Always allocate aligned memory from external memory.
|
||||
*
|
||||
* @param cnt Number of continuing chunks of memory to allocate
|
||||
* @param size Size, in bytes, of a chunk of memory to allocate
|
||||
* @param align Aligned size, in bits
|
||||
* @return Pointer to currently aligned heap memory
|
||||
*/
|
||||
void *dl_lib_calloc_psram(int cnt, int size, int align);
|
||||
|
||||
/**
|
||||
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
||||
*
|
||||
* @param ptr Pointer to free
|
||||
*/
|
||||
void dl_lib_free(void *ptr);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
|
||||
* Should be good until an input of 5 or so with a steps factor of 8.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
fptp_t fast_exp(double x, int steps);
|
||||
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
double fast_exp_pro(double x);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a quantized matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
|
||||
fptp_t dl_sigmoid_op(fptp_t in);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a floating point number
|
||||
*
|
||||
* @param v Floating point input number
|
||||
* @return Tanh value
|
||||
*/
|
||||
fptp_t dl_tanh_op(fptp_t v);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fully connected layer operation
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
|
||||
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
|
||||
*/
|
||||
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
|
||||
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
|
||||
* this matrix only needs to be calculated once. This function does that.
|
||||
*
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
|
||||
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, partial quantized version.
|
||||
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons, need to be quantised
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a fully-connected layer pass, fully-quantized version.
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @param out Output matrix that receives the output values of the neurons
|
||||
*/
|
||||
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, fully-quantized version
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix, fully-quantized version
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
|
||||
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
|
||||
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
int dl_sigmoid_op_q(const int in);
|
||||
int16_t dl_sigmoid_op_q8(const int16_t in);
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param v Fixed-point input
|
||||
* @return tanh output
|
||||
*/
|
||||
int dl_tanh_op_q(int v);
|
||||
int16_t dl_tanh_op_q8(int16_t v);
|
||||
|
||||
void load_mat_psram_mn4(void);
|
||||
void load_mat_psram_mn3(void);
|
||||
void free_mat_psram_mn4(void);
|
||||
void free_mat_psram_mn3(void);
|
||||
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
|
||||
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
|
||||
|
||||
int16_t dl_table_tanh_op(int16_t in, int exponent);
|
||||
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
|
||||
|
||||
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, float version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Filter out the number greater than clip in the matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
|
||||
* @param bias Bias for the CNN layer.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1.
|
||||
* @return The result of CNN layer.
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass, quantised version.
|
||||
*
|
||||
* @Warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
|
||||
* @param bias Bias of the neurons.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1
|
||||
* @return The result of CNN layer
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_COEFGETTER_IF_H
|
||||
#define DL_LIB_COEFGETTER_IF_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
#include "dl_lib_matrixq8.h"
|
||||
#include "cJSON.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
|
||||
//dl_batch_normalize_get_sqrtvar first.
|
||||
#define COEF_GETTER_HINT_BNVAR (1<<0)
|
||||
|
||||
/*
|
||||
This struct describes the basic information of model data:
|
||||
word_num: the number of wake words or speech commands
|
||||
word_list: the name list of wake words or speech commands
|
||||
win_list: an int list stored alongside the word and threshold lists; its meaning is not described in this header
|
||||
(presumably a per-word window length - TODO confirm)
|
||||
thresh_list: the threshold list of wake words or speech commands
|
||||
info_str: the string used to reflect the version and information of model data,
|
||||
which consists of the architecture of network, the version of model data, wake words and their thresholds
|
||||
*/
|
||||
typedef struct {
|
||||
int word_num;
|
||||
char **word_list;
|
||||
int *win_list;
|
||||
float *thresh_list;
|
||||
char *info_str;
|
||||
} model_info_t;
|
||||
|
||||
/*
|
||||
Alphabet struct describes the basic graphemes or phonemes.
|
||||
item_num: the number of basic items (graphemes or phonemes)
|
||||
items: the list of basic items
|
||||
*/
|
||||
typedef struct {
|
||||
int item_num;
|
||||
char **items;
|
||||
}alphabet_t;
|
||||
|
||||
/*
|
||||
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
|
||||
For the three matrix getters (getter_f, getter_q and getter_q8, returning dl_matrix2d_t, dl_matrix2dq_t and
|
||||
dl_matrix2dq8_t respectively), the name describes the name of the coefficient matrix, usually the same as the Numpy
|
||||
filename the coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
|
||||
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
|
||||
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the matching free_f/free_q/free_q8 function
|
||||
to release the memory for the returned matrices, when applicable.
|
||||
getter_info, getter_alphabet and getter_config take only the user-defined arg and return a model_info_t, an alphabet_t
|
||||
and a cJSON object respectively; this struct provides no free function for those.
|
||||
*/
|
||||
typedef struct {
|
||||
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
|
||||
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
|
||||
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
|
||||
void (*free_f)(const dl_matrix2d_t *m);
|
||||
void (*free_q)(const dl_matrix2dq_t *m);
|
||||
void (*free_q8)(const dl_matrix2dq8_t *m);
|
||||
const model_info_t* (*getter_info)(void *arg);
|
||||
const alphabet_t* (*getter_alphabet)(void *arg);
|
||||
const cJSON* (*getter_config)(void *arg);
|
||||
} model_coeff_getter_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,180 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef DL_LIB_CONV_QUEUE_H
|
||||
#define DL_LIB_CONV_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Floating-point element type; the convolution queue in this header stores its items as fptp_t.
typedef float fptp_t;
|
||||
|
||||
//Flags for matrices
|
||||
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
// Float convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /**< Length of the queue */
|
||||
int c; /**< Channel number of each queue element */
|
||||
int front; /**< Front (top) position of the queue */
|
||||
int flag; /**< Not used */
|
||||
fptp_t *item; /**< Pointer to the item array */
|
||||
} dl_conv_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue from psram
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Free a convolution queue
|
||||
*
|
||||
* @param cq The convolution queue to free
|
||||
*/
|
||||
void dl_conv_queue_free(dl_conv_queue_t *cq);
|
||||
|
||||
// NOTE(review): undocumented in this header. Judging by the name and the non-const `out`, this presumably
// writes the contents of the queue `cq` into the caller-provided matrix `out` - TODO confirm against the implementation.
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of the queue forward, so that
|
||||
* the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @return Pointer to the oldest element
|
||||
*/
|
||||
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param item The new element
|
||||
*/
|
||||
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer to the element in the convolution queue by offset, does a sigmoid operation
|
||||
* through this pointer, then returns the pointer.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer to the element
|
||||
*/
|
||||
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer to the element in the convolution queue by offset, does a tanh operation
|
||||
* through this pointer, then returns the pointer.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer to the element
|
||||
*/
|
||||
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer to the element in the convolution queue by offset, does a softmax operation
|
||||
* through this pointer, then returns the pointer.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer to the element
|
||||
*/
|
||||
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
// NOTE(review): undocumented. Takes a convolution queue and an offset and returns an fptp_t pointer;
// presumably applies ReLU to the queue element at `offset` and returns that element - TODO confirm.
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
|
||||
// NOTE(review): undocumented. The first parameter is named `cq` but is a dl_matrix2d_t, not a convolution queue;
// presumably a ReLU applied to matrix data at `offset` - TODO confirm.
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
|
||||
// NOTE(review): undocumented. Takes a const queue `a` and a const matrix `b` and returns a dl_matrix2d_t pointer;
// presumably a concatenation of the two - ownership of the returned matrix is not stated here, TODO confirm.
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
|
||||
// NOTE(review): undocumented. Input is a const convolution queue; state_c and state_h are non-const matrices,
// so presumably the LSTM cell/hidden state is updated in place - TODO confirm, including who owns the returned matrix.
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
/**
|
||||
* @brief Fast implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on a convolution queue.
|
||||
*
|
||||
* @warning All input and output convolution queues and matrices must already be allocated. The returned pointer
|
||||
* is the first element of the output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
// NOTE(review): undocumented. Takes an input queue, rate, size, kernel and bias, and writes to a dl_matrix2d_t `out`
// rather than to a convolution queue; presumably a convolution step over the queue - TODO confirm.
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
|
||||
/**
|
||||
* @brief Fast implementation of the dilation layer, as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @warning All input and output convolution queues and matrices must already be allocated. The returned pointer
|
||||
* is the first element of the output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
|
||||
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
|
||||
|
||||
|
||||
// NOTE(review): undocumented; by its name this looks like a test/debug entry point for the atrous convolution.
// Confirm whether it is meant to be part of the public header.
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user