add some code
This commit is contained in:
1
managed_components/espressif__dl_fft/.component_hash
Normal file
1
managed_components/espressif__dl_fft/.component_hash
Normal file
@@ -0,0 +1 @@
|
||||
7dadbd644c0d7ba4733cc3726ec4cff6edf27b043725e1115861dec1609a3d28
|
||||
1
managed_components/espressif__dl_fft/CHECKSUMS.json
Normal file
1
managed_components/espressif__dl_fft/CHECKSUMS.json
Normal file
@@ -0,0 +1 @@
|
||||
{"version": "1.0", "algorithm": "sha256", "created_at": "2025-08-22T07:02:16.273928+00:00", "files": [{"path": "CMakeLists.txt", "size": 1121, "hash": "b8db31748630321376c2a5998ea99d300a2d7f77d41b6a84d6f838ae18001632"}, {"path": "README.md", "size": 3606, "hash": "73f10b8cb40f463d4758f3c67e686105e8d6dea8f65a209219c7e2fac8c6cc81"}, {"path": "benchmark_esp32c5.md", "size": 6313, "hash": "af1c5eec587b0f7addac1e94c31a581a49f7121219826df53cbab973e6249786"}, {"path": "benchmark_esp32p4.md", "size": 5015, "hash": "b204be13020ff47f967c35dd1d60727013a83bbffbea2f0a1ca93874fe1e6226"}, {"path": "benchmark_esp32s3.md", "size": 5015, "hash": "9e9f5fa858453c853f83d63913dc974bd8c5fc7e5883400f7c1e50ffa6ef44a4"}, {"path": "dl_fft.h", "size": 4787, "hash": "f2cca68876cce36d24d5e0e1b9fe8bac3e09f65c5d065c5451b43713cada65c0"}, {"path": "dl_fft.hpp", "size": 9514, "hash": "abb72093b97832af83f5e0eb5289de0a02ead0325049d3192a39295bc166fd55"}, {"path": "dl_fft_f32.c", "size": 2231, "hash": "895ea2322a0740cf02bd6075453a247b5144aefceb8f31b6717248e6eeaf79ee"}, {"path": "dl_fft_s16.c", "size": 2737, "hash": "fb9168e0a6e2b798b88a8e06d5482610db9070a5b3c5e66f32ed8dcf514a0649"}, {"path": "dl_rfft.h", "size": 6523, "hash": "0219466a0cfc4ab218b22af3cf7954c975a0f4e05dd7fc947ab98c3bc3f76796"}, {"path": "dl_rfft_f32.c", "size": 3661, "hash": "1d87595dfe93be1e6ad63e2decd0b04c94f8414da8ccff96a4a9068a1128baaa"}, {"path": "dl_rfft_s16.c", "size": 3228, "hash": "7449d35f33c2acc30c4d4d09e9557c8a9e1ca9c7d5964047d248b35a8e756e63"}, {"path": "idf_component.yml", "size": 361, "hash": "2fa5bee9d2ede17e4724582ce74e654c2082416b060b7caf0f7d97df0eb0a596"}, {"path": "base/dl_fft2r_fc32_ansi.c", "size": 7479, "hash": "d41e48229ba05e2aeb07057ca56016525f8344cce00f2e38de4dd41305478590"}, {"path": "base/dl_fft2r_sc16_ansi.c", "size": 20141, "hash": "30e62e058832ce77cb1f94d587ceb99342824deca8c287fe931274d51d4906e4"}, {"path": "base/dl_fft4r_fc32_ansi.c", "size": 9025, "hash": 
"2cf9d2acbd80e4acd69601545baaef72a67924df1aacd7476530d01926ee3d7b"}, {"path": "base/dl_fft_base.c", "size": 1729, "hash": "25caaeecf1a7d6ba5387292fd448a0da0884615af1d95159f093ee93b701746f"}, {"path": "base/dl_fft_base.h", "size": 3270, "hash": "a28f0e60e4c5bdae7c3aa70e39920cef17ed6d8e3f425abc85922689b798d493"}, {"path": "base/dl_fft_dtype.h", "size": 427, "hash": "209a8da6f977565f2bbf80d8d8726afb1d3c9a6dbf1e48e342c1db6023066306"}, {"path": "base/isa/dl_fft_platform.h", "size": 1552, "hash": "f5d76981793e26cdbf549d25faaae589d8e1776519a84315da4d733cef190087"}, {"path": "base/isa/esp32/dl_fft2r_fc32_ae32.S", "size": 7019, "hash": "2aa2a8f98f05076b485c9e45056d9dc49fc17a200f1bdba609b01b1443a778df"}, {"path": "base/isa/esp32/dl_fft4r_fc32_ae32.S", "size": 12062, "hash": "e64fa52065bde43eb844b89c107ed3d33a930511c6b43e90e52de9853a1a32e3"}, {"path": "base/isa/esp32p4/dl_fft2r_fc32_arp4.S", "size": 5487, "hash": "25d706406cd3e7de3407ad8518790fb157097bad1a7e705a9d67736c4115f76b"}, {"path": "base/isa/esp32p4/dl_fft4r_fc32_arp4.S", "size": 11937, "hash": "16c9a05d68cda7e3b189d7c7290013bc7614aa879cc70b1e10e7c3ec4803b1ed"}, {"path": "base/isa/esp32s3/dl_fft2r_fc32_aes3.S", "size": 5901, "hash": "53e570f9b1d888cf4b36a07ea099ecd80905beb2e957c5f9d390e2a14b5d49b4"}, {"path": "base/isa/esp32s3/dl_fft4r_fc32_aes3.S", "size": 10950, "hash": "51840adc29034c0e0983439b366bfe2f10e697c1c9667e27cdcb1394905d3930"}]}
|
||||
36
managed_components/espressif__dl_fft/CMakeLists.txt
Normal file
36
managed_components/espressif__dl_fft/CMakeLists.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
# Build script for the dl_fft ESP-IDF component.

idf_build_get_property(target IDF_TARGET)

# Directories exported to users of this component.
set(include_dirs
    "."
    "base"
    "base/isa"
)

# Sources compiled for every target.
set(srcs
    "dl_fft_f32.c"
    "dl_fft_s16.c"
    "dl_rfft_f32.c"
    "dl_rfft_s16.c"
    "dl_fft.hpp"
    "base/dl_fft2r_fc32_ansi.c"
    "base/dl_fft4r_fc32_ansi.c"
    "base/dl_fft2r_sc16_ansi.c"
    "base/dl_fft_base.c"
)

# Target-specific assembly sources; other targets get only the list above.
if(CONFIG_IDF_TARGET_ESP32)
    list(APPEND srcs
        "base/isa/esp32/dl_fft2r_fc32_ae32.S"
        "base/isa/esp32/dl_fft4r_fc32_ae32.S"
    )
elseif(CONFIG_IDF_TARGET_ESP32S3)
    list(APPEND srcs
        "base/isa/esp32s3/dl_fft2r_fc32_aes3.S"
        "base/isa/esp32s3/dl_fft4r_fc32_aes3.S"
    )
elseif(CONFIG_IDF_TARGET_ESP32P4)
    list(APPEND srcs
        "base/isa/esp32p4/dl_fft2r_fc32_arp4.S"
        "base/isa/esp32p4/dl_fft4r_fc32_arp4.S"
    )
endif()

idf_component_register(SRCS ${srcs} INCLUDE_DIRS ${include_dirs})

# Applied to this component's sources only.
component_compile_options(-ffast-math -O2)
|
||||
89
managed_components/espressif__dl_fft/README.md
Normal file
89
managed_components/espressif__dl_fft/README.md
Normal file
@@ -0,0 +1,89 @@
|
||||
# DL_FFT
|
||||
|
||||
DL_FFT is a lightweight FFT library supporting both float32 and int16 data types.
|
||||
|
||||
The float FFT implementation comes from esp-dsp. We further optimized the int16 FFT to achieve better precision.
|
||||
For int16 FFT, we recommend using the `dl_fft_s16_hp_run` or `dl_rfft_s16_hp_run` interface. `hp` means "high precision".
|
||||
|
||||
## Get Started
|
||||
|
||||
|
||||
### C interface
|
||||
```
|
||||
|
||||
#include "dl_fft.h"
|
||||
#include "dl_rfft.h"
|
||||
|
||||
// float fft
|
||||
float x[nfft*2];
|
||||
|
||||
float *x = (float *)heap_caps_aligned_alloc(16, nfft * sizeof(float) *2, MALLOC_CAP_8BIT);
|
||||
dl_fft_f32_t *fft_handle = dl_fft_f32_init(nfft, MALLOC_CAP_8BIT);
|
||||
dl_fft_f32_run(fft_handle, x);
|
||||
dl_ifft_f32_run(fft_handle, x);
|
||||
dl_fft_f32_deinit(fft_handle);
|
||||
|
||||
// float rfft
|
||||
float *x = (float *)heap_caps_aligned_alloc(16, nfft * sizeof(float), MALLOC_CAP_8BIT);
|
||||
dl_fft_f32_t *fft_handle = dl_rfft_f32_init(nfft, MALLOC_CAP_8BIT);
|
||||
dl_rfft_f32_run(fft_handle, x);
|
||||
dl_irfft_f32_run(fft_handle, x);
|
||||
dl_rfft_f32_deinit(fft_handle);
|
||||
|
||||
// int16 fft
|
||||
int16_t *x = (int16_t *)heap_caps_aligned_alloc(16, nfft * sizeof(int16_t) * 2, MALLOC_CAP_8BIT);
|
||||
float *y = (float *)heap_caps_aligned_alloc(16, nfft * sizeof(float) *2, MALLOC_CAP_8BIT);
|
||||
int in_exponent = -15; // float y = x * 2^in_exponent;
|
||||
int fft_exponent;
|
||||
int ifft_exponent;
|
||||
dl_fft_s16_t *fft_handle = dl_fft_s16_init(nfft, MALLOC_CAP_8BIT);
|
||||
dl_fft_s16_hp_run(fft_handle, x, in_exponent, &fft_exponent);
|
||||
dl_ifft_s16_hp_run(fft_handle, x, fft_exponent, &ifft_exponent);
|
||||
dl_short_to_float(x, nfft, ifft_exponent, y); // convert output from int16_t to float
|
||||
dl_fft_s16_deinit(fft_handle);
|
||||
|
||||
// int16 rfft
|
||||
int16_t *x = (int16_t *)heap_caps_aligned_alloc(16, nfft * sizeof(int16_t), MALLOC_CAP_8BIT);
|
||||
float *y = (float *)heap_caps_aligned_alloc(16, nfft * sizeof(float), MALLOC_CAP_8BIT);
|
||||
int in_exponent = -15; // float y = x * 2^in_exponent;
|
||||
int fft_exponent;
|
||||
int ifft_exponent;
|
||||
dl_fft_s16_t *fft_handle = dl_rfft_s16_init(nfft, MALLOC_CAP_8BIT);
|
||||
dl_rfft_s16_hp_run(fft_handle, x, in_exponent, &fft_exponent);
|
||||
dl_irfft_s16_hp_run(fft_handle, x, fft_exponent, &ifft_exponent);
|
||||
dl_short_to_float(x, nfft, ifft_exponent, y); // convert output from int16_t to float
|
||||
dl_rfft_s16_deinit(fft_handle);
|
||||
|
||||
|
||||
```
|
||||
Please refer to [dl_fft.h](./dl_fft.h) and [dl_rfft.h](./dl_rfft.h) for more details.
|
||||
> Note: The input array x must be allocated with heap_caps_aligned_alloc and aligned to 16 bytes.
|
||||
|
||||
|
||||
## FAQ:
|
||||
|
||||
#### 1. Why not just use esp-dsp directly?
|
||||
|
||||
Because esp-dsp uses global variables to share FFT tables and other parameters in order to minimize memory consumption. This introduces significant risks for independent components. Your FFT results might be corrupted by other programs, and this is something you have little control over.
|
||||
|
||||
#### 2. What does dl_fft do?
|
||||
|
||||
1. Provides a unified and simple FFT/IFFT interface. Users no longer need to worry about their FFT results being affected by other programs. All FFT tables are allocated and released within the function scope.
|
||||
2. Reimplements an int16 FFT/IFFT. Dynamic quantization is used during butterfly operations to achieve better precision.
|
||||
3. [TODO] Uses built-in FFT instructions on ESP32-S3 and ESP32-P4 to further accelerate int16 FFT/IFFT.
|
||||
|
||||
|
||||
## Benchmark
|
||||
|
||||
test code: [test_apps/dl_fft](https://github.com/espressif/esp-dl/tree/master/test_apps/dl_fft)
|
||||
|
||||
- [ESP32-S3 fft benchmark](./benchmark_esp32s3.md)
|
||||
- [ESP32-P4 fft benchmark](./benchmark_esp32p4.md)
|
||||
- [ESP32-C5 fft benchmark](./benchmark_esp32c5.md)
|
||||
|
||||
|
||||
## Reference
|
||||
|
||||
- [esp-dsp](https://github.com/espressif/esp-dsp)
|
||||
- [kissfft](https://github.com/mborgerding/kissfft)
|
||||
- [fftw](https://github.com/FFTW/fftw3)
|
||||
258
managed_components/espressif__dl_fft/base/dl_fft2r_fc32_ansi.c
Normal file
258
managed_components/espressif__dl_fft/base/dl_fft2r_fc32_ansi.c
Normal file
@@ -0,0 +1,258 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dl_fft_base.h"
|
||||
|
||||
// unsigned short reverse(unsigned short x, unsigned short N, int order);
|
||||
|
||||
esp_err_t dl_fft2r_fc32_ansi(float *data, int N, float *w)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
int ie, ia, m;
|
||||
float re_temp, im_temp;
|
||||
float c, s;
|
||||
ie = 1;
|
||||
for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
|
||||
ia = 0;
|
||||
for (int j = 0; j < ie; j++) {
|
||||
c = w[2 * j];
|
||||
s = w[2 * j + 1];
|
||||
for (int i = 0; i < N2; i++) {
|
||||
m = ia + N2;
|
||||
re_temp = c * data[2 * m] + s * data[2 * m + 1];
|
||||
im_temp = c * data[2 * m + 1] - s * data[2 * m];
|
||||
data[2 * m] = data[2 * ia] - re_temp;
|
||||
data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
data[2 * ia] = data[2 * ia] + re_temp;
|
||||
data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
ia++;
|
||||
}
|
||||
ia += N2;
|
||||
}
|
||||
ie <<= 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_ifft2r_fc32_ansi(float *data, int N, float *w)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
int ie, ia, m;
|
||||
float re_temp, im_temp;
|
||||
float c, s;
|
||||
ie = 1;
|
||||
for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
|
||||
ia = 0;
|
||||
for (int j = 0; j < ie; j++) {
|
||||
c = w[2 * j];
|
||||
s = -w[2 * j + 1];
|
||||
for (int i = 0; i < N2; i++) {
|
||||
m = ia + N2;
|
||||
re_temp = c * data[2 * m] + s * data[2 * m + 1];
|
||||
im_temp = c * data[2 * m + 1] - s * data[2 * m];
|
||||
data[2 * m] = data[2 * ia] - re_temp;
|
||||
data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
data[2 * ia] = data[2 * ia] + re_temp;
|
||||
data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
ia++;
|
||||
}
|
||||
ia += N2;
|
||||
}
|
||||
ie <<= 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_bitrev2r_fc32_ansi(float *data, int N, uint16_t *bitrev_table, int bitrev_size)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
if (bitrev_table) {
|
||||
float r_temp, i_temp;
|
||||
for (int n = 0; n < bitrev_size; n++) {
|
||||
uint16_t i = bitrev_table[n * 2];
|
||||
uint16_t j = bitrev_table[n * 2 + 1];
|
||||
r_temp = data[j];
|
||||
data[j] = data[i];
|
||||
data[i] = r_temp;
|
||||
i_temp = data[j + 1];
|
||||
data[j + 1] = data[i + 1];
|
||||
data[i + 1] = i_temp;
|
||||
}
|
||||
} else {
|
||||
int j, k;
|
||||
float r_temp, i_temp;
|
||||
j = 0;
|
||||
for (int i = 1; i < (N - 1); i++) {
|
||||
k = N >> 1;
|
||||
while (k <= j) {
|
||||
j -= k;
|
||||
k >>= 1;
|
||||
}
|
||||
j += k;
|
||||
if (i < j) {
|
||||
r_temp = data[j * 2];
|
||||
data[j * 2] = data[i * 2];
|
||||
data[i * 2] = r_temp;
|
||||
i_temp = data[j * 2 + 1];
|
||||
data[j * 2 + 1] = data[i * 2 + 1];
|
||||
data[i * 2 + 1] = i_temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_rfft_post_proc_fc32_ansi(float *data, int N, float *table)
|
||||
{
|
||||
dl_fc32_t *result = (dl_fc32_t *)data;
|
||||
// Original formula...
|
||||
// result[0].re = result[0].re + result[0].im;
|
||||
// result[N].re = result[0].re - result[0].im;
|
||||
// result[0].im = 0;
|
||||
// result[N].im = 0;
|
||||
// Optimized one:
|
||||
float tmp_re = result[0].re;
|
||||
result[0].re = tmp_re + result[0].im;
|
||||
result[0].im = tmp_re - result[0].im;
|
||||
|
||||
dl_fc32_t f1k, f2k;
|
||||
for (int k = 1; k <= N / 2; k++) {
|
||||
dl_fc32_t fpk = result[k];
|
||||
dl_fc32_t fpnk = result[N - k];
|
||||
f1k.re = fpk.re + fpnk.re;
|
||||
f1k.im = fpk.im - fpnk.im;
|
||||
f2k.re = fpk.re - fpnk.re;
|
||||
f2k.im = fpk.im + fpnk.im;
|
||||
|
||||
float c = -table[k * 2 - 1];
|
||||
float s = -table[k * 2 - 2];
|
||||
dl_fc32_t tw;
|
||||
tw.re = c * f2k.re - s * f2k.im;
|
||||
tw.im = s * f2k.re + c * f2k.im;
|
||||
|
||||
result[k].re = 0.5 * (f1k.re + tw.re);
|
||||
result[k].im = 0.5 * (f1k.im + tw.im);
|
||||
result[N - k].re = 0.5 * (f1k.re - tw.re);
|
||||
result[N - k].im = 0.5 * (tw.im - f1k.im);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dl_rfft_pre_proc_fc32_ansi(float *data, int N, float *table)
|
||||
{
|
||||
dl_fc32_t *result = (dl_fc32_t *)data;
|
||||
float tmp_re = result[0].re;
|
||||
result[0].re = (tmp_re + result[0].im) * 0.5;
|
||||
result[0].im = (tmp_re - result[0].im) * 0.5;
|
||||
|
||||
dl_fc32_t f1k, f2k;
|
||||
for (int k = 1; k <= N / 2; k++) {
|
||||
dl_fc32_t fpk = result[k];
|
||||
dl_fc32_t fpnk = result[N - k];
|
||||
f1k.re = fpk.re + fpnk.re;
|
||||
f1k.im = fpk.im - fpnk.im;
|
||||
f2k.re = fpk.re - fpnk.re;
|
||||
f2k.im = fpk.im + fpnk.im;
|
||||
|
||||
float c = -table[k * 2 - 1];
|
||||
float s = table[k * 2 - 2];
|
||||
dl_fc32_t tw;
|
||||
tw.re = c * f2k.re - s * f2k.im;
|
||||
tw.im = s * f2k.re + c * f2k.im;
|
||||
|
||||
result[k].re = 0.5 * (f1k.re + tw.re);
|
||||
result[k].im = 0.5 * (f1k.im + tw.im);
|
||||
result[N - k].re = 0.5 * (f1k.re - tw.re);
|
||||
result[N - k].im = 0.5 * (tw.im - f1k.im);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
float *dl_gen_rfft_table_f32(int fft_point, uint32_t caps)
|
||||
{
|
||||
float *fft_table = (float *)heap_caps_aligned_alloc(16, fft_point * sizeof(float), caps);
|
||||
|
||||
if (fft_table) {
|
||||
for (int i = 1; i <= fft_point >> 1; i++) {
|
||||
float angle = 2 * M_PI * i * 1.0 / fft_point;
|
||||
fft_table[2 * i - 2] = cosf(angle);
|
||||
fft_table[2 * i - 1] = sinf(angle);
|
||||
}
|
||||
}
|
||||
|
||||
return fft_table;
|
||||
}
|
||||
|
||||
/**
 * Walks the reversed-counter sequence for 1 .. N - 2 and visits every pair
 * (i, j) with i < j. When `table` is non-NULL, the pair is stored as
 * (j * 2, i * 2) at the next free slot.
 *
 * @return Number of pairs visited.
 */
static int dl_bitrev2r_collect_pairs(int N, uint16_t *table)
{
    int count = 0;
    int j = 0;

    for (int i = 1; i < (N - 1); i++) {
        int k = N >> 1;
        while (k <= j) {
            j -= k;
            k >>= 1;
        }
        j += k;

        if (i < j) {
            if (table) {
                table[count * 2] = (uint16_t)(j * 2);
                table[count * 2 + 1] = (uint16_t)(i * 2);
            }
            count++;
        }
    }

    return count;
}

/**
 * Builds the swap table consumed by dl_bitrev2r_fc32_ansi.
 *
 * Each entry is a pair of float offsets (j * 2, i * 2) stored as uint16_t.
 * The table is refused when the largest offset, 2 * (N - 1), cannot be
 * represented in uint16_t. The previous guard compared the number of entries
 * against UINT16_MAX, which let N = 65536 through (32640 pairs) and silently
 * truncated the stored offsets.
 *
 * @param N           Number of complex points.
 * @param caps        Capability flags forwarded to heap_caps_malloc.
 * @param bitrev_size Out: number of swap pairs. Written whenever the size
 *                    guard passes, even if the allocation then fails.
 * @return The new table, or NULL when N is too large or allocation fails.
 *         NOTE(review): dl_bitrev2r_fc32_ansi computes the swaps itself when
 *         given a NULL table; confirm callers pass the NULL through.
 */
uint16_t *dl_gen_bitrev2r_table(int N, uint32_t caps, int *bitrev_size)
{
    /* Largest stored value is 2 * (N - 1); written this way to avoid int overflow. */
    if ((N - 1) > UINT16_MAX / 2) {
        return NULL;
    }

    int count = dl_bitrev2r_collect_pairs(N, NULL);
    bitrev_size[0] = count;

    uint16_t *bitrev_table = (uint16_t *)heap_caps_malloc(2 * count * sizeof(uint16_t), caps);
    if (bitrev_table) {
        dl_bitrev2r_collect_pairs(N, bitrev_table);
    }

    return bitrev_table;
}
|
||||
|
||||
float *dl_gen_fftr2_table_f32(int fft_point, uint32_t caps)
|
||||
{
|
||||
float *fft_table = (float *)heap_caps_aligned_alloc(16, fft_point * sizeof(float), caps);
|
||||
|
||||
if (fft_table) {
|
||||
float e = M_PI * 2.0 / fft_point;
|
||||
|
||||
for (int i = 0; i < (fft_point >> 1); i++) {
|
||||
fft_table[2 * i] = cosf(i * e);
|
||||
fft_table[2 * i + 1] = sinf(i * e);
|
||||
}
|
||||
|
||||
dl_bitrev2r_fc32_ansi(fft_table, fft_point >> 1, NULL, 0);
|
||||
}
|
||||
|
||||
return fft_table;
|
||||
}
|
||||
580
managed_components/espressif__dl_fft/base/dl_fft2r_sc16_ansi.c
Normal file
580
managed_components/espressif__dl_fft/base/dl_fft2r_sc16_ansi.c
Normal file
@@ -0,0 +1,580 @@
|
||||
#include "dl_fft_base.h"
|
||||
|
||||
/**
 * Fixed-point butterfly term:
 *   ((a0 << 15) - (a1 * a2 + a3 * a4) + add_rount_mult) >> result_shift,
 * truncated to int16_t.
 */
static inline int16_t dl_xtfixed_bf_1(
    int16_t a0, int16_t a1, int16_t a2, int16_t a3, int16_t a4, int result_shift, int add_rount_mult)
{
    const int32_t product = (int32_t)a1 * (int32_t)a2 + (int32_t)a3 * (int32_t)a4;
    int acc = (int)a0 << 15;

    acc -= product;
    acc += add_rount_mult;

    return (int16_t)(acc >> result_shift);
}
|
||||
|
||||
/**
 * Fixed-point butterfly term:
 *   ((a0 << 15) - (a1 * a2 - a3 * a4) + add_rount_mult) >> result_shift,
 * truncated to int16_t.
 */
static inline int16_t dl_xtfixed_bf_2(
    int16_t a0, int16_t a1, int16_t a2, int16_t a3, int16_t a4, int result_shift, int add_rount_mult)
{
    const int32_t product = ((int32_t)a1 * (int32_t)a2 - (int32_t)a3 * (int32_t)a4);
    int acc = (int)a0 << 15;

    acc -= product;
    acc += add_rount_mult;

    return (int16_t)(acc >> result_shift);
}
|
||||
|
||||
/**
 * Fixed-point butterfly term:
 *   ((a0 << 15) + (a1 * a2 + a3 * a4) + add_rount_mult) >> result_shift,
 * truncated to int16_t.
 */
static inline int16_t dl_xtfixed_bf_3(
    int16_t a0, int16_t a1, int16_t a2, int16_t a3, int16_t a4, int result_shift, int add_rount_mult)
{
    const int32_t product = (int32_t)a1 * (int32_t)a2 + (int32_t)a3 * (int32_t)a4;
    int acc = (int)a0 << 15;

    acc += product;
    acc += add_rount_mult;

    return (int16_t)(acc >> result_shift);
}
|
||||
|
||||
/**
 * Fixed-point butterfly term:
 *   ((a0 << 15) + (a1 * a2 - a3 * a4) + add_rount_mult) >> result_shift,
 * truncated to int16_t.
 */
static inline int16_t dl_xtfixed_bf_4(
    int16_t a0, int16_t a1, int16_t a2, int16_t a3, int16_t a4, int result_shift, int add_rount_mult)
{
    const int32_t product = (int32_t)a1 * (int32_t)a2 - (int32_t)a3 * (int32_t)a4;
    int acc = (int)a0 << 15;

    acc += product;
    acc += add_rount_mult;

    return (int16_t)(acc >> result_shift);
}
|
||||
|
||||
esp_err_t dl_fft2r_sc16_ansi(int16_t *data, int N, int16_t *table)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
uint32_t *w = (uint32_t *)table;
|
||||
uint32_t *in_data = (uint32_t *)data;
|
||||
|
||||
int ie, ia, m;
|
||||
dl_sc16_t cs; // c - re, s - im
|
||||
dl_sc16_t m_data;
|
||||
dl_sc16_t a_data;
|
||||
int add_rount_mult = 1 << 15;
|
||||
|
||||
ie = 1;
|
||||
for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
|
||||
ia = 0;
|
||||
for (int j = 0; j < ie; j++) {
|
||||
cs.data = w[j];
|
||||
// c = w[2 * j];
|
||||
// s = w[2 * j + 1];
|
||||
for (int i = 0; i < N2; i++) {
|
||||
m = ia + N2;
|
||||
m_data.data = in_data[m];
|
||||
a_data.data = in_data[ia];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
dl_sc16_t m1;
|
||||
m1.re = dl_xtfixed_bf_1(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
16,
|
||||
add_rount_mult); //(a_data.re - temp.re + shift_const) >> 1;
|
||||
m1.im = dl_xtfixed_bf_2(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
16,
|
||||
add_rount_mult); //(a_data.im - temp.im + shift_const) >> 1;
|
||||
in_data[m] = m1.data;
|
||||
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
dl_sc16_t m2;
|
||||
m2.re = dl_xtfixed_bf_3(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
16,
|
||||
add_rount_mult); //(a_data.re + temp.re + shift_const) >> 1;
|
||||
m2.im = dl_xtfixed_bf_4(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
16,
|
||||
add_rount_mult); //(a_data.im + temp.im + shift_const)>>1;
|
||||
in_data[ia] = m2.data;
|
||||
ia++;
|
||||
}
|
||||
ia += N2;
|
||||
}
|
||||
ie <<= 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_ifft2r_sc16_ansi(int16_t *data, int N, int16_t *table)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
uint32_t *w = (uint32_t *)table;
|
||||
uint32_t *in_data = (uint32_t *)data;
|
||||
|
||||
int ie, ia, m;
|
||||
dl_sc16_t cs; // c - re, s - im
|
||||
dl_sc16_t m_data;
|
||||
dl_sc16_t a_data;
|
||||
int add_rount_mult = 1 << 15;
|
||||
|
||||
ie = 1;
|
||||
for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
|
||||
ia = 0;
|
||||
for (int j = 0; j < ie; j++) {
|
||||
cs.data = w[j];
|
||||
cs.im = -cs.im;
|
||||
// c = w[2 * j];
|
||||
// s = w[2 * j + 1];
|
||||
for (int i = 0; i < N2; i++) {
|
||||
m = ia + N2;
|
||||
m_data.data = in_data[m];
|
||||
a_data.data = in_data[ia];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
dl_sc16_t m1;
|
||||
m1.re = dl_xtfixed_bf_1(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
16,
|
||||
add_rount_mult); //(a_data.re - temp.re + shift_const) >> 1;
|
||||
m1.im = dl_xtfixed_bf_2(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
16,
|
||||
add_rount_mult); //(a_data.im - temp.im + shift_const) >> 1;
|
||||
in_data[m] = m1.data;
|
||||
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
dl_sc16_t m2;
|
||||
m2.re = dl_xtfixed_bf_3(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
16,
|
||||
add_rount_mult); //(a_data.re + temp.re + shift_const) >> 1;
|
||||
m2.im = dl_xtfixed_bf_4(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
16,
|
||||
add_rount_mult); //(a_data.im + temp.im + shift_const)>>1;
|
||||
in_data[ia] = m2.data;
|
||||
ia++;
|
||||
}
|
||||
ia += N2;
|
||||
}
|
||||
ie <<= 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_fft2r_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
uint32_t *w = (uint32_t *)table;
|
||||
uint32_t *in_data = (uint32_t *)data;
|
||||
|
||||
int ie, ia, m, loop_num = 2;
|
||||
dl_sc16_t cs; // c - re, s - im
|
||||
dl_sc16_t m_data;
|
||||
dl_sc16_t a_data;
|
||||
int add_rount_mult = 1 << 15;
|
||||
|
||||
ie = 1;
|
||||
shift[0] = 0;
|
||||
for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
|
||||
ia = 0;
|
||||
int loop_shift = 16;
|
||||
if (loop_num == 2) {
|
||||
loop_shift = dl_array_max_q_s16(data, N * 2);
|
||||
if (loop_shift < 16) {
|
||||
loop_shift += 1;
|
||||
}
|
||||
loop_num = 0;
|
||||
} else {
|
||||
loop_num += 1;
|
||||
}
|
||||
shift[0] += loop_shift - 15;
|
||||
add_rount_mult = 1 << (loop_shift - 1);
|
||||
for (int j = 0; j < ie; j++) {
|
||||
cs.data = w[j];
|
||||
// c = w[2 * j];
|
||||
// s = w[2 * j + 1];
|
||||
for (int i = 0; i < N2; i++) {
|
||||
m = ia + N2;
|
||||
m_data.data = in_data[m];
|
||||
a_data.data = in_data[ia];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
dl_sc16_t m1;
|
||||
m1.re = dl_xtfixed_bf_1(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.re - temp.re + shift_const) >> 1;
|
||||
m1.im = dl_xtfixed_bf_2(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.im - temp.im + shift_const) >> 1;
|
||||
in_data[m] = m1.data;
|
||||
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
dl_sc16_t m2;
|
||||
m2.re = dl_xtfixed_bf_3(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.re + temp.re + shift_const) >> 1;
|
||||
m2.im = dl_xtfixed_bf_4(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.im + temp.im + shift_const)>>1;
|
||||
in_data[ia] = m2.data;
|
||||
ia++;
|
||||
}
|
||||
ia += N2;
|
||||
}
|
||||
ie <<= 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_ifft2r_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
uint32_t *w = (uint32_t *)table;
|
||||
uint32_t *in_data = (uint32_t *)data;
|
||||
|
||||
int ie, ia, m, loop_num = 2;
|
||||
dl_sc16_t cs; // c - re, s - im
|
||||
dl_sc16_t m_data;
|
||||
dl_sc16_t a_data;
|
||||
int add_rount_mult = 1 << 15;
|
||||
|
||||
ie = 1;
|
||||
shift[0] = 0;
|
||||
for (int N2 = N / 2; N2 > 0; N2 >>= 1) {
|
||||
ia = 0;
|
||||
int loop_shift = 16;
|
||||
if (loop_num == 2) {
|
||||
loop_shift = dl_array_max_q_s16(data, N * 2);
|
||||
if (loop_shift < 16) {
|
||||
loop_shift += 1;
|
||||
}
|
||||
loop_num = 0;
|
||||
} else {
|
||||
loop_num += 1;
|
||||
}
|
||||
shift[0] += loop_shift - 15;
|
||||
add_rount_mult = 1 << (loop_shift - 1);
|
||||
for (int j = 0; j < ie; j++) {
|
||||
cs.data = w[j];
|
||||
cs.im = -cs.im;
|
||||
// c = w[2 * j];
|
||||
// s = w[2 * j + 1];
|
||||
for (int i = 0; i < N2; i++) {
|
||||
m = ia + N2;
|
||||
m_data.data = in_data[m];
|
||||
a_data.data = in_data[ia];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
dl_sc16_t m1;
|
||||
m1.re = dl_xtfixed_bf_1(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.re - temp.re + shift_const) >> 1;
|
||||
m1.im = dl_xtfixed_bf_2(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.im - temp.im + shift_const) >> 1;
|
||||
in_data[m] = m1.data;
|
||||
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
dl_sc16_t m2;
|
||||
m2.re = dl_xtfixed_bf_3(a_data.re,
|
||||
cs.re,
|
||||
m_data.re,
|
||||
cs.im,
|
||||
m_data.im,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.re + temp.re + shift_const) >> 1;
|
||||
m2.im = dl_xtfixed_bf_4(a_data.im,
|
||||
cs.re,
|
||||
m_data.im,
|
||||
cs.im,
|
||||
m_data.re,
|
||||
loop_shift,
|
||||
add_rount_mult); //(a_data.im + temp.im + shift_const)>>1;
|
||||
in_data[ia] = m2.data;
|
||||
ia++;
|
||||
}
|
||||
ia += N2;
|
||||
}
|
||||
ie <<= 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Reverses the 16 bits of x, then shifts the result right by (16 - order)
 * so that only the low `order` bits of x take part in the reversal.
 * The N parameter is not used.
 */
static inline unsigned short reverse_sc16(unsigned short x, unsigned short N, int order)
{
    unsigned short bits = x;

    (void)N;

    /* Swap progressively smaller groups: bytes, nibbles, bit pairs, single bits. */
    bits = (unsigned short)(((bits & 0xff00) >> 8) | ((bits & 0x00ff) << 8));
    bits = (unsigned short)(((bits & 0xf0f0) >> 4) | ((bits & 0x0f0f) << 4));
    bits = (unsigned short)(((bits & 0xcccc) >> 2) | ((bits & 0x3333) << 2));
    bits = (unsigned short)(((bits & 0xaaaa) >> 1) | ((bits & 0x5555) << 1));

    return (unsigned short)(bits >> (16 - order));
}
|
||||
|
||||
esp_err_t dl_bitrev2r_sc16_ansi(int16_t *data, int N)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
int j, k;
|
||||
uint32_t temp;
|
||||
uint32_t *in_data = (uint32_t *)data;
|
||||
j = 0;
|
||||
for (int i = 1; i < (N - 1); i++) {
|
||||
k = N >> 1;
|
||||
while (k <= j) {
|
||||
j -= k;
|
||||
k >>= 1;
|
||||
}
|
||||
j += k;
|
||||
if (i < j) {
|
||||
temp = in_data[j];
|
||||
in_data[j] = in_data[i];
|
||||
in_data[i] = temp;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_cplx2reC_sc16(int16_t *data, int N)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
|
||||
int i;
|
||||
int n2 = N << (1); // we will operate with int32 indexes
|
||||
uint32_t *in_data = (uint32_t *)data;
|
||||
|
||||
dl_sc16_t kl;
|
||||
dl_sc16_t kh;
|
||||
dl_sc16_t nl;
|
||||
dl_sc16_t nh;
|
||||
|
||||
for (i = 0; i < (N / 4); i++) {
|
||||
kl.data = in_data[i + 1];
|
||||
nl.data = in_data[N - i - 1];
|
||||
kh.data = in_data[i + 1 + N / 2];
|
||||
nh.data = in_data[N - i - 1 - N / 2];
|
||||
|
||||
data[i * 2 + 0 + 2] = kl.re + nl.re;
|
||||
data[i * 2 + 1 + 2] = kl.im - nl.im;
|
||||
|
||||
data[n2 - i * 2 - 1 - N] = kh.re + nh.re;
|
||||
data[n2 - i * 2 - 2 - N] = kh.im - nh.im;
|
||||
|
||||
data[i * 2 + 0 + 2 + N] = kl.im + nl.im;
|
||||
data[i * 2 + 1 + 2 + N] = kl.re - nl.re;
|
||||
|
||||
data[n2 - i * 2 - 1] = kh.im + nh.im;
|
||||
data[n2 - i * 2 - 2] = kh.re - nh.re;
|
||||
}
|
||||
data[N] = data[1];
|
||||
data[1] = 0;
|
||||
data[N + 1] = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
esp_err_t dl_rfft_post_proc_sc16_ansi(int16_t *data, int N, int16_t *table)
|
||||
{
|
||||
dl_sc16_t *result = (dl_sc16_t *)data;
|
||||
// Original formula...
|
||||
// result[0].re = result[0].re + result[0].im;
|
||||
// result[N].re = result[0].re - result[0].im;
|
||||
// result[0].im = 0;
|
||||
// result[N].im = 0;
|
||||
// Optimized one:
|
||||
int32_t tmp_re = result[0].re + 1;
|
||||
result[0].re = (tmp_re + result[0].im) >> 1;
|
||||
result[0].im = (tmp_re - result[0].im) >> 1;
|
||||
int round = 1 << 16;
|
||||
|
||||
int32_t f1k_re, f1k_im, f2k_re, f2k_im, tw_re, tw_im;
|
||||
for (int k = 1; k <= N / 2; k++) {
|
||||
dl_sc16_t fpk = result[k];
|
||||
dl_sc16_t fpnk = result[N - k];
|
||||
f1k_re = fpk.re + fpnk.re;
|
||||
f1k_im = fpk.im - fpnk.im;
|
||||
f2k_re = fpk.re - fpnk.re;
|
||||
f2k_im = fpk.im + fpnk.im;
|
||||
|
||||
int16_t c = -table[k * 2 - 1];
|
||||
int16_t s = -table[k * 2 - 2];
|
||||
tw_re = c * f2k_re - s * f2k_im;
|
||||
tw_im = s * f2k_re + c * f2k_im;
|
||||
f1k_re = f1k_re << 15;
|
||||
f1k_im = f1k_im << 15;
|
||||
|
||||
result[k].re = (f1k_re + tw_re + round) >> 17;
|
||||
result[k].im = (f1k_im + tw_im + round) >> 17;
|
||||
result[N - k].re = (f1k_re - tw_re + round) >> 17;
|
||||
result[N - k].im = (tw_im - f1k_im + round) >> 17;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dl_rfft_pre_proc_sc16_ansi(int16_t *data, int N, int16_t *table)
|
||||
{
|
||||
dl_sc16_t *result = (dl_sc16_t *)data;
|
||||
|
||||
int32_t tmp_re = result[0].re + 2;
|
||||
result[0].re = (tmp_re + result[0].im) >> 2;
|
||||
result[0].im = (tmp_re - result[0].im) >> 2;
|
||||
int round = 1 << 16;
|
||||
|
||||
int32_t f1k_re, f1k_im, f2k_re, f2k_im, tw_re, tw_im;
|
||||
for (int k = 1; k <= N / 2; k++) {
|
||||
dl_sc16_t fpk = result[k];
|
||||
dl_sc16_t fpnk = result[N - k];
|
||||
f1k_re = fpk.re + fpnk.re;
|
||||
f1k_im = fpk.im - fpnk.im;
|
||||
f2k_re = fpk.re - fpnk.re;
|
||||
f2k_im = fpk.im + fpnk.im;
|
||||
|
||||
int16_t c = -table[k * 2 - 1];
|
||||
int16_t s = table[k * 2 - 2];
|
||||
tw_re = c * f2k_re - s * f2k_im;
|
||||
tw_im = s * f2k_re + c * f2k_im;
|
||||
f1k_re = f1k_re << 15;
|
||||
f1k_im = f1k_im << 15;
|
||||
|
||||
result[k].re = (f1k_re + tw_re + round) >> 17;
|
||||
result[k].im = (f1k_im + tw_im + round) >> 17;
|
||||
result[N - k].re = (f1k_re - tw_re + round) >> 17;
|
||||
result[N - k].im = (tw_im - f1k_im + round) >> 17;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dl_cplx2real_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift)
|
||||
{
|
||||
dl_sc16_t *result = (dl_sc16_t *)data;
|
||||
// Original formula...
|
||||
// result[0].re = result[0].re + result[0].im;
|
||||
// result[N].re = result[0].re - result[0].im;
|
||||
// result[0].im = 0;
|
||||
// result[N].im = 0;
|
||||
// Optimized one:
|
||||
int loop_shift = dl_array_max_q_s16(data, N);
|
||||
int round = 1 << loop_shift;
|
||||
int32_t tmp_re = result[0].re;
|
||||
shift[0] += loop_shift - 15;
|
||||
|
||||
if (loop_shift >= 15) {
|
||||
result[0].re = (tmp_re + result[0].im) >> (loop_shift - 15);
|
||||
result[0].im = (tmp_re - result[0].im) >> (loop_shift - 15);
|
||||
} else {
|
||||
result[0].re = (tmp_re + result[0].im) << (15 - loop_shift);
|
||||
result[0].im = (tmp_re - result[0].im) << (15 - loop_shift);
|
||||
}
|
||||
|
||||
int32_t f1k_re, f1k_im, f2k_re, f2k_im, tw_re, tw_im;
|
||||
loop_shift += 1;
|
||||
for (int k = 1; k <= N / 2; k++) {
|
||||
dl_sc16_t fpk = result[k];
|
||||
dl_sc16_t fpnk = result[N - k];
|
||||
f1k_re = fpk.re + fpnk.re;
|
||||
f1k_im = fpk.im - fpnk.im;
|
||||
f2k_re = fpk.re - fpnk.re;
|
||||
f2k_im = fpk.im + fpnk.im;
|
||||
|
||||
int16_t c = -table[k * 2 - 1];
|
||||
int16_t s = -table[k * 2 - 2];
|
||||
tw_re = c * f2k_re - s * f2k_im;
|
||||
tw_im = s * f2k_re + c * f2k_im;
|
||||
f1k_re = f1k_re << 15;
|
||||
f1k_im = f1k_im << 15;
|
||||
|
||||
result[k].re = (f1k_re + tw_re + round) >> loop_shift;
|
||||
result[k].im = (f1k_im + tw_im + round) >> loop_shift;
|
||||
result[N - k].re = (f1k_re - tw_re + round) >> loop_shift;
|
||||
result[N - k].im = (tw_im - f1k_im + round) >> loop_shift;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
int16_t *dl_gen_fft_table_sc16(int fft_point, uint32_t caps)
|
||||
{
|
||||
int16_t *fft_table = (int16_t *)heap_caps_aligned_alloc(16, fft_point * sizeof(int16_t), caps);
|
||||
|
||||
if (fft_table) {
|
||||
float e = M_PI * 2.0 / fft_point;
|
||||
for (int i = 0; i < (fft_point >> 1); i++) {
|
||||
fft_table[2 * i] = (int16_t)roundf(INT16_MAX * cosf(i * e));
|
||||
fft_table[2 * i + 1] = (int16_t)roundf(INT16_MAX * sinf(i * e));
|
||||
}
|
||||
dl_bitrev2r_sc16_ansi(fft_table, fft_point >> 1);
|
||||
}
|
||||
|
||||
return fft_table;
|
||||
}
|
||||
|
||||
int16_t *dl_gen_rfft_table_s16(int fft_point, uint32_t caps)
|
||||
{
|
||||
int16_t *fft_table = (int16_t *)heap_caps_aligned_alloc(16, fft_point * sizeof(int16_t), caps);
|
||||
|
||||
if (fft_table) {
|
||||
float e = M_PI * 2.0 / fft_point;
|
||||
|
||||
for (int i = 0; i < (fft_point >> 1); i++) {
|
||||
fft_table[2 * i] = (int16_t)roundf(INT16_MAX * cosf((i + 1) * e));
|
||||
fft_table[2 * i + 1] = (int16_t)roundf(INT16_MAX * sinf((i + 1) * e));
|
||||
}
|
||||
}
|
||||
|
||||
return fft_table;
|
||||
}
|
||||
277
managed_components/espressif__dl_fft/base/dl_fft4r_fc32_ansi.c
Normal file
277
managed_components/espressif__dl_fft/base/dl_fft4r_fc32_ansi.c
Normal file
@@ -0,0 +1,277 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#include "dl_fft_base.h"
|
||||
|
||||
esp_err_t dl_fft4r_fc32_ansi(float *data, int length, float *table, int table_size)
|
||||
{
|
||||
dl_fc32_t bfly[4];
|
||||
int log2N = dl_power_of_two(length);
|
||||
int log4N = log2N >> 1;
|
||||
if ((log2N & 0x01) != 0) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int m = 2;
|
||||
int wind_step = 2;
|
||||
while (1) { /// radix 4
|
||||
if (log4N == 0) {
|
||||
break;
|
||||
}
|
||||
length = length >> 2;
|
||||
for (int j = 0; j < m; j += 2) { // j: which FFT of this step
|
||||
int start_index = j * (length << 1); // n: n-point FFT
|
||||
|
||||
dl_fc32_t *ptrc0 = (dl_fc32_t *)data + start_index;
|
||||
dl_fc32_t *ptrc1 = ptrc0 + length;
|
||||
dl_fc32_t *ptrc2 = ptrc1 + length;
|
||||
dl_fc32_t *ptrc3 = ptrc2 + length;
|
||||
|
||||
dl_fc32_t *winc0 = (dl_fc32_t *)table;
|
||||
dl_fc32_t *winc1 = winc0;
|
||||
dl_fc32_t *winc2 = winc0;
|
||||
|
||||
for (int k = 0; k < length; k++) {
|
||||
dl_fc32_t in0 = *ptrc0;
|
||||
dl_fc32_t in2 = *ptrc2;
|
||||
dl_fc32_t in1 = *ptrc1;
|
||||
dl_fc32_t in3 = *ptrc3;
|
||||
|
||||
bfly[0].re = in0.re + in2.re + in1.re + in3.re;
|
||||
bfly[0].im = in0.im + in2.im + in1.im + in3.im;
|
||||
|
||||
bfly[1].re = in0.re - in2.re + in1.im - in3.im;
|
||||
bfly[1].im = in0.im - in2.im - in1.re + in3.re;
|
||||
|
||||
bfly[2].re = in0.re + in2.re - in1.re - in3.re;
|
||||
bfly[2].im = in0.im + in2.im - in1.im - in3.im;
|
||||
|
||||
bfly[3].re = in0.re - in2.re - in1.im + in3.im;
|
||||
bfly[3].im = in0.im - in2.im + in1.re - in3.re;
|
||||
|
||||
*ptrc0 = bfly[0];
|
||||
ptrc1->re = bfly[1].re * winc0->re + bfly[1].im * winc0->im;
|
||||
ptrc1->im = bfly[1].im * winc0->re - bfly[1].re * winc0->im;
|
||||
ptrc2->re = bfly[2].re * winc1->re + bfly[2].im * winc1->im;
|
||||
ptrc2->im = bfly[2].im * winc1->re - bfly[2].re * winc1->im;
|
||||
ptrc3->re = bfly[3].re * winc2->re + bfly[3].im * winc2->im;
|
||||
ptrc3->im = bfly[3].im * winc2->re - bfly[3].re * winc2->im;
|
||||
|
||||
winc0 += 1 * wind_step;
|
||||
winc1 += 2 * wind_step;
|
||||
winc2 += 3 * wind_step;
|
||||
|
||||
ptrc0++;
|
||||
ptrc1++;
|
||||
ptrc2++;
|
||||
ptrc3++;
|
||||
}
|
||||
}
|
||||
m = m << 2;
|
||||
wind_step = wind_step << 2;
|
||||
log4N--;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dl_ifft4r_fc32_ansi(float *data, int length, float *table, int table_size)
|
||||
{
|
||||
dl_fc32_t bfly[4];
|
||||
int log2N = dl_power_of_two(length);
|
||||
int log4N = log2N >> 1;
|
||||
if ((log2N & 0x01) != 0) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int m = 2;
|
||||
int wind_step = 2;
|
||||
while (1) { /// radix 4
|
||||
if (log4N == 0) {
|
||||
break;
|
||||
}
|
||||
length = length >> 2;
|
||||
for (int j = 0; j < m; j += 2) { // j: which FFT of this step
|
||||
int start_index = j * (length << 1); // n: n-point FFT
|
||||
|
||||
dl_fc32_t *ptrc0 = (dl_fc32_t *)data + start_index;
|
||||
dl_fc32_t *ptrc1 = ptrc0 + length;
|
||||
dl_fc32_t *ptrc2 = ptrc1 + length;
|
||||
dl_fc32_t *ptrc3 = ptrc2 + length;
|
||||
|
||||
dl_fc32_t *winc0 = (dl_fc32_t *)table;
|
||||
dl_fc32_t *winc1 = winc0;
|
||||
dl_fc32_t *winc2 = winc0;
|
||||
|
||||
for (int k = 0; k < length; k++) {
|
||||
dl_fc32_t in0 = *ptrc0;
|
||||
dl_fc32_t in2 = *ptrc2;
|
||||
dl_fc32_t in1 = *ptrc1;
|
||||
dl_fc32_t in3 = *ptrc3;
|
||||
|
||||
bfly[0].re = in0.re + in2.re + in1.re + in3.re;
|
||||
bfly[0].im = in0.im + in2.im + in1.im + in3.im;
|
||||
|
||||
bfly[1].re = in0.re - in2.re - in1.im + in3.im; // this fft & ifft is different
|
||||
bfly[1].im = in0.im - in2.im + in1.re - in3.re; // this fft & ifft is different
|
||||
|
||||
bfly[2].re = in0.re + in2.re - in1.re - in3.re;
|
||||
bfly[2].im = in0.im + in2.im - in1.im - in3.im;
|
||||
|
||||
bfly[3].re = in0.re - in2.re + in1.im - in3.im; // this fft & ifft is different
|
||||
bfly[3].im = in0.im - in2.im - in1.re + in3.re; // this fft & ifft is different
|
||||
|
||||
*ptrc0 = bfly[0];
|
||||
ptrc1->re = bfly[1].re * winc0->re - bfly[1].im * winc0->im; // this fft & ifft is different
|
||||
ptrc1->im = bfly[1].im * winc0->re + bfly[1].re * winc0->im; // this fft & ifft is different
|
||||
ptrc2->re = bfly[2].re * winc1->re - bfly[2].im * winc1->im; // this fft & ifft is different
|
||||
ptrc2->im = bfly[2].im * winc1->re + bfly[2].re * winc1->im; // this fft & ifft is different
|
||||
ptrc3->re = bfly[3].re * winc2->re - bfly[3].im * winc2->im; // this fft & ifft is different
|
||||
ptrc3->im = bfly[3].im * winc2->re + bfly[3].re * winc2->im; // this fft & ifft is different
|
||||
|
||||
winc0 += 1 * wind_step;
|
||||
winc1 += 2 * wind_step;
|
||||
winc2 += 3 * wind_step;
|
||||
|
||||
ptrc0++;
|
||||
ptrc1++;
|
||||
ptrc2++;
|
||||
ptrc3++;
|
||||
}
|
||||
}
|
||||
m = m << 2;
|
||||
wind_step = wind_step << 2;
|
||||
log4N--;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dl_bitrev4r_fc32_ansi(float *data, int N, uint16_t *bitrev_table, int bitrev_size)
|
||||
{
|
||||
esp_err_t result = ESP_OK;
|
||||
if (bitrev_table) {
|
||||
float r_temp, i_temp;
|
||||
for (int n = 0; n < bitrev_size; n++) {
|
||||
uint16_t i = bitrev_table[n * 2];
|
||||
uint16_t j = bitrev_table[n * 2 + 1];
|
||||
r_temp = data[j];
|
||||
i_temp = data[j + 1];
|
||||
data[j] = data[i];
|
||||
data[i] = r_temp;
|
||||
data[j + 1] = data[i + 1];
|
||||
data[i + 1] = i_temp;
|
||||
}
|
||||
} else {
|
||||
int log2N = dl_power_of_two(N);
|
||||
int log4N = log2N >> 1;
|
||||
if ((log2N & 0x01) != 0) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
float r_temp, i_temp;
|
||||
for (int i = 0; i < N; i++) {
|
||||
int cnt;
|
||||
int xx;
|
||||
int bits2;
|
||||
xx = 0;
|
||||
cnt = log4N;
|
||||
int j = i;
|
||||
while (cnt > 0) {
|
||||
bits2 = j & 0x3;
|
||||
xx = (xx << 2) + bits2;
|
||||
j = j >> 2;
|
||||
cnt--;
|
||||
}
|
||||
if (i < xx) {
|
||||
r_temp = data[i * 2 + 0];
|
||||
i_temp = data[i * 2 + 1];
|
||||
data[i * 2 + 0] = data[xx * 2 + 0];
|
||||
data[i * 2 + 1] = data[xx * 2 + 1];
|
||||
data[xx * 2 + 0] = r_temp;
|
||||
data[xx * 2 + 1] = i_temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Reverse the lowest `digits` base-4 digits of `index`. */
static int dl_gen_bitrev4r_reverse_digits_(int index, int digits)
{
    int reversed = 0;
    for (; digits > 0; digits--) {
        reversed = (reversed << 2) + (index & 0x3);
        index >>= 2;
    }
    return reversed;
}

/**
 * @brief Build the swap table consumed by dl_bitrev4r_fc32_ansi().
 *
 * The table holds one (i * 2, xx * 2) pair of float offsets for every index i
 * whose base-4 digit-reversed partner xx is greater than i.
 *
 * Offsets are stored as uint16_t, so the largest stored value, 2 * (N - 1),
 * must fit in 16 bits. The previous guard tested `count * 2` instead, which let
 * N = 65536 through and produced silently truncated offsets. It also returned
 * without writing *bitrev_size. Both are fixed here: N > 32768 is rejected and
 * *bitrev_size is set to 0 on that path.
 *
 * @param N           Number of complex points; log2(N) must be even.
 * @param caps        Capability flags forwarded to heap_caps_malloc().
 * @param bitrev_size Out: number of pairs in the table; 0 when N is rejected.
 *                    It is still set to the pair count when only the
 *                    allocation fails (unchanged behaviour).
 * @return Newly allocated table, or NULL when N is rejected or allocation fails.
 */
uint16_t *dl_gen_bitrev4r_table(int N, uint32_t caps, int *bitrev_size)
{
    const int log2N = dl_power_of_two(N);
    const int log4N = log2N >> 1;

    if ((log2N & 0x01) != 0) {
        bitrev_size[0] = 0;
        return NULL;
    }

    // Largest stored offset is 2 * (N - 1); it must be representable as uint16_t.
    if (N > UINT16_MAX / 2 + 1) {
        bitrev_size[0] = 0;
        return NULL;
    }

    // First pass: count the pairs so the table can be sized exactly.
    int count = 0;
    for (int i = 0; i < N; i++) {
        if (i < dl_gen_bitrev4r_reverse_digits_(i, log4N)) {
            count++;
        }
    }

    bitrev_size[0] = count;
    uint16_t *bitrev_table = (uint16_t *)heap_caps_malloc(2 * count * sizeof(uint16_t), caps);

    if (bitrev_table) {
        // Second pass: emit the float offsets of each pair to swap.
        int idx = 0;
        for (int i = 0; i < N; i++) {
            const int xx = dl_gen_bitrev4r_reverse_digits_(i, log4N);
            if (i < xx) {
                bitrev_table[idx * 2] = i * 2;
                bitrev_table[idx * 2 + 1] = xx * 2;
                idx++;
            }
        }
    }
    return bitrev_table;
}
|
||||
|
||||
float *dl_gen_fft4r_table_f32(int fft_point, uint32_t caps)
|
||||
{
|
||||
float *fft_table = (float *)heap_caps_aligned_alloc(16, fft_point * sizeof(float) * 2, caps);
|
||||
|
||||
if (fft_table) {
|
||||
for (int i = 0; i < fft_point; i++) {
|
||||
float angle = 2 * M_PI * i * 1.0 / fft_point;
|
||||
fft_table[2 * i] = cosf(angle);
|
||||
fft_table[2 * i + 1] = sinf(angle);
|
||||
}
|
||||
}
|
||||
|
||||
return fft_table;
|
||||
}
|
||||
92
managed_components/espressif__dl_fft/base/dl_fft_base.c
Normal file
92
managed_components/espressif__dl_fft/base/dl_fft_base.c
Normal file
@@ -0,0 +1,92 @@
|
||||
#include "dl_fft_base.h"
|
||||
|
||||
/**
 * @brief Tell whether x is a positive power of two.
 *
 * Only strictly positive values are examined. This keeps `x - 1` from
 * overflowing for INT_MIN (undefined behaviour, which in practice made INT_MIN
 * look like a power of two). Results for every other input are unchanged.
 *
 * @param x Value to test.
 * @return true when x is 1, 2, 4, 8, ...; false otherwise (including 0 and
 *         all negative values).
 */
bool dl_is_power_of_two(int x)
{
    return (x > 0) && ((x & (x - 1)) == 0);
}
|
||||
|
||||
/**
 * @brief Return floor(log2(n)), i.e. the index of the highest set bit of n.
 *
 * Uses a binary search over shift widths 16, 8, 4, 2, 1.
 *
 * @param n Value to inspect.
 * @return Bit index in 0..31; 0 is returned for both n == 0 and n == 1.
 */
int dl_power_of_two(uint32_t n)
{
    int pos = 0;

    for (int width = 16; width > 0; width >>= 1) {
        // Any bit at or above `width` means the answer is at least `width` higher.
        if ((n >> width) != 0) {
            n >>= width;
            pos += width;
        }
    }

    return pos;
}
|
||||
|
||||
float *dl_short_to_float(const int16_t *x, int len, int exponent, float *y)
|
||||
{
|
||||
float scale = powf(2, exponent);
|
||||
// printf("scale: %f\n", scale);
|
||||
for (int i = 0; i < len; i++) {
|
||||
y[i] = scale * x[i];
|
||||
}
|
||||
return y;
|
||||
}
|
||||
|
||||
/**
 * @brief Derive a shift amount from the largest absolute value in an int16 array.
 *
 * Returns 1 when every element is zero (or size <= 0); otherwise returns
 * 2 + floor(log2(max |x[i]|)).
 *
 * Fixes over the previous version:
 *  - the scan now starts at index 0, so x[0] is no longer ignored;
 *  - the magnitude is tracked in 32 bits, so an INT16_MIN element yields
 *    32768 (result 17) instead of being truncated back to a negative int16.
 *
 * @param x    Input samples.
 * @param size Number of samples to scan.
 * @return Shift amount as described above.
 */
int16_t dl_array_max_q_s16(const int16_t *x, int size)
{
    int32_t peak = 0;
    for (int i = 0; i < size; i++) {
        const int32_t value = x[i];
        const int32_t magnitude = (value < 0) ? -value : value;
        if (magnitude > peak) {
            peak = magnitude;
        }
    }

    if (peak == 0) {
        return 1;
    }

    int16_t k = 2;
    while (peak > 1) {
        k++;
        peak = peak >> 1;
    }

    return k;
}
|
||||
|
||||
/**
 * @brief Return floor(log2(ceil(max |x[i]| + eps))) for a float array.
 *
 * The scan now starts at index 0. The previous version skipped x[0], so when
 * the first element was the peak the result was too small and
 * dl_float_to_short() could scale that sample outside the int16 range.
 *
 * @param x    Input samples.
 * @param size Number of samples to scan.
 * @param eps  Small offset added to the peak before rounding up.
 * @return dl_power_of_two() of the rounded-up peak magnitude.
 */
int dl_array_max_q_f32(const float *x, int size, float eps)
{
    float peak = 0;
    for (int i = 0; i < size; i++) {
        const float magnitude = fabsf(x[i]);
        if (magnitude > peak) {
            peak = magnitude;
        }
    }
    int peak_int = ceilf(peak + eps);

    return dl_power_of_two(peak_int);
}
|
||||
|
||||
/**
 * @brief Quantise floats to int16 using a power-of-two scale derived from the
 *        array's peak magnitude.
 *
 * The applied scale is 2^(out_exponent - q), where q is the value returned by
 * dl_array_max_q_f32(x, len, 1e-8). Each sample is multiplied by that scale,
 * rounded to nearest and stored as int16.
 *
 * @param x            Input samples.
 * @param len          Number of samples.
 * @param y            Output buffer with room for len int16 values.
 * @param out_exponent Exponent the scale is computed relative to.
 * @return The negated exponent of the applied scale, i.e. q - out_exponent.
 */
int dl_float_to_short(const float *x, int len, int16_t *y, int out_exponent)
{
    const int exponent = out_exponent - dl_array_max_q_f32(x, len, 1e-8);
    const float scale = powf(2, exponent);
    const float *src = x;
    int16_t *dst = y;

    for (int remaining = len; remaining > 0; --remaining) {
        *dst++ = (int16_t)roundf(*src++ * scale);
    }

    return -exponent;
}
|
||||
88
managed_components/espressif__dl_fft/base/dl_fft_base.h
Normal file
88
managed_components/espressif__dl_fft/base/dl_fft_base.h
Normal file
@@ -0,0 +1,88 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_fft_dtype.h"
|
||||
#include "esp_attr.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "esp_log.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "dl_fft_platform.h"
|
||||
|
||||
// common function
|
||||
bool dl_is_power_of_two(int x);
|
||||
int dl_power_of_two(uint32_t n);
|
||||
float *dl_short_to_float(const int16_t *x, int len, int exponent, float *y);
|
||||
int16_t dl_array_max_q_s16(const int16_t *x, int size);
|
||||
int dl_float_to_short(const float *x, int len, int16_t *y, int out_exponent);
|
||||
|
||||
// float fftr2
|
||||
float *dl_gen_fftr2_table_f32(int fft_point, uint32_t caps);
|
||||
uint16_t *dl_gen_bitrev2r_table(int N, uint32_t caps, int *bitrev_size);
|
||||
|
||||
esp_err_t dl_fft2r_fc32_ansi(float *data, int N, float *w);
|
||||
esp_err_t dl_ifft2r_fc32_ansi(float *data, int N, float *w);
|
||||
|
||||
esp_err_t dl_bitrev2r_fc32_ansi(float *data, int N, uint16_t *reverse_tab, int bitrev_size);
|
||||
|
||||
// float fftr4
|
||||
float *dl_gen_rfft_table_f32(int fft_point, uint32_t caps);
|
||||
float *dl_gen_fft4r_table_f32(int fft_point, uint32_t caps);
|
||||
uint16_t *dl_gen_bitrev4r_table(int N, uint32_t caps, int *bitrev_size);
|
||||
|
||||
esp_err_t dl_fft4r_fc32_ansi(float *data, int length, float *table, int table_size);
|
||||
esp_err_t dl_ifft4r_fc32_ansi(float *data, int length, float *table, int table_size);
|
||||
esp_err_t dl_bitrev4r_fc32_ansi(float *data, int N, uint16_t *reverse_tab, int bitrev_size);
|
||||
esp_err_t dl_rfft_post_proc_fc32_ansi(float *data, int N, float *table);
|
||||
esp_err_t dl_rfft_pre_proc_fc32_ansi(float *data, int N, float *table);
|
||||
|
||||
// int16 fft and rfft
|
||||
int16_t *dl_gen_fft_table_sc16(int fft_point, uint32_t caps);
|
||||
int16_t *dl_gen_rfft_table_s16(int fft_point, uint32_t caps);
|
||||
|
||||
esp_err_t dl_fft2r_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift);
|
||||
esp_err_t dl_fft2r_sc16_ansi(int16_t *data, int N, int16_t *table);
|
||||
|
||||
esp_err_t dl_ifft2r_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift);
|
||||
esp_err_t dl_ifft2r_sc16_ansi(int16_t *data, int N, int16_t *table);
|
||||
|
||||
esp_err_t dl_bitrev2r_sc16_ansi(int16_t *data, int N);
|
||||
esp_err_t dl_rfft_post_proc_sc16_ansi(int16_t *data, int N, int16_t *table);
|
||||
esp_err_t dl_rfft_pre_proc_sc16_ansi(int16_t *data, int N, int16_t *table);
|
||||
esp_err_t dl_cplx2real_sc16_hp_ansi(int16_t *data, int N, int16_t *table, int *shift);
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32
|
||||
#define dl_fft2r_fc32 dl_fft2r_fc32_ae32_
|
||||
#define dl_ifft2r_fc32 dl_ifft2r_fc32_ae32_
|
||||
#define dl_fft4r_fc32 dl_fft4r_fc32_ae32_
|
||||
#define dl_ifft4r_fc32 dl_ifft4r_fc32_ae32_
|
||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||
#define dl_fft2r_fc32 dl_fft2r_fc32_aes3_
|
||||
#define dl_ifft2r_fc32 dl_ifft2r_fc32_aes3_
|
||||
#define dl_fft4r_fc32 dl_fft4r_fc32_aes3_
|
||||
#define dl_ifft4r_fc32 dl_ifft4r_fc32_aes3_
|
||||
#elif CONFIG_IDF_TARGET_ESP32P4
|
||||
#define dl_fft2r_fc32 dl_fft2r_fc32_arp4_
|
||||
#define dl_ifft2r_fc32 dl_ifft2r_fc32_arp4_
|
||||
#define dl_fft4r_fc32 dl_fft4r_fc32_arp4_
|
||||
#define dl_ifft4r_fc32 dl_ifft4r_fc32_arp4_
|
||||
#else
|
||||
#define dl_fft2r_fc32 dl_fft2r_fc32_ansi
|
||||
#define dl_ifft2r_fc32 dl_ifft2r_fc32_ansi
|
||||
#define dl_fft4r_fc32 dl_fft4r_fc32_ansi
|
||||
#define dl_ifft4r_fc32 dl_ifft4r_fc32_ansi
|
||||
#endif
|
||||
|
||||
#define dl_fft2r_sc16 dl_fft2r_sc16_ansi
|
||||
#define dl_fft2r_sc16_hp dl_fft2r_sc16_hp_ansi
|
||||
#define dl_ifft2r_sc16 dl_ifft2r_sc16_ansi
|
||||
#define dl_ifft2r_sc16_hp dl_ifft2r_sc16_hp_ansi
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
30
managed_components/espressif__dl_fft/base/dl_fft_dtype.h
Normal file
30
managed_components/espressif__dl_fft/base/dl_fft_dtype.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Complex int16 sample. The anonymous struct exposes the real and imaginary
// parts by name, while `data` aliases both halves as a single 32-bit word.
typedef union dl_sc16_u {
    struct {
        int16_t re, im;
    };
    uint32_t data;
} dl_sc16_t;

// Complex float sample. The anonymous struct exposes the real and imaginary
// parts by name, while `data` aliases both halves as a single 64-bit word.
typedef union dl_fc32_u {
    struct {
        float re, im;
    };
    uint64_t data;
} dl_fc32_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Prototypes of the target-specific float FFT kernels. Exactly one group is
// declared, selected by the CONFIG_IDF_TARGET_* macro of the build; on any
// other target nothing is declared here. Unlike the ANSI C versions, these
// kernels are declared as returning void.
#if CONFIG_IDF_TARGET_ESP32

// ESP32 kernels (suffix _ae32_)
void dl_fft2r_fc32_ae32_(float *data, int N, float *table);
void dl_ifft2r_fc32_ae32_(float *data, int N, float *table);
void dl_fft4r_fc32_ae32_(float *data, int N, float *table, int table_size);
void dl_ifft4r_fc32_ae32_(float *data, int N, float *table, int table_size);

#elif CONFIG_IDF_TARGET_ESP32S3

// ESP32-S3 kernels (suffix _aes3_)
void dl_fft2r_fc32_aes3_(float *data, int N, float *table);
void dl_ifft2r_fc32_aes3_(float *data, int N, float *table);
void dl_fft4r_fc32_aes3_(float *data, int N, float *table, int table_size);
void dl_ifft4r_fc32_aes3_(float *data, int N, float *table, int table_size);

#elif CONFIG_IDF_TARGET_ESP32P4

// ESP32-P4 kernels (suffix _arp4_)
void dl_fft2r_fc32_arp4_(float *data, int N, float *table);
void dl_ifft2r_fc32_arp4_(float *data, int N, float *table);
void dl_fft4r_fc32_arp4_(float *data, int N, float *table, int table_size);
void dl_ifft4r_fc32_arp4_(float *data, int N, float *table, int table_size);

#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileContributor: 2024 f4lcOn @ Libera Chat IRC
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dl_fft2r_fc32_ae32_
|
||||
.type dl_fft2r_fc32_ae32_,@function
|
||||
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dl_fft2r_fc32_ansi(float *data, int N)
|
||||
//{
|
||||
// float *w = dl_fft_w_table_fc32;
|
||||
//
|
||||
// int ie, ia, m;
|
||||
// float re_temp, im_temp;
|
||||
// float c, s;
|
||||
// int N2 = N;
|
||||
// ie = 1;
|
||||
// for (int N2 = N/2; N2 > 0; N2 >>= 1) {
|
||||
// ia = 0;
|
||||
// for (int j = 0; j < ie; j++) {
|
||||
// c = w[2 * j];
|
||||
// s = w[2 * j + 1];
|
||||
// for (int i = 0; i < N2; i++) {
|
||||
// m = ia + N2;
|
||||
// re_temp = c * data[2 * m] + s * data[2 * m + 1];
|
||||
// im_temp = c * data[2 * m + 1] - s * data[2 * m];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
// ia++;
|
||||
// }
|
||||
// ia += N2;
|
||||
// }
|
||||
// ie <<= 1;
|
||||
// }
|
||||
// return result;
|
||||
//}
|
||||
|
||||
|
||||
dl_fft2r_fc32_ae32_:
|
||||
//esp_err_t dl_fft2r_fc32_ansi(float *data, int N, float* dl_fft_w_table_fc32)
|
||||
|
||||
entry a1, 16
|
||||
|
||||
// Array increment for floating point data should be 4
|
||||
// data - a2
|
||||
// N - a3
|
||||
// dl_fft_w_table_fc32 - a4
|
||||
|
||||
// a6 - k, main loop counter; N2 - for (int N2 = N/2; N2 > 0; N2 >>= 1)
|
||||
// a7 - ie
|
||||
// a8 - j
|
||||
// a10 - (j*2)<<2, or a10 - j<<3
|
||||
// f0 - c or w[2 * j]
|
||||
// f1 - s or w[2 * j + 1]
|
||||
// a11 - ia
|
||||
// a12 - m
|
||||
// a13 - ia pointer
|
||||
// a14 - m pointer
|
||||
// f6 - re_temp
|
||||
// f7 - im_temp
|
||||
|
||||
srli a6, a3, 1 // a6 = N2 = N/2
|
||||
movi.n a7, 1 // a7 - ie
|
||||
|
||||
.fft2r_l1:
|
||||
movi.n a8, 0 // a8 - j
|
||||
movi.n a11,0 // a11 = ia = 0;
|
||||
|
||||
.fft2r_l2: // loop for j, a8 - j
|
||||
addx8 a10, a8, a4 // a8 - shift for cos () -- c = w[2 * j]; -- pointer to cos
|
||||
lsi f0, a10, 0
|
||||
lsi f1, a10, 4
|
||||
|
||||
loopnez a6, .fft2r_l3
|
||||
add.n a12, a11, a6 // a12 = m = ia + N2
|
||||
addx8 a14, a12, a2 // a14 - pointer for m*2
|
||||
addx8 a13, a11, a2 // a13 - pointer for ia*2
|
||||
|
||||
lsi f4, a14, 0 // data[2 * m]
|
||||
mul.s f6, f0, f4 // re_temp = c * data[2 * m]
|
||||
lsi f5, a14, 4 // data[2 * m + 1]
|
||||
mul.s f7, f0, f5 // im_temp = c * data[2 * m + 1]
|
||||
|
||||
lsi f2, a13, 0 // data[2 * ia]
|
||||
madd.s f6, f1, f5 // re_temp += s * data[2 * m + 1];
|
||||
lsi f3, a13, 4 // data[2 * ia + 1]
|
||||
msub.s f7, f1, f4 // im_temp -= s * data[2 * m];
|
||||
|
||||
addi a11, a11, 1 // ia++
|
||||
|
||||
sub.s f8, f2, f6 // = data[2 * ia] - re_temp;
|
||||
add.s f10, f2, f6 // = data[2 * ia] + re_temp;
|
||||
sub.s f9, f3, f7 // = data[2 * ia + 1] - im_temp;
|
||||
add.s f11, f3, f7 // = data[2 * ia + 1] + im_temp;
|
||||
|
||||
ssi f8, a14, 0
|
||||
ssi f10, a13, 0
|
||||
ssi f9, a14, 4
|
||||
ssi f11, a13, 4
|
||||
.fft2r_l3:
|
||||
add.n a11, a11, a6
|
||||
|
||||
addi.n a8, a8, 1 // j++
|
||||
bne a8, a7, .fft2r_l2
|
||||
slli a7, a7, 1 // ie = ie<<1
|
||||
// main loop: for (int k = N/2; k > 0; k >>= 1)
|
||||
srli a6, a6, 1 // a6 = a6>>1
|
||||
bnez a6, .fft2r_l1 // Jump if > 0
|
||||
|
||||
// movi.n a2, 0 // return status ESP_OK
|
||||
retw
|
||||
|
||||
|
||||
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dl_ifft2r_fc32_ae32_
|
||||
.type dl_ifft2r_fc32_ae32_,@function
|
||||
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dl_fft2r_fc32_ansi(float *data, int N)
|
||||
//{
|
||||
// float *w = dl_fft_w_table_fc32;
|
||||
//
|
||||
// int ie, ia, m;
|
||||
// float re_temp, im_temp;
|
||||
// float c, s;
|
||||
// int N2 = N;
|
||||
// ie = 1;
|
||||
// for (int N2 = N/2; N2 > 0; N2 >>= 1) {
|
||||
// ia = 0;
|
||||
// for (int j = 0; j < ie; j++) {
|
||||
// c = w[2 * j];
|
||||
// s = -w[2 * j + 1];
|
||||
// for (int i = 0; i < N2; i++) {
|
||||
// m = ia + N2;
|
||||
// re_temp = c * data[2 * m] + s * data[2 * m + 1];
|
||||
// im_temp = c * data[2 * m + 1] - s * data[2 * m];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
// ia++;
|
||||
// }
|
||||
// ia += N2;
|
||||
// }
|
||||
// ie <<= 1;
|
||||
// }
|
||||
// return result;
|
||||
//}
|
||||
|
||||
|
||||
dl_ifft2r_fc32_ae32_:
|
||||
//esp_err_t dl_fft2r_fc32_ansi(float *data, int N, float* dl_fft_w_table_fc32)
|
||||
|
||||
entry a1, 16
|
||||
|
||||
// Array increment for floating point data should be 4
|
||||
// data - a2
|
||||
// N - a3
|
||||
// dl_fft_w_table_fc32 - a4
|
||||
|
||||
// a6 - k, main loop counter; N2 - for (int N2 = N/2; N2 > 0; N2 >>= 1)
|
||||
// a7 - ie
|
||||
// a8 - j
|
||||
// a10 - (j*2)<<2, or a10 - j<<3
|
||||
// f0 - c or w[2 * j]
|
||||
// f1 - s or w[2 * j + 1]
|
||||
// a11 - ia
|
||||
// a12 - m
|
||||
// a13 - ia pointer
|
||||
// a14 - m pointer
|
||||
// f6 - re_temp
|
||||
// f7 - im_temp
|
||||
|
||||
srli a6, a3, 1 // a6 = N2 = N/2
|
||||
movi.n a7, 1 // a7 - ie
|
||||
|
||||
.ifft2r_l1:
|
||||
movi.n a8, 0 // a8 - j
|
||||
movi.n a11,0 // a11 = ia = 0;
|
||||
|
||||
.ifft2r_l2: // loop for j, a8 - j
|
||||
addx8 a10, a8, a4 // a8 - shift for cos () -- c = w[2 * j]; -- pointer to cos
|
||||
lsi f0, a10, 0
|
||||
lsi f1, a10, 4
|
||||
// CHANGE: Negate the imaginary part of twiddle factors
|
||||
neg.s f1, f1
|
||||
|
||||
loopnez a6, .ifft2r_l3
|
||||
add.n a12, a11, a6 // a12 = m = ia + N2
|
||||
addx8 a14, a12, a2 // a14 - pointer for m*2
|
||||
addx8 a13, a11, a2 // a13 - pointer for ia*2
|
||||
|
||||
lsi f4, a14, 0 // data[2 * m]
|
||||
mul.s f6, f0, f4 // re_temp = c * data[2 * m]
|
||||
lsi f5, a14, 4 // data[2 * m + 1]
|
||||
mul.s f7, f0, f5 // im_temp = c * data[2 * m + 1]
|
||||
|
||||
lsi f2, a13, 0 // data[2 * ia]
|
||||
madd.s f6, f1, f5 // re_temp += s * data[2 * m + 1];
|
||||
lsi f3, a13, 4 // data[2 * ia + 1]
|
||||
msub.s f7, f1, f4 // im_temp -= s * data[2 * m];
|
||||
|
||||
addi a11, a11, 1 // ia++
|
||||
|
||||
sub.s f8, f2, f6 // = data[2 * ia] - re_temp;
|
||||
add.s f10, f2, f6 // = data[2 * ia] + re_temp;
|
||||
sub.s f9, f3, f7 // = data[2 * ia + 1] - im_temp;
|
||||
add.s f11, f3, f7 // = data[2 * ia + 1] + im_temp;
|
||||
|
||||
ssi f8, a14, 0
|
||||
ssi f10, a13, 0
|
||||
ssi f9, a14, 4
|
||||
ssi f11, a13, 4
|
||||
.ifft2r_l3:
|
||||
add.n a11, a11, a6
|
||||
|
||||
addi.n a8, a8, 1 // j++
|
||||
bne a8, a7, .ifft2r_l2
|
||||
slli a7, a7, 1 // ie = ie<<1
|
||||
// main loop: for (int k = N/2; k > 0; k >>= 1)
|
||||
srli a6, a6, 1 // a6 = a6>>1
|
||||
bnez a6, .ifft2r_l1 // Jump if > 0
|
||||
|
||||
// movi.n a2, 0 // return status ESP_OK
|
||||
retw
|
||||
@@ -0,0 +1,332 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileContributor: 2024 f4lcOn @ Libera Chat IRC
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
|
||||
.section .text # placed in IRAM instead of FLASH .text
|
||||
.global dl_fft4r_fc32_ae32_
|
||||
.type dl_fft4r_fc32_ae32_,@function
|
||||
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dl_fft4r_fc32_ansi_(float *data, int length, float *table, int table_size)
|
||||
// {
|
||||
// if (0 == dl_fft4r_initialized) {
|
||||
// return ESP_ERR_DSP_UNINITIALIZED;
|
||||
// }
|
||||
//
|
||||
// uint log2N = dl_power_of_two(length);
|
||||
// if ((log2N & 0x01) != 0) {
|
||||
// return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
// }
|
||||
// uint log4N = log2N >> 1;
|
||||
//
|
||||
// fc32_t bfly[4];
|
||||
// uint m = 2;
|
||||
// uint wind_step = table_size / length;
|
||||
// while (1) { ///radix 4
|
||||
// if (log4N == 0) {
|
||||
// break;
|
||||
// }
|
||||
// length = length >> 2;
|
||||
// for (int j = 0; j < m; j += 2) { // j: which FFT of this step
|
||||
// int start_index = j * (length << 1); // n: n-point FFT
|
||||
//
|
||||
// fc32_t *ptrc0 = (fc32_t *)data + start_index;
|
||||
// fc32_t *ptrc1 = ptrc0 + length;
|
||||
// fc32_t *ptrc2 = ptrc1 + length;
|
||||
// fc32_t *ptrc3 = ptrc2 + length;
|
||||
//
|
||||
// fc32_t *winc0 = (fc32_t *)table;
|
||||
// fc32_t *winc1 = winc0;
|
||||
// fc32_t *winc2 = winc0;
|
||||
//
|
||||
// for (int k = 0; k < length; k++) {
|
||||
// fc32_t in0 = *ptrc0;
|
||||
// fc32_t in2 = *ptrc2;
|
||||
// fc32_t in1 = *ptrc1;
|
||||
// fc32_t in3 = *ptrc3;
|
||||
//
|
||||
// bfly[0].re = in0.re + in2.re + in1.re + in3.re;
|
||||
// bfly[0].im = in0.im + in2.im + in1.im + in3.im;
|
||||
//
|
||||
// bfly[1].re = in0.re - in2.re + in1.im - in3.im;
|
||||
// bfly[1].im = in0.im - in2.im - in1.re + in3.re;
|
||||
//
|
||||
// bfly[2].re = in0.re + in2.re - in1.re - in3.re;
|
||||
// bfly[2].im = in0.im + in2.im - in1.im - in3.im;
|
||||
//
|
||||
// bfly[3].re = in0.re - in2.re - in1.im + in3.im;
|
||||
// bfly[3].im = in0.im - in2.im + in1.re - in3.re;
|
||||
//
|
||||
// *ptrc0 = bfly[0];
|
||||
// ptrc1->re = bfly[1].re * winc0->re + bfly[1].im * winc0->im;
|
||||
// ptrc1->im = bfly[1].im * winc0->re - bfly[1].re * winc0->im;
|
||||
// ptrc2->re = bfly[2].re * winc1->re + bfly[2].im * winc1->im;
|
||||
// ptrc2->im = bfly[2].im * winc1->re - bfly[2].re * winc1->im;
|
||||
// ptrc3->re = bfly[3].re * winc2->re + bfly[3].im * winc2->im;
|
||||
// ptrc3->im = bfly[3].im * winc2->re - bfly[3].re * winc2->im;
|
||||
//
|
||||
// winc0 += 1 * wind_step;
|
||||
// winc1 += 2 * wind_step;
|
||||
// winc2 += 3 * wind_step;
|
||||
//
|
||||
// ptrc0++;
|
||||
// ptrc1++;
|
||||
// ptrc2++;
|
||||
// ptrc3++;
|
||||
// }
|
||||
// }
|
||||
// m = m << 2;
|
||||
// wind_step = wind_step << 2;
|
||||
// log4N--;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
// esp_err_t dl_fft4r_fc32_ae32_(data, N, dl_fft4r_w_table_fc32, dl_fft4r_w_table_size)
|
||||
|
||||
//.ret_DSP_INVALID_LENGTH:
|
||||
// movi.n a2, ESP_ERR_DSP_INVALID_LENGTH
|
||||
// retw.n
|
||||
|
||||
.align 4
|
||||
dl_fft4r_fc32_ae32_:
|
||||
|
||||
entry a1, 16 # no auto vars on stack
|
||||
|
||||
// bltui a3, 4, .ret_DSP_INVALID_LENGTH # if N < 4 : return(ESP_ERR_DSP_INVALID_LENGTH)
|
||||
|
||||
// addi.n a6, a3, -1
|
||||
// and a6, a3, a6
|
||||
// bnez a6, .ret_DSP_INVALID_LENGTH # if N not power of 2 : return(ESP_ERR_DSP_INVALID_LENGTH)
|
||||
|
||||
nsau a6, a3 # inline dl_power_of_two(N)
|
||||
movi.n a7, 31
|
||||
xor a6, a6, a7
|
||||
|
||||
// bbsi a6, 0, .ret_DSP_INVALID_LENGTH # if N not power of 4 : return(ESP_ERR_DSP_INVALID_LENGTH)
|
||||
|
||||
srli a7, a6, 1 # log4N = dl_power_of_two(N) >> 1;
|
||||
|
||||
addi.n a6, a6, -1
|
||||
ssr a6
|
||||
srl a6, a5 # w_step = table_size >> (dl_power_of_two(N) - 1)
|
||||
|
||||
movi.n a5, 2 # m = 2
|
||||
|
||||
.stage:
|
||||
srli a3, a3, 2 # N >>= 2
|
||||
|
||||
movi.n a8, 0 # j = 0
|
||||
|
||||
.group:
|
||||
mov.n a9, a4 # w0 = w
|
||||
mov.n a10, a4 # w1 = w
|
||||
mov.n a11, a4 # w2 = w
|
||||
|
||||
mul16u a12, a8, a3
|
||||
slli a12, a12, 1 # start_index = (j * N) << 1
|
||||
|
||||
addx8 a12, a12, a2 # p0 = data + (start_index << 1)
|
||||
addx8 a13, a3, a12 # p1 = p0 + (N << 1)
|
||||
addx8 a14, a3, a13 # p2 = p1 + (N << 1)
|
||||
addx8 a15, a3, a14 # p3 = p2 + (N << 1)
|
||||
|
||||
loopnez a3, .bf4_loop_end # for (uint k = 0; k < N; k++)
|
||||
lsi f1, a12, 4 # f1 = in0.im = *(p0 + 1)
|
||||
lsi f3, a14, 4 # f3 = in2.im = *(p2 + 1)
|
||||
lsi f0, a12, 0 # f0 = in0.re = *p0
|
||||
lsi f2, a14, 0 # f2 = in2.re = *p2
|
||||
add.s f5, f1, f3 # f5 = in0.im + in2.im
|
||||
sub.s f7, f1, f3 # f7 = in0.im - in2.im
|
||||
lsi f1, a13, 4 # f1 = in1.im = *(p1 + 1)
|
||||
lsi f3, a15, 4 # f3 = in3.im = *(p3 + 1)
|
||||
add.s f4, f0, f2 # f4 = in0.re + in2.re
|
||||
sub.s f6, f0, f2 # f6 = in0.re - in2.re
|
||||
add.s f9, f1, f3 # f9 = in1.im + in3.im
|
||||
sub.s f11, f1, f3 # f11 = in1.im - in3.im
|
||||
lsi f0, a13, 0 # f0 = in1.re = *p1
|
||||
lsi f2, a15, 0 # f2 = in3.re = *p3
|
||||
lsi f12, a9, 0 # f12 = w0->re
|
||||
lsi f13, a10, 0 # f13 = w1->re
|
||||
lsi f14, a11, 0 # f14 = w2->re
|
||||
add.s f8, f0, f2 # f8 = in1.re + in3.re
|
||||
sub.s f10, f0, f2 # f10 = in1.re - in3.re
|
||||
sub.s f1, f5, f9 # f1 = bf2.im = in0.im + in2.im - in1.im - in3.im
|
||||
add.s f5, f5, f9 # f5 = bf0.im = in0.im + in2.im + in1.im + in3.im
|
||||
add.s f2, f6, f11 # f2 = bf1.re = in0.re - in2.re + in1.im - in3.im
|
||||
sub.s f6, f6, f11 # f6 = bf3.re = in0.re - in2.re - in1.im + in3.im
|
||||
sub.s f0, f4, f8 # f0 = bf2.re = in0.re + in2.re - in1.re - in3.re
|
||||
add.s f4, f4, f8 # f4 = bf0.re = in0.re + in2.re + in1.re + in3.re
|
||||
sub.s f3, f7, f10 # f3 = bf1.im = in0.im - in2.im - in1.re + in3.re
|
||||
add.s f7, f7, f10 # f7 = bf3.im = in0.im - in2.im + in1.re - in3.re
|
||||
ssi f5, a12, 4 # *(p0 + 1) = f5 = bf0.im
|
||||
ssip f4, a12, 8 # *p0 = f4 = bf0.re , p0 += 2
|
||||
mul.s f5, f3, f12 # f5 = bf1.im * w0->re
|
||||
mul.s f4, f2, f12 # f4 = bf1.re * w0->re
|
||||
mul.s f9, f1, f13 # f9 = bf2.im * w1->re
|
||||
mul.s f8, f0, f13 # f8 = bf2.re * w1->re
|
||||
mul.s f11, f7, f14 # f11 = bf3.im * w2->re
|
||||
mul.s f10, f6, f14 # f10 = bf3.re * w2->re
|
||||
lsi f12, a9, 4 # f12 = w0->im
|
||||
lsi f13, a10, 4 # f13 = w1->im
|
||||
lsi f14, a11, 4 # f14 = w2->im
|
||||
addx4 a9, a6, a9 # w0 += m
|
||||
addx8 a10, a6, a10 # w1 += 2 * m
|
||||
addx4 a11, a6, a11
|
||||
addx8 a11, a6, a11 # w2 += 3 * m
|
||||
msub.s f5, f2, f12 # f5 = bf1.im * w0->re - bf1.re * w0->im
|
||||
madd.s f4, f3, f12 # f4 = bf1.re * w0->re + bf1.im * w0->im
|
||||
msub.s f9, f0, f13 # f9 = bf2.im * w1->re - bf2.re * w1->im
|
||||
madd.s f8, f1, f13 # f8 = bf2.re * w1->re + bf2.im * w1->im
|
||||
msub.s f11, f6, f14 # f11 = bf3.im * w2->re - bf3.re * w2->im
|
||||
madd.s f10, f7, f14 # f10 = bf3.re * w2->re + bf3.im * w2->im
|
||||
ssi f5, a13, 4 # *(p1 + 1) = f5
|
||||
ssip f4, a13, 8 # *p1 = f4, p1 += 2
|
||||
ssi f9, a14, 4 # *(p2 + 1) = f9
|
||||
ssip f8, a14, 8 # *p2 = f8, p2 += 2
|
||||
ssi f11, a15, 4 # *(p3 + 1) = f11
|
||||
ssip f10, a15, 8 # *p3 = f10, p3 += 2
|
||||
.bf4_loop_end:
|
||||
|
||||
addi.n a8, a8, 2 # j += 2
|
||||
bgeu a8, a5, .stage_next # if j >= m
|
||||
j .group
|
||||
|
||||
.stage_next: # one radix-4 stage is done: rescale the counters, then loop or return
|
||||
slli a5, a5, 2 # m <<= 2
|
||||
slli a6, a6, 2 # w_step <<= 2
|
||||
addi.n a7, a7, -1 # log4N--
|
||||
bnez a7, .stage # if log4N > 0
|
||||
|
||||
movi.n a2, 0 # return 0 (ESP_OK); without this a2 still holds the data pointer argument
|
||||
retw
|
||||
|
||||
|
||||
|
||||
.section .text # NOTE(review): this selects the ordinary .text section, not an IRAM section - confirm the intended placement
|
||||
.global dl_ifft4r_fc32_ae32_
|
||||
.type dl_ifft4r_fc32_ae32_,@function
|
||||
|
||||
// esp_err_t dl_ifft4r_fc32_ae32_(data, N, dl_fft4r_w_table_fc32, dl_fft4r_w_table_size)
|
||||
|
||||
//.ret_DSP_INVALID_LENGTH:
|
||||
// movi.n a2, ESP_ERR_DSP_INVALID_LENGTH
|
||||
// retw.n
|
||||
|
||||
.align 4
|
||||
dl_ifft4r_fc32_ae32_:
|
||||
|
||||
entry a1, 16 # no auto vars on stack
|
||||
|
||||
// bltui a3, 4, .ret_DSP_INVALID_LENGTH # if N < 4 : return(ESP_ERR_DSP_INVALID_LENGTH)
|
||||
|
||||
// addi.n a6, a3, -1
|
||||
// and a6, a3, a6
|
||||
// bnez a6, .ret_DSP_INVALID_LENGTH # if N not power of 2 : return(ESP_ERR_DSP_INVALID_LENGTH)
|
||||
|
||||
nsau a6, a3 # inline dl_power_of_two(N)
|
||||
movi.n a7, 31
|
||||
xor a6, a6, a7
|
||||
|
||||
// bbsi a6, 0, .ret_DSP_INVALID_LENGTH # if N not power of 4 : return(ESP_ERR_DSP_INVALID_LENGTH)
|
||||
|
||||
srli a7, a6, 1 # log4N = dl_power_of_two(N) >> 1;
|
||||
|
||||
addi.n a6, a6, -1
|
||||
ssr a6
|
||||
srl a6, a5 # w_step = table_size >> (dl_power_of_two(N) - 1)
|
||||
|
||||
movi.n a5, 2 # m = 2
|
||||
|
||||
.ifft_stage:
|
||||
srli a3, a3, 2 # N >>= 2
|
||||
|
||||
movi.n a8, 0 # j = 0
|
||||
|
||||
.ifft_group:
|
||||
mov.n a9, a4 # w0 = w
|
||||
mov.n a10, a4 # w1 = w
|
||||
mov.n a11, a4 # w2 = w
|
||||
|
||||
mul16u a12, a8, a3
|
||||
slli a12, a12, 1 # start_index = (j * N) << 1
|
||||
|
||||
addx8 a12, a12, a2 # p0 = data + (start_index << 1)
|
||||
addx8 a13, a3, a12 # p1 = p0 + (N << 1)
|
||||
addx8 a14, a3, a13 # p2 = p1 + (N << 1)
|
||||
addx8 a15, a3, a14 # p3 = p2 + (N << 1)
|
||||
|
||||
loopnez a3, .inv_bf4_loop_end # for (uint k = 0; k < N; k++) - inverse butterfly: bf1/bf3 and the twiddle signs are mirrored vs. the forward routine
|
||||
lsi f1, a12, 4 # f1 = in0.im = *(p0 + 1)
|
||||
lsi f3, a14, 4 # f3 = in2.im = *(p2 + 1)
|
||||
lsi f0, a12, 0 # f0 = in0.re = *p0
|
||||
lsi f2, a14, 0 # f2 = in2.re = *p2
|
||||
add.s f5, f1, f3 # f5 = in0.im + in2.im
|
||||
sub.s f7, f1, f3 # f7 = in0.im - in2.im
|
||||
lsi f1, a13, 4 # f1 = in1.im = *(p1 + 1)
|
||||
lsi f3, a15, 4 # f3 = in3.im = *(p3 + 1)
|
||||
add.s f4, f0, f2 # f4 = in0.re + in2.re
|
||||
sub.s f6, f0, f2 # f6 = in0.re - in2.re
|
||||
add.s f9, f1, f3 # f9 = in1.im + in3.im
|
||||
sub.s f11, f1, f3 # f11 = in1.im - in3.im
|
||||
lsi f0, a13, 0 # f0 = in1.re = *p1
|
||||
lsi f2, a15, 0 # f2 = in3.re = *p3
|
||||
lsi f12, a9, 0 # f12 = w0->re
|
||||
lsi f13, a10, 0 # f13 = w1->re
|
||||
lsi f14, a11, 0 # f14 = w2->re
|
||||
add.s f8, f0, f2 # f8 = in1.re + in3.re
|
||||
sub.s f10, f0, f2 # f10 = in1.re - in3.re
|
||||
sub.s f1, f5, f9 # f1 = bf2.im = in0.im + in2.im - in1.im - in3.im
|
||||
add.s f5, f5, f9 # f5 = bf0.im = in0.im + in2.im + in1.im + in3.im
|
||||
sub.s f2, f6, f11 # f2 = bf1.re = in0.re - in2.re - in1.im + in3.im
|
||||
add.s f6, f6, f11 # f6 = bf3.re = in0.re - in2.re + in1.im - in3.im
|
||||
sub.s f0, f4, f8 # f0 = bf2.re = in0.re + in2.re - in1.re - in3.re
|
||||
add.s f4, f4, f8 # f4 = bf0.re = in0.re + in2.re + in1.re + in3.re
|
||||
add.s f3, f7, f10 # f3 = bf1.im = in0.im - in2.im + in1.re - in3.re
|
||||
sub.s f7, f7, f10 # f7 = bf3.im = in0.im - in2.im - in1.re + in3.re
|
||||
ssi f5, a12, 4 # *(p0 + 1) = f5 = bf0.im
|
||||
ssip f4, a12, 8 # *p0 = f4 = bf0.re , p0 += 2
|
||||
mul.s f5, f3, f12 # f5 = bf1.im * w0->re
|
||||
mul.s f4, f2, f12 # f4 = bf1.re * w0->re
|
||||
mul.s f9, f1, f13 # f9 = bf2.im * w1->re
|
||||
mul.s f8, f0, f13 # f8 = bf2.re * w1->re
|
||||
mul.s f11, f7, f14 # f11 = bf3.im * w2->re
|
||||
mul.s f10, f6, f14 # f10 = bf3.re * w2->re
|
||||
lsi f12, a9, 4 # f12 = w0->im
|
||||
lsi f13, a10, 4 # f13 = w1->im
|
||||
lsi f14, a11, 4 # f14 = w2->im
|
||||
addx4 a9, a6, a9 # w0 += 1 * (w_step << 2) bytes
|
||||
addx8 a10, a6, a10 # w1 += 2 * (w_step << 2) bytes
|
||||
addx4 a11, a6, a11 # w2 += 1 * (w_step << 2) bytes ...
|
||||
addx8 a11, a6, a11 # ... plus 2 * (w_step << 2): w2 += 3 * (w_step << 2) bytes in total
|
||||
madd.s f5, f2, f12 # f5 = bf1.im * w0->re + bf1.re * w0->im
|
||||
msub.s f4, f3, f12 # f4 = bf1.re * w0->re - bf1.im * w0->im
|
||||
madd.s f9, f0, f13 # f9 = bf2.im * w1->re + bf2.re * w1->im
|
||||
msub.s f8, f1, f13 # f8 = bf2.re * w1->re - bf2.im * w1->im
|
||||
madd.s f11, f6, f14 # f11 = bf3.im * w2->re + bf3.re * w2->im
|
||||
msub.s f10, f7, f14 # f10 = bf3.re * w2->re - bf3.im * w2->im
|
||||
ssi f5, a13, 4 # *(p1 + 1) = f5
|
||||
ssip f4, a13, 8 # *p1 = f4, p1 += 2
|
||||
ssi f9, a14, 4 # *(p2 + 1) = f9
|
||||
ssip f8, a14, 8 # *p2 = f8, p2 += 2
|
||||
ssi f11, a15, 4 # *(p3 + 1) = f11
|
||||
ssip f10, a15, 8 # *p3 = f10, p3 += 2
|
||||
.inv_bf4_loop_end:
|
||||
|
||||
addi.n a8, a8, 2 # j += 2
|
||||
bgeu a8, a5, .ifft_stage_next # if j >= m
|
||||
j .ifft_group
|
||||
|
||||
.ifft_stage_next: # one inverse radix-4 stage is done: rescale the counters, then loop or return
|
||||
slli a5, a5, 2 # m <<= 2
|
||||
slli a6, a6, 2 # w_step <<= 2
|
||||
addi.n a7, a7, -1 # log4N--
|
||||
bnez a7, .ifft_stage # if log4N > 0
|
||||
|
||||
movi.n a2, 0 # return 0 (ESP_OK); without this a2 still holds the data pointer argument
|
||||
retw
|
||||
@@ -0,0 +1,153 @@
|
||||
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
// Radix-2 complex float32 FFT (forward transform) for the esp32p4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dl_fft2r_fc32_arp4_
|
||||
.type dl_fft2r_fc32_arp4_,@function
|
||||
|
||||
dl_fft2r_fc32_arp4_:
|
||||
//esp_err_t dl_fft2r_fc32_arp4_(float *data, int N, float* dl_fft_w_table_fc32) -- in-place radix-2 butterflies; a0 = data, a1 = N, a2 = twiddle table
|
||||
|
||||
add sp,sp,-16 // reserve 16 bytes of stack; nothing in this routine addresses them
|
||||
#
|
||||
srli t6, a1, 1 // t6 = N2 = N/2
|
||||
li t0, 1 // t0 - ie
|
||||
|
||||
.fft2r_l1:
|
||||
li t1, 0 // t1 - j
|
||||
li t4, 0 // t4 = ia = 0;
|
||||
|
||||
.fft2r_l2: // loop for j, t1 - j
|
||||
|
||||
slli t3, t1, 3 // t3 = j<<3 // byte offset of w[2 * j]
|
||||
add t3, t3, a2 // t3 - pointer to cos
|
||||
flw fa0, 0(t3) // fa0 = c = w[2 * j]
|
||||
flw fa1, 4(t3) // fa1 = s = w[2 * j + 1]
|
||||
|
||||
esp.lp.setup 0, t6, .fft2r_l3 // .fft2r_l3 - label to the last executed instruction
|
||||
add t5, t4, t6 // t5 = m = ia + N2
|
||||
|
||||
slli a4, t5, 3 // a4 - byte offset of data[2 * m]
|
||||
slli a3, t4, 3 // a3 - byte offset of data[2 * ia]
|
||||
add a4, a4, a0 // a4 = &data[2 * m]
|
||||
add a3, a3, a0 // a3 = &data[2 * ia]
|
||||
|
||||
flw fa4, 0(a4) // fa4 = data[2 * m]
|
||||
flw fa5, 4(a4) // fa5 = data[2 * m + 1]
|
||||
flw fa2, 0(a3) // fa2 = data[2 * ia]
|
||||
flw fa3, 4(a3) // fa3 = data[2 * ia + 1]
|
||||
|
||||
fmul.s ft6, fa0, fa4 // re_temp = c * data[2 * m]
|
||||
fmul.s ft7, fa0, fa5 // im_temp = c * data[2 * m + 1]
|
||||
fmadd.s ft6, fa1, fa5, ft6 // re_temp += s * data[2 * m + 1];
|
||||
fnmsub.s ft7, fa1, fa4, ft7 // im_temp -= s * data[2 * m];
|
||||
fsub.s ft8, fa2, ft6 // = data[2 * ia] - re_temp;
|
||||
fsub.s ft9, fa3, ft7 // = data[2 * ia + 1] - im_temp;
|
||||
|
||||
fadd.s ft10, fa2, ft6 // = data[2 * ia] + re_temp;
|
||||
fadd.s ft11, fa3, ft7 // = data[2 * ia + 1] + im_temp;
|
||||
|
||||
fsw ft8, 0(a4) // data[2 * m]
|
||||
fsw ft9, 4(a4) // data[2 * m + 1]
|
||||
fsw ft10, 0(a3) // data[2 * ia]
|
||||
fsw ft11, 4(a3) // data[2 * ia + 1]
|
||||
|
||||
.fft2r_l3: add t4, t4, 1 // ia++
|
||||
|
||||
add t4, t4, t6 // ia += N2
|
||||
add t1, t1, 1 // j++
|
||||
|
||||
BNE t1, t0, .fft2r_l2 // repeat while j != ie
|
||||
slli t0, t0, 1 // ie = ie<<1
|
||||
srli t6, t6, 1 // t6 = t6>>1 (N2 >>= 1)
|
||||
BNEZ t6, .fft2r_l1// Jump if > 0
|
||||
|
||||
#
|
||||
add sp,sp,16 // release the 16 bytes reserved on entry
|
||||
li a0,0 // return value 0
|
||||
ret
|
||||
|
||||
|
||||
// Radix-2 complex float32 inverse FFT for the esp32p4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dl_ifft2r_fc32_arp4_
|
||||
.type dl_ifft2r_fc32_arp4_,@function
|
||||
|
||||
dl_ifft2r_fc32_arp4_:
|
||||
//esp_err_t dl_ifft2r_fc32_arp4_(float *data, int N, float* dl_fft_w_table_fc32) -- in-place radix-2 butterflies with negated twiddle sine; a0 = data, a1 = N, a2 = twiddle table
|
||||
|
||||
add sp,sp,-16 // reserve 16 bytes of stack; nothing in this routine addresses them
|
||||
#
|
||||
srli t6, a1, 1 // t6 = N2 = N/2
|
||||
li t0, 1 // t0 - ie
|
||||
|
||||
.ifft2r_l1:
|
||||
li t1, 0 // t1 - j
|
||||
li t4, 0 // t4 = ia = 0;
|
||||
|
||||
.ifft2r_l2: // loop for j, t1 - j
|
||||
|
||||
slli t3, t1, 3 // t3 = j<<3 // byte offset of w[2 * j]
|
||||
add t3, t3, a2 // t3 - pointer to cos
|
||||
flw fa0, 0(t3) // fa0 = c = w[2 * j]
|
||||
flw fa1, 4(t3) // fa1 = s = w[2 * j + 1]
|
||||
// CHANGE: Negate the imaginary part of twiddle factors (complex conjugate)
|
||||
fneg.s fa1, fa1 // s = -s (since w^-1 = w*)
|
||||
|
||||
esp.lp.setup 0, t6, .ifft2r_l3 // .ifft2r_l3 - label to the last executed instruction
|
||||
add t5, t4, t6 // t5 = m = ia + N2
|
||||
|
||||
slli a4, t5, 3 // a4 - byte offset of data[2 * m]
|
||||
slli a3, t4, 3 // a3 - byte offset of data[2 * ia]
|
||||
add a4, a4, a0 // a4 = &data[2 * m]
|
||||
add a3, a3, a0 // a3 = &data[2 * ia]
|
||||
|
||||
flw fa4, 0(a4) // fa4 = data[2 * m]
|
||||
flw fa5, 4(a4) // fa5 = data[2 * m + 1]
|
||||
flw fa2, 0(a3) // fa2 = data[2 * ia]
|
||||
flw fa3, 4(a3) // fa3 = data[2 * ia + 1]
|
||||
|
||||
fmul.s ft6, fa0, fa4 // re_temp = c * data[2 * m]
|
||||
fmul.s ft7, fa0, fa5 // im_temp = c * data[2 * m + 1]
|
||||
fmadd.s ft6, fa1, fa5, ft6 // re_temp += s * data[2 * m + 1]; (s already negated)
|
||||
fnmsub.s ft7, fa1, fa4, ft7 // im_temp -= s * data[2 * m]; (s already negated)
|
||||
fsub.s ft8, fa2, ft6 // = data[2 * ia] - re_temp;
|
||||
fsub.s ft9, fa3, ft7 // = data[2 * ia + 1] - im_temp;
|
||||
|
||||
fadd.s ft10, fa2, ft6 // = data[2 * ia] + re_temp;
|
||||
fadd.s ft11, fa3, ft7 // = data[2 * ia + 1] + im_temp;
|
||||
|
||||
fsw ft8, 0(a4) // data[2 * m]
|
||||
fsw ft9, 4(a4) // data[2 * m + 1]
|
||||
fsw ft10, 0(a3) // data[2 * ia]
|
||||
fsw ft11, 4(a3) // data[2 * ia + 1]
|
||||
|
||||
.ifft2r_l3: add t4, t4, 1 // ia++
|
||||
|
||||
add t4, t4, t6 // ia += N2
|
||||
add t1, t1, 1 // j++
|
||||
|
||||
BNE t1, t0, .ifft2r_l2 // repeat while j != ie
|
||||
slli t0, t0, 1 // ie = ie<<1
|
||||
srli t6, t6, 1 // t6 = t6>>1 (N2 >>= 1)
|
||||
BNEZ t6, .ifft2r_l1// Jump if > 0
|
||||
|
||||
#
|
||||
add sp,sp,16 // release the 16 bytes reserved on entry
|
||||
li a0,0 // return value 0
|
||||
ret
|
||||
@@ -0,0 +1,304 @@
|
||||
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dl_fft4r_fc32_arp4_
|
||||
.type dl_fft4r_fc32_arp4_,@function
|
||||
|
||||
dl_fft4r_fc32_arp4_:
|
||||
//esp_err_t dl_fft4r_fc32_arp4_(float *data, int N, float *table, int table_size) -- a0 = data, a1 = N, a2 = table
|
||||
|
||||
// table_size - a3
|
||||
// m - t0
|
||||
// j - t1
|
||||
add sp,sp,-16 // reserve 16 bytes of stack
|
||||
#
|
||||
srli t6, a1, 1 // t6 = N/2: stage countdown, shifted right by 2 after each stage until it reaches 0
|
||||
li t0, 2 // t0 - m
|
||||
|
||||
div a3, a3, a1 // wind_step = table_size / N
|
||||
slli a3, a3, 3 // wind_step scaled to bytes (one complex sample = 8 bytes)
|
||||
|
||||
.fft2r_l1:
|
||||
li t1, 0 // t1 - j
|
||||
srli a1, a1, 2 // a1 = length = length >> 2;
|
||||
.fft2r_l2: // loop for j, t1 - j
|
||||
slli t2, a1, 4 // t2 = length << 1 << 3 (8 bytes for one complex sample)
|
||||
slli t3, a1, 3 // t3 = length << 3 (byte distance between ptrc0, ptrc1, ptrc2, ptrc3)
|
||||
// start_index = j * (length << 1); // n: n-point FFT
|
||||
mul t2,t2,t1 // t2 = byte offset of start_index
|
||||
add a4, a0, t2 // fc32_t *ptrc0
|
||||
add a5, a4, t3 // fc32_t *ptrc1
|
||||
add a6, a5, t3 // fc32_t *ptrc2
|
||||
add a7, a6, t3 // fc32_t *ptrc3
|
||||
|
||||
# flw fa0, 0(a4)
|
||||
# fsw fa0, 0(t3)
|
||||
# add t3, t3, 4
|
||||
mv t2, a2 // winc0
|
||||
mv t3, a2 // winc1
|
||||
mv t4, a2 // winc2
|
||||
|
||||
esp.lp.setup 0, a1, .fft2r_l3 // .fft2r_l3 - label to the last executed instruction
|
||||
|
||||
flw fa0, 0(a4) // in0.re
|
||||
flw fa4, 0(a6) // in2.re
|
||||
fadd.s ft0, fa0, fa4 // in0.re + in2.re
|
||||
flw fa1, 4(a4) // in0.im
|
||||
fsub.s ft1, fa0, fa4 // in0.re - in2.re
|
||||
flw fa5, 4(a6) // in2.im
|
||||
fadd.s ft2, fa1, fa5 // in0.im + in2.im
|
||||
flw fa2, 0(a5) // in1.re
|
||||
fsub.s ft3, fa1, fa5 // in0.im - in2.im
|
||||
flw fa6, 0(a7) // in3.re
|
||||
fadd.s ft4, fa2, fa6 // in1.re + in3.re
|
||||
flw fa3, 4(a5) // in1.im
|
||||
fsub.s ft5, fa2, fa6 // in1.re - in3.re
|
||||
flw fa7, 4(a7) // in3.im
|
||||
fadd.s ft6, fa3, fa7 // in1.im + in3.im
|
||||
fsub.s ft7, fa3, fa7 // in1.im - in3.im
|
||||
|
||||
# bfly[0].re = ft0 + ft4;
|
||||
fadd.s fa0, ft0, ft4;
|
||||
# bfly[0].im = ft2 + ft6;
|
||||
fadd.s fa1, ft2, ft6;
|
||||
# bfly[1].re = ft1 + ft7;
|
||||
fadd.s fa2, ft1, ft7;
|
||||
# bfly[1].im = ft3 - ft5;
|
||||
fsub.s fa3, ft3, ft5;
|
||||
# bfly[2].re = ft0 - ft4;
|
||||
fsub.s fa4, ft0, ft4;
|
||||
flw ft0, 0(t2) // winc0->re
|
||||
# bfly[2].im = ft2 - ft6;
|
||||
fsub.s fa5, ft2, ft6;
|
||||
flw ft2, 0(t3) // winc1->re
|
||||
# bfly[3].re = ft1 - ft7;
|
||||
fsub.s fa6, ft1, ft7;
|
||||
flw ft1, 4(t2) // winc0->im
|
||||
# bfly[3].im = ft3 + ft5;
|
||||
fadd.s fa7, ft3, ft5;
|
||||
|
||||
// *ptrc0 = bfly[0];
|
||||
fsw fa0, 0(a4) // in0.re
|
||||
fsw fa1, 4(a4) // in0.im
|
||||
|
||||
flw ft3, 4(t3) // winc1->im
|
||||
|
||||
// ptrc1->re = bfly[1].re * winc0->re + bfly[1].im * winc0->im;
|
||||
// ptrc1->im = bfly[1].im * winc0->re - bfly[1].re * winc0->im;
|
||||
// ptrc2->re = bfly[2].re * winc1->re + bfly[2].im * winc1->im;
|
||||
fmul.s fa0, fa2, ft0
|
||||
add t2, t2, a3 // winc0 += 1 * wind_step;
|
||||
fmul.s fa1, fa3, ft0
|
||||
fmul.s ft0, fa4, ft2
|
||||
fmul.s ft2, fa5, ft2
|
||||
|
||||
flw ft4, 0(t4) // winc2->re
|
||||
flw ft5, 4(t4) // winc2->im
|
||||
|
||||
fmadd.s fa0, fa3, ft1, fa0
|
||||
add t3, t3, a3 // winc1 += 2 * wind_step;
|
||||
fnmsub.s fa1, fa2, ft1, fa1
|
||||
add t3, t3, a3 //
|
||||
fmul.s fa2, fa6, ft4
|
||||
fmul.s fa3, fa7, ft4
|
||||
|
||||
|
||||
add t4, t4, a3 // winc2 += 3 * wind_step;
|
||||
fmadd.s ft0, fa5, ft3, ft0
|
||||
add t4, t4, a3 //
|
||||
fnmsub.s ft2, fa4, ft3, ft2
|
||||
|
||||
fmadd.s ft3, fa7, ft5, fa2
|
||||
add t4, t4, a3 //
|
||||
fnmsub.s fa3, fa6, ft5, fa3
|
||||
|
||||
fsw fa0, 0(a5) // ptrc1->re
|
||||
add a4, a4, 8 // ptrc0++
|
||||
fsw fa1, 4(a5) // ptrc1->im
|
||||
add a5, a5, 8 // ptrc1++
|
||||
fsw ft0, 0(a6) // ptrc2->re
|
||||
// ptrc2->im = bfly[2].im * winc1->re - bfly[2].re * winc1->im;
|
||||
fsw ft2, 4(a6) // ptrc2->im
|
||||
// ptrc3->re = bfly[3].re * winc2->re + bfly[3].im * winc2->im;
|
||||
add a6, a6, 8 // ptrc2++
|
||||
|
||||
fsw ft3, 0(a7) // ptrc3->re
|
||||
// ptrc3->im = bfly[3].im * winc2->re - bfly[3].re * winc2->im;
|
||||
fsw fa3, 4(a7) // ptrc3->im
|
||||
|
||||
add a7, a7, 8 // ptrc3++
|
||||
|
||||
// Temp solution
|
||||
|
||||
.fft2r_l3: nop
|
||||
add t1, t1, 2 // j+=2
|
||||
BNE t1, t0, .fft2r_l2
|
||||
|
||||
slli t0, t0, 2 // t0 = m = m<<2
|
||||
srli t6, t6, 2 // t6 = log4N >>= 2
|
||||
slli a3, a3, 2 // wind_step = wind_step << 2;
|
||||
BNEZ t6, .fft2r_l1// Jump if > 0
|
||||
|
||||
#
|
||||
add sp,sp,16
|
||||
li a0,0
|
||||
ret
|
||||
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dl_ifft4r_fc32_arp4_
|
||||
.type dl_ifft4r_fc32_arp4_,@function
|
||||
|
||||
dl_ifft4r_fc32_arp4_:
|
||||
//esp_err_t dl_ifft4r_fc32_arp4_(float *data, int N, float *table, int table_size) -- a0 = data, a1 = N, a2 = table
|
||||
|
||||
// table_size - a3
|
||||
// m - t0
|
||||
// j - t1
|
||||
add sp,sp,-16 // reserve 16 bytes of stack
|
||||
#
|
||||
srli t6, a1, 1 // t6 = N/2: stage countdown, shifted right by 2 after each stage until it reaches 0
|
||||
li t0, 2 // t0 - m
|
||||
|
||||
div a3, a3, a1 // wind_step = table_size / N
|
||||
slli a3, a3, 3 // wind_step scaled to bytes (one complex sample = 8 bytes)
|
||||
|
||||
.ifft2r_l1:
|
||||
li t1, 0 // t1 - j
|
||||
srli a1, a1, 2 // a1 = length = length >> 2;
|
||||
.ifft2r_l2: // loop for j, t1 - j
|
||||
slli t2, a1, 4 // t2 = length << 1 << 3 (8 bytes for one complex sample)
|
||||
slli t3, a1, 3 // t3 = length << 3 (byte distance between ptrc0, ptrc1, ptrc2, ptrc3)
|
||||
// start_index = j * (length << 1); // n: n-point FFT
|
||||
mul t2,t2,t1 // t2 = byte offset of start_index
|
||||
add a4, a0, t2 // fc32_t *ptrc0
|
||||
add a5, a4, t3 // fc32_t *ptrc1
|
||||
add a6, a5, t3 // fc32_t *ptrc2
|
||||
add a7, a6, t3 // fc32_t *ptrc3
|
||||
|
||||
# flw fa0, 0(a4)
|
||||
# fsw fa0, 0(t3)
|
||||
# add t3, t3, 4
|
||||
mv t2, a2 // winc0
|
||||
mv t3, a2 // winc1
|
||||
mv t4, a2 // winc2
|
||||
|
||||
esp.lp.setup 0, a1, .ifft2r_l3 // .ifft2r_l3 - label to the last executed instruction
|
||||
|
||||
flw fa0, 0(a4) // in0.re
|
||||
flw fa4, 0(a6) // in2.re
|
||||
fadd.s ft0, fa0, fa4 // in0.re + in2.re
|
||||
flw fa1, 4(a4) // in0.im
|
||||
fsub.s ft1, fa0, fa4 // in0.re - in2.re
|
||||
flw fa5, 4(a6) // in2.im
|
||||
fadd.s ft2, fa1, fa5 // in0.im + in2.im
|
||||
flw fa2, 0(a5) // in1.re
|
||||
fsub.s ft3, fa1, fa5 // in0.im - in2.im
|
||||
flw fa6, 0(a7) // in3.re
|
||||
fadd.s ft4, fa2, fa6 // in1.re + in3.re
|
||||
flw fa3, 4(a5) // in1.im
|
||||
fsub.s ft5, fa2, fa6 // in1.re - in3.re
|
||||
flw fa7, 4(a7) // in3.im
|
||||
fadd.s ft6, fa3, fa7 // in1.im + in3.im
|
||||
fsub.s ft7, fa3, fa7 // in1.im - in3.im
|
||||
|
||||
# bfly[0].re = ft0 + ft4;
|
||||
fadd.s fa0, ft0, ft4;
|
||||
# bfly[0].im = ft2 + ft6;
|
||||
fadd.s fa1, ft2, ft6;
|
||||
# bfly[1].re = ft1 - ft7;
|
||||
fsub.s fa2, ft1, ft7;
|
||||
# bfly[1].im = ft3 + ft5;
|
||||
fadd.s fa3, ft3, ft5;
|
||||
# bfly[2].re = ft0 - ft4;
|
||||
fsub.s fa4, ft0, ft4;
|
||||
flw ft0, 0(t2) // winc0->re
|
||||
# bfly[2].im = ft2 - ft6;
|
||||
fsub.s fa5, ft2, ft6;
|
||||
flw ft2, 0(t3) // winc1->re
|
||||
# bfly[3].re = ft1 + ft7;
|
||||
fadd.s fa6, ft1, ft7;
|
||||
flw ft1, 4(t2) // winc0->im
|
||||
# bfly[3].im = ft3 - ft5;
|
||||
fsub.s fa7, ft3, ft5;
|
||||
|
||||
// *ptrc0 = bfly[0];
|
||||
fsw fa0, 0(a4) // in0.re
|
||||
fsw fa1, 4(a4) // in0.im
|
||||
|
||||
flw ft3, 4(t3) // winc1->im
|
||||
|
||||
// ptrc1->re = bfly[1].re * winc0->re - bfly[1].im * winc0->im; (inverse: conjugated twiddle)
|
||||
// ptrc1->im = bfly[1].im * winc0->re + bfly[1].re * winc0->im;
|
||||
// ptrc2->re = bfly[2].re * winc1->re - bfly[2].im * winc1->im;
|
||||
fmul.s fa0, fa2, ft0
|
||||
add t2, t2, a3 // winc0 += 1 * wind_step;
|
||||
fmul.s fa1, fa3, ft0
|
||||
fmul.s ft0, fa4, ft2
|
||||
fmul.s ft2, fa5, ft2
|
||||
|
||||
flw ft4, 0(t4) // winc2->re
|
||||
flw ft5, 4(t4) // winc2->im
|
||||
|
||||
fnmsub.s fa0, fa3, ft1, fa0
|
||||
add t3, t3, a3 // winc1 += 2 * wind_step;
|
||||
fmadd.s fa1, fa2, ft1, fa1
|
||||
add t3, t3, a3 //
|
||||
fmul.s fa2, fa6, ft4
|
||||
fmul.s fa3, fa7, ft4
|
||||
|
||||
|
||||
add t4, t4, a3 // winc2 += 3 * wind_step;
|
||||
fnmsub.s ft0, fa5, ft3, ft0
|
||||
add t4, t4, a3 //
|
||||
fmadd.s ft2, fa4, ft3, ft2
|
||||
|
||||
fnmsub.s ft3, fa7, ft5, fa2
|
||||
add t4, t4, a3 //
|
||||
fmadd.s fa3, fa6, ft5, fa3
|
||||
|
||||
fsw fa0, 0(a5) // ptrc1->re
|
||||
add a4, a4, 8 // ptrc0++
|
||||
fsw fa1, 4(a5) // ptrc1->im
|
||||
add a5, a5, 8 // ptrc1++
|
||||
fsw ft0, 0(a6) // ptrc2->re
|
||||
// ptrc2->im = bfly[2].im * winc1->re + bfly[2].re * winc1->im;
|
||||
fsw ft2, 4(a6) // ptrc2->im
|
||||
// ptrc3->re = bfly[3].re * winc2->re - bfly[3].im * winc2->im;
|
||||
add a6, a6, 8 // ptrc2++
|
||||
|
||||
fsw ft3, 0(a7) // ptrc3->re
|
||||
// ptrc3->im = bfly[3].im * winc2->re + bfly[3].re * winc2->im;
|
||||
fsw fa3, 4(a7) // ptrc3->im
|
||||
|
||||
add a7, a7, 8 // ptrc3++
|
||||
|
||||
// Temp solution
|
||||
|
||||
.ifft2r_l3: nop
|
||||
add t1, t1, 2 // j+=2
|
||||
BNE t1, t0, .ifft2r_l2
|
||||
|
||||
slli t0, t0, 2 // t0 = m = m<<2
|
||||
srli t6, t6, 2 // t6 = log4N >>= 2
|
||||
slli a3, a3, 2 // wind_step = wind_step << 2;
|
||||
BNEZ t6, .ifft2r_l1// Jump if > 0
|
||||
|
||||
#
|
||||
add sp,sp,16
|
||||
li a0,0
|
||||
ret
|
||||
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileContributor: 2024 f4lcOn @ Libera Chat IRC
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dl_fft2r_fc32_aes3_
|
||||
.type dl_fft2r_fc32_aes3_,@function
|
||||
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dl_fft2r_fc32_ansi(float *data, int N)
|
||||
//{
|
||||
// float *w = dl_fft_w_table_fc32;
|
||||
//
|
||||
// int ie, ia, m;
|
||||
// float re_temp, im_temp;
|
||||
// float c, s;
|
||||
// int N2 = N;
|
||||
// ie = 1;
|
||||
// for (int N2 = N/2; N2 > 0; N2 >>= 1) {
|
||||
// ia = 0;
|
||||
// for (int j = 0; j < ie; j++) {
|
||||
// c = w[2 * j];
|
||||
// s = w[2 * j + 1];
|
||||
// for (int i = 0; i < N2; i++) {
|
||||
// m = ia + N2;
|
||||
// re_temp = c * data[2 * m] + s * data[2 * m + 1];
|
||||
// im_temp = c * data[2 * m + 1] - s * data[2 * m];
|
||||
// data[2 * m] = data[2 * ia] - re_temp;
|
||||
// data[2 * m + 1] = data[2 * ia + 1] - im_temp;
|
||||
// data[2 * ia] = data[2 * ia] + re_temp;
|
||||
// data[2 * ia + 1] = data[2 * ia + 1] + im_temp;
|
||||
// ia++;
|
||||
// }
|
||||
// ia += N2;
|
||||
// }
|
||||
// ie <<= 1;
|
||||
// }
|
||||
// return result;
|
||||
//}
|
||||
|
||||
|
||||
dl_fft2r_fc32_aes3_:
|
||||
//esp_err_t dl_fft2r_fc32_aes3_(float *data, int N, float* dl_fft_w_table_fc32)
|
||||
|
||||
entry a1, 16
|
||||
|
||||
// Array increment for floating point data should be 4
|
||||
// data - a2
|
||||
// N - a3
|
||||
// dl_fft_w_table_fc32 - a4
|
||||
|
||||
// a6 - k, main loop counter; N2 - for (int N2 = N/2; N2 > 0; N2 >>= 1)
|
||||
// a7 - ie
|
||||
// a8 - j
|
||||
// a10 - (j*2)<<2, or a10 - j<<3
|
||||
// f0 - c or w[2 * j]
|
||||
// f1 - s or w[2 * j + 1]
|
||||
// a11 - ia
|
||||
// a12 - m
|
||||
// a13 - ia pointer
|
||||
// a14 - m pointer
|
||||
// f6 - re_temp
|
||||
// f7 - im_temp
|
||||
|
||||
srli a6, a3, 1 // a6 = N2 = N/2
|
||||
movi.n a7, 1 // a7 - ie
|
||||
|
||||
.ifft2r_l1:
|
||||
movi.n a8, 0 // a8 - j
|
||||
movi.n a11,0 // a11 = ia = 0;
|
||||
|
||||
.ifft2r_l2: // loop for j, a8 - j
|
||||
addx8 a10, a8, a4 // a8 - shift for cos () -- c = w[2 * j]; -- pointer to cos
|
||||
ee.ldf.64.ip f1, f0, a10, 0
|
||||
add.n a12, a11, a6 // a12 = m = ia + N2
|
||||
addx8 a14, a12, a2 // a14 - pointer for m*2
|
||||
|
||||
loopnez a6, .ifft2r_l3
|
||||
ee.ldf.64.ip f5, f4, a14, 0 // data[2 * m], data[2 * m + 1]
|
||||
mul.s f6, f0, f4 // re_temp = c * data[2 * m]
|
||||
mul.s f7, f0, f5 // im_temp = c * data[2 * m + 1]
|
||||
|
||||
addx8 a13, a11, a2 // a13 - pointer for ia*2
|
||||
ee.ldf.64.ip f3, f2, a13, 0 // data[2 * ia], data[2 * ia + 1]
|
||||
|
||||
madd.s f6, f1, f5 // re_temp += s * data[2 * m + 1];
|
||||
msub.s f7, f1, f4 // im_temp -= s * data[2 * m];
|
||||
|
||||
addi a11, a11, 1 // ia++
|
||||
add.n a12, a11, a6 // a12 = m = ia + N2
|
||||
|
||||
sub.s f8, f2, f6 // = data[2 * ia] - re_temp;
|
||||
sub.s f9, f3, f7 // = data[2 * ia + 1] - im_temp;
|
||||
add.s f10, f2, f6 // = data[2 * ia] + re_temp;
|
||||
add.s f11, f3, f7 // = data[2 * ia + 1] + im_temp;
|
||||
|
||||
ee.stf.64.ip f9, f8, a14, 0
|
||||
addx8 a14, a12, a2 // a14 - pointer for m*2
|
||||
ee.stf.64.ip f11, f10, a13, 0
|
||||
.ifft2r_l3: // end of the zero-overhead butterfly loop for this j
|
||||
add.n a11, a11, a6 // ia += N2
|
||||
|
||||
addi.n a8, a8, 1 // j++
|
||||
bne a8, a7, .ifft2r_l2 // repeat while j != ie
|
||||
slli a7, a7, 1 // ie = ie<<1
|
||||
// main loop: for (int k = N/2; k > 0; k >>= 1)
|
||||
srli a6, a6, 1 // a6 = a6>>1
|
||||
bnez a6, .ifft2r_l1 // Jump if > 0
|
||||
movi.n a2, 0 // return 0 (ESP_OK); without this a2 still holds the data pointer argument
|
||||
retw
|
||||
|
||||
|
||||
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dl_ifft2r_fc32_aes3_
|
||||
.type dl_ifft2r_fc32_aes3_,@function
|
||||
|
||||
|
||||
dl_ifft2r_fc32_aes3_:
|
||||
//esp_err_t dl_ifft2r_fc32_aes3_(float *data, int N, float* dl_fft_w_table_fc32)
|
||||
|
||||
entry a1, 16
|
||||
|
||||
// Array increment for floating point data should be 4
|
||||
// data - a2
|
||||
// N - a3
|
||||
// dl_fft_w_table_fc32 - a4
|
||||
|
||||
// a6 - k, main loop counter; N2 - for (int N2 = N/2; N2 > 0; N2 >>= 1)
|
||||
// a7 - ie
|
||||
// a8 - j
|
||||
// a10 - (j*2)<<2, or a10 - j<<3
|
||||
// f0 - c or w[2 * j]
|
||||
// f1 - s or w[2 * j + 1]
|
||||
// a11 - ia
|
||||
// a12 - m
|
||||
// a13 - ia pointer
|
||||
// a14 - m pointer
|
||||
// f6 - re_temp
|
||||
// f7 - im_temp
|
||||
|
||||
srli a6, a3, 1 // a6 = N2 = N/2
|
||||
movi.n a7, 1 // a7 - ie
|
||||
|
||||
.fft2r_l1:
|
||||
movi.n a8, 0 // a8 - j
|
||||
movi.n a11,0 // a11 = ia = 0;
|
||||
|
||||
.fft2r_l2: // loop for j, a8 - j
|
||||
addx8 a10, a8, a4 // a8 - shift for cos () -- c = w[2 * j]; -- pointer to cos
|
||||
ee.ldf.64.ip f1, f0, a10, 0
|
||||
|
||||
// CHANGE: Negate the imaginary part of twiddle factors
|
||||
neg.s f1, f1
|
||||
add.n a12, a11, a6 // a12 = m = ia + N2
|
||||
addx8 a14, a12, a2 // a14 - pointer for m*2
|
||||
|
||||
loopnez a6, .fft2r_l3
|
||||
ee.ldf.64.ip f5, f4, a14, 0 // data[2 * m], data[2 * m + 1]
|
||||
mul.s f6, f0, f4 // re_temp = c * data[2 * m]
|
||||
mul.s f7, f0, f5 // im_temp = c * data[2 * m + 1]
|
||||
|
||||
addx8 a13, a11, a2 // a13 - pointer for ia*2
|
||||
ee.ldf.64.ip f3, f2, a13, 0 // data[2 * ia], data[2 * ia + 1]
|
||||
|
||||
madd.s f6, f1, f5 // re_temp += s * data[2 * m + 1];
|
||||
msub.s f7, f1, f4 // im_temp -= s * data[2 * m];
|
||||
|
||||
addi a11, a11, 1 // ia++
|
||||
add.n a12, a11, a6 // a12 = m = ia + N2
|
||||
|
||||
sub.s f8, f2, f6 // = data[2 * ia] - re_temp;
|
||||
sub.s f9, f3, f7 // = data[2 * ia + 1] - im_temp;
|
||||
add.s f10, f2, f6 // = data[2 * ia] + re_temp;
|
||||
add.s f11, f3, f7 // = data[2 * ia + 1] + im_temp;
|
||||
|
||||
ee.stf.64.ip f9, f8, a14, 0
|
||||
addx8 a14, a12, a2 // a14 - pointer for m*2
|
||||
ee.stf.64.ip f11, f10, a13, 0
|
||||
.fft2r_l3: // end of the zero-overhead butterfly loop for this j
|
||||
add.n a11, a11, a6 // ia += N2
|
||||
|
||||
addi.n a8, a8, 1 // j++
|
||||
bne a8, a7, .fft2r_l2 // repeat while j != ie
|
||||
slli a7, a7, 1 // ie = ie<<1
|
||||
// main loop: for (int k = N/2; k > 0; k >>= 1)
|
||||
srli a6, a6, 1 // a6 = a6>>1
|
||||
bnez a6, .fft2r_l1 // Jump if > 0
|
||||
movi.n a2, 0 // return 0 (ESP_OK); without this a2 still holds the data pointer argument
|
||||
retw
|
||||
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileContributor: 2024 f4lcOn @ Libera Chat IRC
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
.section .text
|
||||
.global dl_fft4r_fc32_aes3_
|
||||
.type dl_fft4r_fc32_aes3_,@function
|
||||
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dl_fft4r_fc32_ansi_(float *data, int length, float *table, int table_size)
|
||||
// {
|
||||
// if (0 == dl_fft4r_initialized) {
|
||||
// return ESP_ERR_DSP_UNINITIALIZED;
|
||||
// }
|
||||
//
|
||||
// uint log2N = dl_power_of_two(length);
|
||||
// if ((log2N & 0x01) != 0) {
|
||||
// return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
// }
|
||||
// uint log4N = log2N >> 1;
|
||||
//
|
||||
// fc32_t bfly[4];
|
||||
// uint m = 2;
|
||||
// uint wind_step = table_size / length;
|
||||
// while (1) { ///radix 4
|
||||
// if (log4N == 0) {
|
||||
// break;
|
||||
// }
|
||||
// length = length >> 2;
|
||||
// for (int j = 0; j < m; j += 2) { // j: which FFT of this step
|
||||
// int start_index = j * (length << 1); // n: n-point FFT
|
||||
//
|
||||
// fc32_t *ptrc0 = (fc32_t *)data + start_index;
|
||||
// fc32_t *ptrc1 = ptrc0 + length;
|
||||
// fc32_t *ptrc2 = ptrc1 + length;
|
||||
// fc32_t *ptrc3 = ptrc2 + length;
|
||||
//
|
||||
// fc32_t *winc0 = (fc32_t *)table;
|
||||
// fc32_t *winc1 = winc0;
|
||||
// fc32_t *winc2 = winc0;
|
||||
//
|
||||
// for (int k = 0; k < length; k++) {
|
||||
// fc32_t in0 = *ptrc0;
|
||||
// fc32_t in2 = *ptrc2;
|
||||
// fc32_t in1 = *ptrc1;
|
||||
// fc32_t in3 = *ptrc3;
|
||||
//
|
||||
// bfly[0].re = in0.re + in2.re + in1.re + in3.re;
|
||||
// bfly[0].im = in0.im + in2.im + in1.im + in3.im;
|
||||
//
|
||||
// bfly[1].re = in0.re - in2.re + in1.im - in3.im;
|
||||
// bfly[1].im = in0.im - in2.im - in1.re + in3.re;
|
||||
//
|
||||
// bfly[2].re = in0.re + in2.re - in1.re - in3.re;
|
||||
// bfly[2].im = in0.im + in2.im - in1.im - in3.im;
|
||||
//
|
||||
// bfly[3].re = in0.re - in2.re - in1.im + in3.im;
|
||||
// bfly[3].im = in0.im - in2.im + in1.re - in3.re;
|
||||
//
|
||||
// *ptrc0 = bfly[0];
|
||||
// ptrc1->re = bfly[1].re * winc0->re + bfly[1].im * winc0->im;
|
||||
// ptrc1->im = bfly[1].im * winc0->re - bfly[1].re * winc0->im;
|
||||
// ptrc2->re = bfly[2].re * winc1->re + bfly[2].im * winc1->im;
|
||||
// ptrc2->im = bfly[2].im * winc1->re - bfly[2].re * winc1->im;
|
||||
// ptrc3->re = bfly[3].re * winc2->re + bfly[3].im * winc2->im;
|
||||
// ptrc3->im = bfly[3].im * winc2->re - bfly[3].re * winc2->im;
|
||||
//
|
||||
// winc0 += 1 * wind_step;
|
||||
// winc1 += 2 * wind_step;
|
||||
// winc2 += 3 * wind_step;
|
||||
//
|
||||
// ptrc0++;
|
||||
// ptrc1++;
|
||||
// ptrc2++;
|
||||
// ptrc3++;
|
||||
// }
|
||||
// }
|
||||
// m = m << 2;
|
||||
// wind_step = wind_step << 2;
|
||||
// log4N--;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
// esp_err_t dl_fft4r_fc32_aes3_(data, N, dl_fft4r_w_table_fc32, dl_fft4r_w_table_size)
|
||||
|
||||
.align 4
|
||||
// In-place radix-4 butterfly passes over an interleaved (re, im) float buffer.
//
// Arguments on entry (see the prototype comment above):
//   a2 = data, a3 = N, a4 = twiddle table, a5 = table_size
// Register use inside the routine:
//   a3  = N, divided by 4 at the start of every stage
//   a5  = m (table_size is consumed once w_step has been computed)
//   a6  = w_step, a7 = remaining stage count (log4N), a8 = j
//   a9 / a10 / a11  = w0 / w1 / w2 twiddle pointers
//   a12 / a13 / a14 / a15 = p0 / p1 / p2 / p3 data pointers
//
// NOTE(review): the stage loop is tested at the bottom (bnez a7), so one stage always runs,
// and the odd-log2N check of the reference C code is not present here - presumably the
// caller guarantees that N is a power of 4; confirm.
// NOTE(review): a2 is never written in this routine, so at retw it still holds the data
// pointer rather than ESP_OK - confirm that callers do not rely on the return value.
dl_fft4r_fc32_aes3_:
|
||||
|
||||
entry a1, 16 # no auto vars on stack
|
||||
|
||||
nsau a6, a3 # inline dl_power_of_two(N)
|
||||
movi.n a7, 31
|
||||
xor a6, a6, a7 # a6 = 31 ^ nsau(N): index of the highest set bit of N
|
||||
|
||||
srli a7, a6, 1 # log4N = dl_power_of_two(N) >> 1;
|
||||
|
||||
addi.n a6, a6, -1
|
||||
ssr a6 # shift amount for the srl below
|
||||
srl a6, a5 # w_step = table_size >> (dl_power_of_two(N) - 1)
|
||||
|
||||
movi.n a5, 2 # m = 2
|
||||
|
||||
.stage:
|
||||
srli a3, a3, 2 # N >>= 2
|
||||
|
||||
movi.n a8, 0 # j = 0
|
||||
|
||||
.group:
|
||||
mov.n a9, a4 # w0 = w
|
||||
mov.n a10, a4 # w1 = w
|
||||
mov.n a11, a4 # w2 = w
|
||||
|
||||
mul16u a12, a8, a3 # a12 = j * N (16-bit unsigned multiply)
|
||||
slli a12, a12, 1 # start_index = (j * N) << 1
|
||||
|
||||
addx8 a12, a12, a2 # p0 = data + (start_index << 1)
|
||||
addx8 a13, a3, a12 # p1 = p0 + (N << 1)
|
||||
addx8 a14, a3, a13 # p2 = p1 + (N << 1)
|
||||
addx8 a15, a3, a14 # p3 = p2 + (N << 1)
|
||||
|
||||
loopnez a3, .bf4_loop_end # for (uint k = 0; k < N; k++)
|
||||
ee.ldf.64.ip f1, f0, a12, 0 # f0 = in0.re = *p0, f1 = in0.im = *(p0 + 1)
|
||||
ee.ldf.64.ip f3, f2, a14, 0 # f2 = in2.re = *p2, f3 = in2.im = *(p2 + 1)
|
||||
add.s f5, f1, f3 # f5 = in0.im + in2.im
|
||||
sub.s f7, f1, f3 # f7 = in0.im - in2.im
|
||||
add.s f4, f0, f2 # f4 = in0.re + in2.re
|
||||
sub.s f6, f0, f2 # f6 = in0.re - in2.re
|
||||
ee.ldf.64.ip f1, f0, a13, 0 # f0 = in1.re = *p1, f1 = in1.im = *(p1 + 1)
|
||||
ee.ldf.64.ip f3, f2, a15, 0 # f2 = in3.re = *p3, f3 = in3.im = *(p3 + 1)
|
||||
add.s f9, f1, f3 # f9 = in1.im + in3.im
|
||||
sub.s f11, f1, f3 # f11 = in1.im - in3.im
|
||||
lsi f12, a9, 0 # f12 = w0->re
|
||||
lsi f13, a10, 0 # f13 = w1->re
|
||||
lsi f14, a11, 0 # f14 = w2->re
|
||||
add.s f8, f0, f2 # f8 = in1.re + in3.re
|
||||
sub.s f10, f0, f2 # f10 = in1.re - in3.re
|
||||
sub.s f1, f5, f9 # f1 = bf2.im = in0.im + in2.im - in1.im - in3.im
|
||||
add.s f5, f5, f9 # f5 = bf0.im = in0.im + in2.im + in1.im + in3.im
|
||||
add.s f2, f6, f11 # f2 = bf1.re = in0.re - in2.re + in1.im - in3.im
|
||||
sub.s f6, f6, f11 # f6 = bf3.re = in0.re - in2.re - in1.im + in3.im
|
||||
sub.s f0, f4, f8 # f0 = bf2.re = in0.re + in2.re - in1.re - in3.re
|
||||
add.s f4, f4, f8 # f4 = bf0.re = in0.re + in2.re + in1.re + in3.re
|
||||
sub.s f3, f7, f10 # f3 = bf1.im = in0.im - in2.im - in1.re + in3.re
|
||||
add.s f7, f7, f10 # f7 = bf3.im = in0.im - in2.im + in1.re - in3.re
|
||||
mul.s f10, f6, f14 # f10 = bf3.re * w2->re
|
||||
ee.stf.64.ip f5, f4, a12, 8 # *p0 = f4 = bf0.re, *(p0 + 1) = f5 = bf0.im, p0 += 2
|
||||
mul.s f4, f2, f12 # f4 = bf1.re * w0->re
|
||||
mul.s f11, f7, f14 # f11 = bf3.im * w2->re
|
||||
mul.s f5, f3, f12 # f5 = bf1.im * w0->re
|
||||
mul.s f8, f0, f13 # f8 = bf2.re * w1->re
|
||||
mul.s f9, f1, f13 # f9 = bf2.im * w1->re
|
||||
lsi f12, a9, 4 # f12 = w0->im
|
||||
lsi f13, a10, 4 # f13 = w1->im
|
||||
lsi f14, a11, 4 # f14 = w2->im
|
||||
msub.s f5, f2, f12 # f5 = bf1.im * w0->re - bf1.re * w0->im
|
||||
madd.s f4, f3, f12 # f4 = bf1.re * w0->re + bf1.im * w0->im
|
||||
msub.s f9, f0, f13 # f9 = bf2.im * w1->re - bf2.re * w1->im
|
||||
madd.s f8, f1, f13 # f8 = bf2.re * w1->re + bf2.im * w1->im
|
||||
msub.s f11, f6, f14 # f11 = bf3.im * w2->re - bf3.re * w2->im
|
||||
madd.s f10, f7, f14 # f10 = bf3.re * w2->re + bf3.im * w2->im
|
||||
addx4 a9, a6, a9 # w0 += w_step
|
||||
addx8 a10, a6, a10 # w1 += 2 * w_step
|
||||
addx4 a11, a6, a11 # w2 += w_step (first part of the 3 * w_step advance)
|
||||
addx8 a11, a6, a11 # w2 += 3 * w_step
|
||||
ee.stf.64.ip f5, f4, a13, 8 # *p1 = f4, *(p1 + 1) = f5, p1 += 2
|
||||
ee.stf.64.ip f9, f8, a14, 8 # *p2 = f8, *(p2 + 1) = f9, p2 += 2
|
||||
ee.stf.64.ip f11, f10, a15, 8 # *p3 = f10, *(p3 + 1) = f11, p3 += 2
|
||||
.bf4_loop_end:
|
||||
|
||||
addi.n a8, a8, 2 # j += 2
|
||||
bgeu a8, a5, .stage_next # if j >= m
|
||||
j .group # otherwise handle the next group of this stage
|
||||
|
||||
.stage_next:
|
||||
slli a5, a5, 2 # m <<= 2
|
||||
slli a6, a6, 2 # w_step <<= 2
|
||||
addi.n a7, a7, -1 # log4N--
|
||||
bnez a7, .stage # if log4N > 0
|
||||
|
||||
retw
|
||||
|
||||
|
||||
|
||||
.section .text
|
||||
.global dl_ifft4r_fc32_aes3_
|
||||
.type dl_ifft4r_fc32_aes3_,@function
|
||||
// esp_err_t dl_ifft4r_fc32_aes3_(data, N, dl_fft4r_w_table_fc32, dl_fft4r_w_table_size)
|
||||
|
||||
.align 4
|
||||
// In-place radix-4 butterfly passes for the inverse transform over an interleaved
// (re, im) float buffer. The structure is the same as dl_fft4r_fc32_aes3_; the lines
// tagged "//ifft change" swap add/sub in the bf1/bf3 terms and swap madd/msub in the
// twiddle products. No 1/N scaling is applied in this routine.
//
// Arguments on entry (see the prototype comment above):
//   a2 = data, a3 = N, a4 = twiddle table, a5 = table_size
// Register use inside the routine:
//   a3  = N, divided by 4 at the start of every stage
//   a5  = m (table_size is consumed once w_step has been computed)
//   a6  = w_step, a7 = remaining stage count (log4N), a8 = j
//   a9 / a10 / a11  = w0 / w1 / w2 twiddle pointers
//   a12 / a13 / a14 / a15 = p0 / p1 / p2 / p3 data pointers
//
// NOTE(review): the stage loop is tested at the bottom (bnez a7), so one stage always runs -
// presumably the caller guarantees that N is a power of 4; confirm.
// NOTE(review): a2 is never written in this routine, so at retw it still holds the data
// pointer rather than ESP_OK - confirm that callers do not rely on the return value.
dl_ifft4r_fc32_aes3_:
|
||||
|
||||
entry a1, 16 # no auto vars on stack
|
||||
|
||||
nsau a6, a3 # inline dl_power_of_two(N)
|
||||
movi.n a7, 31
|
||||
xor a6, a6, a7 # a6 = 31 ^ nsau(N): index of the highest set bit of N
|
||||
|
||||
srli a7, a6, 1 # log4N = dl_power_of_two(N) >> 1;
|
||||
|
||||
addi.n a6, a6, -1
|
||||
ssr a6 # shift amount for the srl below
|
||||
srl a6, a5 # w_step = table_size >> (dl_power_of_two(N) - 1)
|
||||
|
||||
movi.n a5, 2 # m = 2
|
||||
|
||||
.ifft_stage:
|
||||
srli a3, a3, 2 # N >>= 2
|
||||
|
||||
movi.n a8, 0 # j = 0
|
||||
|
||||
.ifft_group:
|
||||
mov.n a9, a4 # w0 = w
|
||||
mov.n a10, a4 # w1 = w
|
||||
mov.n a11, a4 # w2 = w
|
||||
|
||||
mul16u a12, a8, a3 # a12 = j * N (16-bit unsigned multiply)
|
||||
slli a12, a12, 1 # start_index = (j * N) << 1
|
||||
|
||||
addx8 a12, a12, a2 # p0 = data + (start_index << 1)
|
||||
addx8 a13, a3, a12 # p1 = p0 + (N << 1)
|
||||
addx8 a14, a3, a13 # p2 = p1 + (N << 1)
|
||||
addx8 a15, a3, a14 # p3 = p2 + (N << 1)
|
||||
|
||||
loopnez a3, .inv_bf4_loop_end # for (uint k = 0; k < N; k++)
|
||||
ee.ldf.64.ip f1, f0, a12, 0 # f0 = in0.re = *p0, f1 = in0.im = *(p0 + 1)
|
||||
ee.ldf.64.ip f3, f2, a14, 0 # f2 = in2.re = *p2, f3 = in2.im = *(p2 + 1)
|
||||
add.s f5, f1, f3 # f5 = in0.im + in2.im
|
||||
sub.s f7, f1, f3 # f7 = in0.im - in2.im
|
||||
add.s f4, f0, f2 # f4 = in0.re + in2.re
|
||||
sub.s f6, f0, f2 # f6 = in0.re - in2.re
|
||||
ee.ldf.64.ip f1, f0, a13, 0 # f0 = in1.re = *p1, f1 = in1.im = *(p1 + 1)
|
||||
ee.ldf.64.ip f3, f2, a15, 0 # f2 = in3.re = *p3, f3 = in3.im = *(p3 + 1)
|
||||
add.s f9, f1, f3 # f9 = in1.im + in3.im
|
||||
sub.s f11, f1, f3 # f11 = in1.im - in3.im
|
||||
lsi f12, a9, 0 # f12 = w0->re
|
||||
lsi f13, a10, 0 # f13 = w1->re
|
||||
lsi f14, a11, 0 # f14 = w2->re
|
||||
add.s f8, f0, f2 # f8 = in1.re + in3.re
|
||||
sub.s f10, f0, f2 # f10 = in1.re - in3.re
|
||||
sub.s f1, f5, f9 # f1 = bf2.im = in0.im + in2.im - in1.im - in3.im
|
||||
add.s f5, f5, f9 # f5 = bf0.im = in0.im + in2.im + in1.im + in3.im
|
||||
sub.s f2, f6, f11 # f2 = bf1.re = in0.re - in2.re - in1.im + in3.im //ifft change
|
||||
add.s f6, f6, f11 # f6 = bf3.re = in0.re - in2.re + in1.im - in3.im //ifft change
|
||||
sub.s f0, f4, f8 # f0 = bf2.re = in0.re + in2.re - in1.re - in3.re
|
||||
add.s f4, f4, f8 # f4 = bf0.re = in0.re + in2.re + in1.re + in3.re
|
||||
add.s f3, f7, f10 # f3 = bf1.im = in0.im - in2.im + in1.re - in3.re //ifft change
|
||||
sub.s f7, f7, f10 # f7 = bf3.im = in0.im - in2.im - in1.re + in3.re //ifft change
|
||||
mul.s f10, f6, f14 # f10 = bf3.re * w2->re
|
||||
ee.stf.64.ip f5, f4, a12, 8 # *p0 = f4 = bf0.re, *(p0 + 1) = f5 = bf0.im, p0 += 2
|
||||
mul.s f4, f2, f12 # f4 = bf1.re * w0->re
|
||||
mul.s f11, f7, f14 # f11 = bf3.im * w2->re
|
||||
mul.s f5, f3, f12 # f5 = bf1.im * w0->re
|
||||
mul.s f8, f0, f13 # f8 = bf2.re * w1->re
|
||||
mul.s f9, f1, f13 # f9 = bf2.im * w1->re
|
||||
lsi f12, a9, 4 # f12 = w0->im
|
||||
lsi f13, a10, 4 # f13 = w1->im
|
||||
lsi f14, a11, 4 # f14 = w2->im
|
||||
madd.s f5, f2, f12 # f5 = bf1.im * w0->re + bf1.re * w0->im //ifft change
|
||||
msub.s f4, f3, f12 # f4 = bf1.re * w0->re - bf1.im * w0->im //ifft change
|
||||
madd.s f9, f0, f13 # f9 = bf2.im * w1->re + bf2.re * w1->im //ifft change
|
||||
msub.s f8, f1, f13 # f8 = bf2.re * w1->re - bf2.im * w1->im //ifft change
|
||||
madd.s f11, f6, f14 # f11 = bf3.im * w2->re + bf3.re * w2->im //ifft change
|
||||
msub.s f10, f7, f14 # f10 = bf3.re * w2->re - bf3.im * w2->im //ifft change
|
||||
addx4 a9, a6, a9 # w0 += w_step
|
||||
addx8 a10, a6, a10 # w1 += 2 * w_step
|
||||
addx4 a11, a6, a11 # w2 += w_step (first part of the 3 * w_step advance)
|
||||
addx8 a11, a6, a11 # w2 += 3 * w_step
|
||||
ee.stf.64.ip f5, f4, a13, 8 # *p1 = f4, *(p1 + 1) = f5, p1 += 2
|
||||
ee.stf.64.ip f9, f8, a14, 8 # *p2 = f8, *(p2 + 1) = f9, p2 += 2
|
||||
ee.stf.64.ip f11, f10, a15, 8 # *p3 = f10, *(p3 + 1) = f11, p3 += 2
|
||||
.inv_bf4_loop_end:
|
||||
|
||||
addi.n a8, a8, 2 # j += 2
|
||||
bgeu a8, a5, .ifft_stage_next # if j >= m
|
||||
j .ifft_group # otherwise handle the next group of this stage
|
||||
|
||||
.ifft_stage_next:
|
||||
slli a5, a5, 2 # m <<= 2
|
||||
slli a6, a6, 2 # w_step <<= 2
|
||||
addi.n a7, a7, -1 # log4N--
|
||||
bnez a7, .ifft_stage # if log4N > 0
|
||||
|
||||
retw
|
||||
65
managed_components/espressif__dl_fft/benchmark_esp32c5.md
Normal file
65
managed_components/espressif__dl_fft/benchmark_esp32c5.md
Normal file
@@ -0,0 +1,65 @@
|
||||
## ESP32-C5 fft benchmark:
|
||||
|
||||
| Test Name | Size | SNR (dB) | RMSE | Time (μs) | Test Time (ms) |
|
||||
|-----------------------|----------|--------------|------------|---------------|--------------------|
|
||||
| 1. test dl fft | 128 | 105 | 0.000316 | 1220 | 703 |
|
||||
| | 256 | 108 | 0.000316 | 2801 | |
|
||||
| | 512 | 111 | 0.000316 | 6323 | |
|
||||
| | 1024 | 114 | 0.000316 | 14083 | |
|
||||
| | 2048 | 117 | 0.000316 | 31060 | |
|
||||
| 2. test dl ifft | 128 | 85.7 | 0.000316 | 1278 | 772 |
|
||||
| | 256 | 85.4 | 0.000316 | 2919 | |
|
||||
| | 512 | 85.4 | 0.000316 | 6548 | |
|
||||
| | 1024 | 85.4 | 0.000316 | 14537 | |
|
||||
| | 2048 | 85.2 | 0.000316 | 31963 | |
|
||||
| 3. test dl rfft | 128 | 101 | 0.000316 | 621 | 449 |
|
||||
| | 256 | 105 | 0.000316 | 1534 | |
|
||||
| | 512 | 108 | 0.000316 | 3119 | |
|
||||
| | 1024 | 110 | 0.000316 | 7577 | |
|
||||
| | 2048 | 114 | 0.000316 | 15088 | |
|
||||
| 4. test dl irfft | 128 | 85.5 | 0.000316 | 668 | 491 |
|
||||
| | 256 | 85.9 | 0.000316 | 1630 | |
|
||||
| | 512 | 84.5 | 0.000316 | 3305 | |
|
||||
| | 1024 | 85.3 | 0.000316 | 7948 | |
|
||||
| | 2048 | 85.4 | 0.000316 | 15804 | |
|
||||
| 5. test dl fft s16 | 128 | 65.9 | 0.001719 | 87 | 139 |
|
||||
| | 256 | 62.0 | 0.003524 | 194 | |
|
||||
| | 512 | 59.2 | 0.006614 | 429 | |
|
||||
| | 1024 | 56.8 | 0.013190 | 938 | |
|
||||
| | 2048 | 53.7 | 0.026223 | 2038 | |
|
||||
| 6. test dl ifft s16 | 128 | 59.8 | 0.000527 | 89 | 143 |
|
||||
| | 256 | 52.2 | 0.000902 | 198 | |
|
||||
| | 512 | 51.4 | 0.000917 | 435 | |
|
||||
| | 1024 | 51.1 | 0.000960 | 951 | |
|
||||
| | 2048 | 45.7 | 0.001737 | 2063 | |
|
||||
| 7. test dl fft hp s16 | 128 | 76.1 | 0.000621 | 111 | 149 |
|
||||
| | 256 | 73.6 | 0.000975 | 248 | |
|
||||
| | 512 | 72.6 | 0.001546 | 537 | |
|
||||
| | 1024 | 73.0 | 0.002084 | 1188 | |
|
||||
| | 2048 | 69.9 | 0.004387 | 2545 | |
|
||||
| 8. test dl ifft hp s16 | 128 | 72.6 | 0.000327 | 112 | 158 |
|
||||
| | 256 | 71.5 | 0.000328 | 251 | |
|
||||
| | 512 | 68.9 | 0.000334 | 542 | |
|
||||
| | 1024 | 68.9 | 0.000335 | 1199 | |
|
||||
| | 2048 | 67.8 | 0.000345 | 2566 | |
|
||||
| 9. test dl rfft s16 | 128 | 63.8 | 0.001403 | 45 | 130 |
|
||||
| | 256 | 60.7 | 0.002885 | 99 | |
|
||||
| | 512 | 58.2 | 0.005433 | 218 | |
|
||||
| | 1024 | 54.5 | 0.011284 | 477 | |
|
||||
| | 2048 | 51.9 | 0.022304 | 1034 | |
|
||||
| 10. test dl irfft s16 | 128 | 57.8 | 0.000596 | 45 | 134 |
|
||||
| | 256 | 52.2 | 0.000937 | 100 | |
|
||||
| | 512 | 50.2 | 0.000984 | 220 | |
|
||||
| | 1024 | 45.6 | 0.001740 | 480 | |
|
||||
| | 2048 | 40.2 | 0.003298 | 1041 | |
|
||||
| 11. test dl rfft hp s16 | 128 | 75.7 | 0.000464 | 57 | 135 |
|
||||
| | 256 | 74.2 | 0.000730 | 123 | |
|
||||
| | 512 | 72.7 | 0.001103 | 272 | |
|
||||
| | 1024 | 73.0 | 0.001432 | 585 | |
|
||||
| | 2048 | 70.3 | 0.002952 | 1284 | |
|
||||
| 12. test dl irfft hp s16 | 128 | 72.8 | 0.000324 | 58 | 139 |
|
||||
| | 256 | 71.1 | 0.000330 | 123 | |
|
||||
| | 512 | 67.8 | 0.000338 | 273 | |
|
||||
| | 1024 | 69.5 | 0.000335 | 587 | |
|
||||
| | 2048 | 67.4 | 0.000346 | 1289 | |
|
||||
|
||||
65
managed_components/espressif__dl_fft/benchmark_esp32p4.md
Normal file
65
managed_components/espressif__dl_fft/benchmark_esp32p4.md
Normal file
@@ -0,0 +1,65 @@
|
||||
## ESP32-P4 fft benchmark:
|
||||
|
||||
| Test Name | Size | SNR (dB) | RMSE | Time (μs) |
|
||||
|-----------------------|----------|--------------|------------|---------------|
|
||||
| 1. test dl fft | 128 | 104.868538 | 0.000316 | 38 |
|
||||
| | 256 | 107.637619 | 0.000316 | 85 |
|
||||
| | 512 | 110.548630 | 0.000316 | 188 |
|
||||
| | 1024 | 113.582832 | 0.000316 | 415 |
|
||||
| | 2048 | 116.905914 | 0.000316 | 904 |
|
||||
| 2. test dl ifft | 128 | 85.701355 | 0.000316 | 46 |
|
||||
| | 256 | 85.375244 | 0.000316 | 99 |
|
||||
| | 512 | 85.372276 | 0.000316 | 217 |
|
||||
| | 1024 | 85.351921 | 0.000316 | 471 |
|
||||
| | 2048 | 85.206238 | 0.000316 | 1017 |
|
||||
| 3. test dl rfft | 128 | 101.360046 | 0.000316 | 18 |
|
||||
| | 256 | 105.289742 | 0.000316 | 44 |
|
||||
| | 512 | 107.978775 | 0.000316 | 88 |
|
||||
| | 1024 | 110.488144 | 0.000316 | 212 |
|
||||
| | 2048 | 113.904335 | 0.000316 | 416 |
|
||||
| 4. test dl irfft | 128 | 85.467148 | 0.000316 | 22 |
|
||||
| | 256 | 85.928154 | 0.000316 | 52 |
|
||||
| | 512 | 84.540436 | 0.000316 | 102 |
|
||||
| | 1024 | 85.282562 | 0.000316 | 244 |
|
||||
| | 2048 | 85.383667 | 0.000316 | 465 |
|
||||
| 5. test dl fft s16 | 128 | 65.917183 | 0.001719 | 60 |
|
||||
| | 256 | 61.950771 | 0.003524 | 135 |
|
||||
| | 512 | 59.240242 | 0.006614 | 299 |
|
||||
| | 1024 | 56.814144 | 0.013190 | 654 |
|
||||
| | 2048 | 53.681591 | 0.026223 | 1422 |
|
||||
| 6. test dl ifft s16 | 128 | 59.837440 | 0.000527 | 61 |
|
||||
| | 256 | 52.158340 | 0.000902 | 137 |
|
||||
| | 512 | 51.414349 | 0.000917 | 303 |
|
||||
| | 1024 | 51.119301 | 0.000960 | 663 |
|
||||
| | 2048 | 45.654255 | 0.001737 | 1439 |
|
||||
| 7. test dl fft hp s16 | 128 | 76.132126 | 0.000621 | 79 |
|
||||
| | 256 | 73.598412 | 0.000975 | 177 |
|
||||
| | 512 | 72.596603 | 0.001546 | 384 |
|
||||
| | 1024 | 73.045952 | 0.002084 | 853 |
|
||||
| | 2048 | 69.902023 | 0.004387 | 1826 |
|
||||
| 8. test dl ifft hp s16 | 128 | 72.633217 | 0.000327 | 80 |
|
||||
| | 256 | 71.462891 | 0.000328 | 180 |
|
||||
| | 512 | 68.908401 | 0.000334 | 389 |
|
||||
| | 1024 | 68.920097 | 0.000335 | 862 |
|
||||
| | 2048 | 67.777245 | 0.000345 | 1842 |
|
||||
| 9. test dl rfft s16 | 128 | 63.782593 | 0.001403 | 32 |
|
||||
| | 256 | 60.652668 | 0.002885 | 70 |
|
||||
| | 512 | 58.204708 | 0.005433 | 154 |
|
||||
| | 1024 | 54.490803 | 0.011284 | 337 |
|
||||
| | 2048 | 51.854618 | 0.022304 | 730 |
|
||||
| 10. test dl irfft s16 | 128 | 57.822262 | 0.000596 | 32 |
|
||||
| | 256 | 52.207390 | 0.000937 | 71 |
|
||||
| | 512 | 50.153603 | 0.000984 | 155 |
|
||||
| | 1024 | 45.564911 | 0.001740 | 338 |
|
||||
| | 2048 | 40.217754 | 0.003298 | 733 |
|
||||
| 11. test dl rfft hp s16 | 128 | 75.728333 | 0.000464 | 41 |
|
||||
| | 256 | 74.201035 | 0.000730 | 88 |
|
||||
| | 512 | 72.743904 | 0.001103 | 196 |
|
||||
| | 1024 | 72.959915 | 0.001432 | 422 |
|
||||
| | 2048 | 70.298073 | 0.002952 | 928 |
|
||||
| 12. test dl irfft hp s16 | 128 | 72.830231 | 0.000324 | 41 |
|
||||
| | 256 | 71.144485 | 0.000330 | 89 |
|
||||
| | 512 | 67.758896 | 0.000338 | 198 |
|
||||
| | 1024 | 69.508110 | 0.000335 | 424 |
|
||||
| | 2048 | 67.428802 | 0.000346 | 933 |
|
||||
|
||||
65
managed_components/espressif__dl_fft/benchmark_esp32s3.md
Normal file
65
managed_components/espressif__dl_fft/benchmark_esp32s3.md
Normal file
@@ -0,0 +1,65 @@
|
||||
## ESP32-S3 fft benchmark:
|
||||
|
||||
| Test Name | Size | SNR (dB) | RMSE | Time (μs) |
|
||||
|-----------------------|----------|--------------|------------|---------------|
|
||||
| 1. test dl fft | 128 | 105 | 0.000316 | 61 |
|
||||
| | 256 | 108 | 0.000316 | 136 |
|
||||
| | 512 | 111 | 0.000316 | 299 |
|
||||
| | 1024 | 114 | 0.000316 | 653 |
|
||||
| | 2048 | 117 | 0.000316 | 1413 |
|
||||
| 2. test dl ifft | 128 | 85.7 | 0.000316 | 80 |
|
||||
| | 256 | 85.4 | 0.000316 | 175 |
|
||||
| | 512 | 85.4 | 0.000316 | 375 |
|
||||
| | 1024 | 85.4 | 0.000316 | 807 |
|
||||
| | 2048 | 85.2 | 0.000316 | 1721 |
|
||||
| 3. test dl rfft | 128 | 101 | 0.000316 | 34 |
|
||||
| | 256 | 105 | 0.000316 | 73 |
|
||||
| | 512 | 108 | 0.000316 | 156 |
|
||||
| | 1024 | 110 | 0.000316 | 347 |
|
||||
| | 2048 | 114 | 0.000316 | 714 |
|
||||
| 4. test dl irfft | 128 | 85.5 | 0.000316 | 47 |
|
||||
| | 256 | 85.9 | 0.000316 | 97 |
|
||||
| | 512 | 84.5 | 0.000316 | 197 |
|
||||
| | 1024 | 85.3 | 0.000316 | 432 |
|
||||
| | 2048 | 85.4 | 0.000316 | 868 |
|
||||
| 5. test dl fft s16 | 128 | 65.9 | 0.001719 | 131 |
|
||||
| | 256 | 62.0 | 0.003524 | 289 |
|
||||
| | 512 | 59.2 | 0.006614 | 633 |
|
||||
| | 1024 | 56.8 | 0.013190 | 1374 |
|
||||
| | 2048 | 53.7 | 0.026223 | 2966 |
|
||||
| 6. test dl ifft s16 | 128 | 59.8 | 0.000527 | 133 |
|
||||
| | 256 | 52.2 | 0.000902 | 293 |
|
||||
| | 512 | 51.4 | 0.000917 | 640 |
|
||||
| | 1024 | 51.1 | 0.000960 | 1387 |
|
||||
| | 2048 | 45.7 | 0.001737 | 2992 |
|
||||
| 7. test dl fft hp s16 | 128 | 76.1 | 0.000621 | 189 |
|
||||
| | 256 | 73.6 | 0.000975 | 424 |
|
||||
| | 512 | 72.6 | 0.001546 | 914 |
|
||||
| | 1024 | 73.0 | 0.002084 | 2023 |
|
||||
| | 2048 | 69.9 | 0.004387 | 4322 |
|
||||
| 8. test dl ifft hp s16 | 128 | 72.6 | 0.000327 | 190 |
|
||||
| | 256 | 71.5 | 0.000328 | 427 |
|
||||
| | 512 | 68.9 | 0.000334 | 920 |
|
||||
| | 1024 | 68.9 | 0.000335 | 2036 |
|
||||
| | 2048 | 67.8 | 0.000345 | 4349 |
|
||||
| 9. test dl rfft s16 | 128 | 63.8 | 0.001403 | 70 |
|
||||
| | 256 | 60.7 | 0.002885 | 152 |
|
||||
| | 512 | 58.2 | 0.005433 | 331 |
|
||||
| | 1024 | 54.5 | 0.011284 | 717 |
|
||||
| | 2048 | 51.9 | 0.022304 | 1542 |
|
||||
| 10. test dl irfft s16 | 128 | 57.8 | 0.000596 | 70 |
|
||||
| | 256 | 52.2 | 0.000937 | 153 |
|
||||
| | 512 | 50.2 | 0.000984 | 334 |
|
||||
| | 1024 | 45.6 | 0.001740 | 720 |
|
||||
| | 2048 | 40.2 | 0.003298 | 1547 |
|
||||
| 11. test dl rfft hp s16 | 128 | 75.7 | 0.000464 | 98 |
|
||||
| | 256 | 74.2 | 0.000730 | 210 |
|
||||
| | 512 | 72.7 | 0.001103 | 466 |
|
||||
| | 1024 | 73.0 | 0.001432 | 998 |
|
||||
| | 2048 | 70.3 | 0.002952 | 2190 |
|
||||
| 12. test dl irfft hp s16 | 128 | 72.8 | 0.000324 | 98 |
|
||||
| | 256 | 71.1 | 0.000330 | 210 |
|
||||
| | 512 | 67.8 | 0.000338 | 468 |
|
||||
| | 1024 | 69.5 | 0.000335 | 1001 |
|
||||
| | 2048 | 67.4 | 0.000346 | 2196 |
|
||||
|
||||
126
managed_components/espressif__dl_fft/dl_fft.h
Normal file
126
managed_components/espressif__dl_fft/dl_fft.h
Normal file
@@ -0,0 +1,126 @@
|
||||
#pragma once
|
||||
#include "dl_fft_base.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Single-precision floating-point FFT instance structure
|
||||
* @param fft_point Number of FFT points
|
||||
* @param log2n Log base 2 of FFT points
|
||||
* @param fft_table FFT real to complex coefficient table
|
||||
* @param rfft_table FFT complex to real coefficient table
* @param bitrev_table Table of uint16_t entries; presumably bit-reversal indices (TODO confirm against dl_fft_f32_init())
* @param bitrev_size Presumably the number of entries in bitrev_table (TODO confirm against dl_fft_f32_init())
|
||||
*/
|
||||
typedef struct {
|
||||
int fft_point;
|
||||
int log2n;
|
||||
float *fft_table;
|
||||
float *rfft_table;
|
||||
uint16_t *bitrev_table;
|
||||
int bitrev_size;
|
||||
} dl_fft_f32_t;
|
||||
|
||||
/**
|
||||
* @brief 16-bit fixed-point FFT instance structure
*
* Unlike dl_fft_f32_t, this structure has no bitrev_table / bitrev_size members.
|
||||
* @param fft_point Number of FFT points
|
||||
* @param log2n Log base 2 of FFT points
|
||||
* @param fft_table FFT real to complex coefficient table
|
||||
* @param rfft_table FFT complex to real coefficient table
|
||||
*/
|
||||
typedef struct {
|
||||
int fft_point;
|
||||
int log2n;
|
||||
int16_t *fft_table;
|
||||
int16_t *rfft_table;
|
||||
} dl_fft_s16_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a single-precision floating-point FFT instance
|
||||
* @param fft_point Number of FFT points (must be power of two)
|
||||
* @param caps Configuration flags for memory allocation, same with esp-idf heap_caps_malloc
|
||||
* (e.g., MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL, MALLOC_CAP_SPIRAM)
|
||||
* @return dl_fft_f32_t* Handle to FFT instance
|
||||
*/
|
||||
dl_fft_f32_t *dl_fft_f32_init(int fft_point, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Deinitialize a single-precision floating-point FFT instance
|
||||
* @param handle FFT instance handle created by dl_fft_f32_init()
|
||||
*/
|
||||
void dl_fft_f32_deinit(dl_fft_f32_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Execute single-precision floating-point FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_fft_f32_run(dl_fft_f32_t *handle, float *data);
|
||||
|
||||
/**
|
||||
* @brief Execute single-precision floating-point inverse FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place ifft calculation
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_ifft_f32_run(dl_fft_f32_t *handle, float *data);
|
||||
|
||||
/**
|
||||
* @brief Initialize a 16-bit fixed-point FFT instance
|
||||
* @param fft_point Number of FFT points (must be power of two)
|
||||
* @param caps Configuration flags for memory allocation, same with esp-idf heap_caps_malloc
|
||||
* (e.g., MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL, MALLOC_CAP_SPIRAM)
|
||||
* @return dl_fft_s16_t* Handle to FFT instance
|
||||
*/
|
||||
dl_fft_s16_t *dl_fft_s16_init(int fft_point, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Deinitialize a 16-bit fixed-point FFT instance
|
||||
* @param handle FFT instance handle created by dl_fft_s16_init()
|
||||
*/
|
||||
void dl_fft_s16_deinit(dl_fft_s16_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_fft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point inverse FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place ifft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_ifft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point FFT with high-precision scaling
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_fft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point inverse FFT with high-precision scaling
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place ifft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_ifft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
292
managed_components/espressif__dl_fft/dl_fft.hpp
Normal file
292
managed_components/espressif__dl_fft/dl_fft.hpp
Normal file
@@ -0,0 +1,292 @@
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "dl_fft.h"
|
||||
#include "dl_rfft.h"
|
||||
|
||||
namespace dl {
|
||||
class FFT {
|
||||
private:
|
||||
// Private constructor for singleton
|
||||
FFT() = default;
|
||||
~FFT() = default;
|
||||
|
||||
// Delete copy constructor and assignment operator
|
||||
FFT(const FFT &) = delete;
|
||||
FFT &operator=(const FFT &) = delete;
|
||||
|
||||
// Four handle vectors for different FFT types
|
||||
std::vector<dl_fft_f32_t *> fft_f32_handles;
|
||||
std::vector<dl_fft_s16_t *> fft_s16_handles;
|
||||
std::vector<dl_fft_f32_t *> rfft_f32_handles;
|
||||
std::vector<dl_fft_s16_t *> rfft_s16_handles;
|
||||
|
||||
// Mutex for thread safety (only used during handle initialization)
|
||||
std::mutex mutex_;
|
||||
|
||||
uint32_t m_caps = MALLOC_CAP_8BIT; // Default memory allocation capabilities
|
||||
|
||||
// Helper function to find or create handle
|
||||
template <typename HandleType, typename InitFunc>
|
||||
HandleType *get_or_create_handle(int fft_length, std::vector<HandleType *> &handles, InitFunc init_func)
|
||||
{
|
||||
// First check without lock (lock-free read)
|
||||
for (auto *handle : handles) {
|
||||
if (handle->fft_point == fft_length) {
|
||||
return handle;
|
||||
}
|
||||
}
|
||||
|
||||
// Lock only for handle creation
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
|
||||
// Double-check after acquiring lock (avoid race condition)
|
||||
for (auto *handle : handles) {
|
||||
if (handle->fft_point == fft_length) {
|
||||
return handle;
|
||||
}
|
||||
}
|
||||
|
||||
// Create new handle
|
||||
HandleType *new_handle = init_func(fft_length, m_caps); // 0 for default memory allocation
|
||||
if (new_handle) {
|
||||
handles.push_back(new_handle);
|
||||
}
|
||||
return new_handle;
|
||||
}
|
||||
|
||||
public:
|
||||
// Get singleton instance
|
||||
static FFT *get_instance()
|
||||
{
|
||||
static FFT instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
// Returns the capability flags that are passed to the init function when a new handle is created.
uint32_t get_caps() { return m_caps; }
|
||||
|
||||
// Sets the capability flags used for handles created from now on. Cached handles are looked
// up by fft_point only, so existing ones are reused unchanged. This write does not take mutex_.
void set_caps(uint32_t caps) { m_caps = caps; }
|
||||
|
||||
// FFT for float32
|
||||
esp_err_t fft(float *data, int fft_length)
|
||||
{
|
||||
dl_fft_f32_t *handle = get_or_create_handle(
|
||||
fft_length, fft_f32_handles, [](int len, uint32_t caps) { return dl_fft_f32_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
return dl_fft_f32_run(handle, data);
|
||||
}
|
||||
|
||||
// IFFT for float32
|
||||
esp_err_t ifft(float *data, int fft_length)
|
||||
{
|
||||
dl_fft_f32_t *handle = get_or_create_handle(
|
||||
fft_length, fft_f32_handles, [](int len, uint32_t caps) { return dl_fft_f32_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
return dl_ifft_f32_run(handle, data);
|
||||
}
|
||||
|
||||
// RFFT for float32
|
||||
esp_err_t rfft(float *data, int fft_length)
|
||||
{
|
||||
dl_fft_f32_t *handle = get_or_create_handle(
|
||||
fft_length, rfft_f32_handles, [](int len, uint32_t caps) { return dl_rfft_f32_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
return dl_rfft_f32_run(handle, data);
|
||||
}
|
||||
|
||||
// IRFFT for float32
|
||||
esp_err_t irfft(float *data, int fft_length)
|
||||
{
|
||||
dl_fft_f32_t *handle = get_or_create_handle(
|
||||
fft_length, rfft_f32_handles, [](int len, uint32_t caps) { return dl_rfft_f32_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
return dl_irfft_f32_run(handle, data);
|
||||
}
|
||||
|
||||
// FFT for int16
|
||||
esp_err_t fft(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
|
||||
{
|
||||
dl_fft_s16_t *handle = get_or_create_handle(
|
||||
fft_length, fft_s16_handles, [](int len, uint32_t caps) { return dl_fft_s16_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int temp_out_exp = 0;
|
||||
esp_err_t result = dl_fft_s16_run(handle, data, in_exponent, out_exponent ? out_exponent : &temp_out_exp);
|
||||
return result;
|
||||
}
|
||||
|
||||
// IFFT for int16
|
||||
esp_err_t ifft(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
|
||||
{
|
||||
dl_fft_s16_t *handle = get_or_create_handle(
|
||||
fft_length, fft_s16_handles, [](int len, uint32_t caps) { return dl_fft_s16_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int temp_out_exp = 0;
|
||||
esp_err_t result = dl_ifft_s16_run(handle, data, in_exponent, out_exponent ? out_exponent : &temp_out_exp);
|
||||
return result;
|
||||
}
|
||||
|
||||
// RFFT for int16
|
||||
esp_err_t rfft(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
|
||||
{
|
||||
dl_fft_s16_t *handle = get_or_create_handle(
|
||||
fft_length, rfft_s16_handles, [](int len, uint32_t caps) { return dl_rfft_s16_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int temp_out_exp = 0;
|
||||
esp_err_t result = dl_rfft_s16_run(handle, data, in_exponent, out_exponent ? out_exponent : &temp_out_exp);
|
||||
return result;
|
||||
}
|
||||
|
||||
// IRFFT for int16
|
||||
esp_err_t irfft(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
|
||||
{
|
||||
dl_fft_s16_t *handle = get_or_create_handle(
|
||||
fft_length, rfft_s16_handles, [](int len, uint32_t caps) { return dl_rfft_s16_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int temp_out_exp = 0;
|
||||
esp_err_t result = dl_irfft_s16_run(handle, data, in_exponent, out_exponent ? out_exponent : &temp_out_exp);
|
||||
return result;
|
||||
}
|
||||
|
||||
// FFT with high precision for int16
|
||||
esp_err_t fft_hp(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
|
||||
{
|
||||
dl_fft_s16_t *handle = get_or_create_handle(
|
||||
fft_length, fft_s16_handles, [](int len, uint32_t caps) { return dl_fft_s16_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int temp_out_exp = 0;
|
||||
esp_err_t result = dl_fft_s16_hp_run(handle, data, in_exponent, out_exponent ? out_exponent : &temp_out_exp);
|
||||
return result;
|
||||
}
|
||||
|
||||
// IFFT with high precision for int16
|
||||
esp_err_t ifft_hp(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
|
||||
{
|
||||
dl_fft_s16_t *handle = get_or_create_handle(
|
||||
fft_length, fft_s16_handles, [](int len, uint32_t caps) { return dl_fft_s16_init(len, caps); });
|
||||
|
||||
if (!handle) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
int temp_out_exp = 0;
|
||||
esp_err_t result = dl_ifft_s16_hp_run(handle, data, in_exponent, out_exponent ? out_exponent : &temp_out_exp);
|
||||
return result;
|
||||
}
|
||||
|
||||
// RFFT with high precision for int16
|
||||
// Runs dl_rfft_s16_hp_run() in place on `data` using the handle that
// get_or_create_handle() yields for `fft_length` from rfft_s16_handles.
// Returns ESP_FAIL when no handle is available. The resulting exponent goes to
// `out_exponent` when supplied, otherwise to a local that is discarded.
esp_err_t rfft_hp(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
{
    dl_fft_s16_t *rfft_handle = get_or_create_handle(
        fft_length, rfft_s16_handles, [](int len, uint32_t caps) { return dl_rfft_s16_init(len, caps); });
    if (rfft_handle == nullptr) {
        return ESP_FAIL;
    }

    int unused_exp = 0;
    int *exp_dst = &unused_exp;
    if (out_exponent != nullptr) {
        exp_dst = out_exponent;
    }
    return dl_rfft_s16_hp_run(rfft_handle, data, in_exponent, exp_dst);
}
|
||||
|
||||
// IRFFT with high precision for int16
|
||||
// Runs dl_irfft_s16_hp_run() in place on `data` using the handle that
// get_or_create_handle() yields for `fft_length` from rfft_s16_handles.
// Returns ESP_FAIL when no handle is available. The resulting exponent goes to
// `out_exponent` when supplied, otherwise to a local that is discarded.
esp_err_t irfft_hp(int16_t *data, int fft_length, int in_exponent = 0, int *out_exponent = nullptr)
{
    dl_fft_s16_t *rfft_handle = get_or_create_handle(
        fft_length, rfft_s16_handles, [](int len, uint32_t caps) { return dl_rfft_s16_init(len, caps); });
    if (rfft_handle == nullptr) {
        return ESP_FAIL;
    }

    int unused_exp = 0;
    int *exp_dst = &unused_exp;
    if (out_exponent != nullptr) {
        exp_dst = out_exponent;
    }
    return dl_irfft_s16_hp_run(rfft_handle, data, in_exponent, exp_dst);
}
|
||||
|
||||
// WARNING: This function is NOT thread-safe with respect to concurrent FFT operations.
|
||||
// It should only be called when no other FFT methods are running, as it will deinitialize all handles
|
||||
// and may cause undefined behavior if other threads are using FFT functions.
|
||||
// Ensure all FFT operations have completed before calling clear().
|
||||
void clear()
|
||||
{
|
||||
ESP_LOGW("FFT",
|
||||
"This function is NOT thread-safe. Ensure all FFT operations have completed before calling clear()");
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
|
||||
// Clear FFT float32 handles
|
||||
for (auto *handle : fft_f32_handles) {
|
||||
dl_fft_f32_deinit(handle);
|
||||
}
|
||||
fft_f32_handles.clear();
|
||||
std::vector<dl_fft_f32_t *>().swap(fft_f32_handles);
|
||||
|
||||
// Clear FFT int16 handles
|
||||
for (auto *handle : fft_s16_handles) {
|
||||
dl_fft_s16_deinit(handle);
|
||||
}
|
||||
fft_s16_handles.clear();
|
||||
std::vector<dl_fft_s16_t *>().swap(fft_s16_handles);
|
||||
|
||||
// Clear RFFT float32 handles
|
||||
for (auto *handle : rfft_f32_handles) {
|
||||
dl_rfft_f32_deinit(handle);
|
||||
}
|
||||
rfft_f32_handles.clear();
|
||||
std::vector<dl_fft_f32_t *>().swap(rfft_f32_handles);
|
||||
|
||||
// Clear RFFT int16 handles
|
||||
for (auto *handle : rfft_s16_handles) {
|
||||
dl_rfft_s16_deinit(handle);
|
||||
}
|
||||
rfft_s16_handles.clear();
|
||||
std::vector<dl_fft_s16_t *>().swap(rfft_s16_handles);
|
||||
}
|
||||
|
||||
// Get handle count for debugging
|
||||
size_t get_handle_count()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
return fft_f32_handles.size() + fft_s16_handles.size() + rfft_f32_handles.size() + rfft_s16_handles.size();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace dl
|
||||
88
managed_components/espressif__dl_fft/dl_fft_f32.c
Normal file
88
managed_components/espressif__dl_fft/dl_fft_f32.c
Normal file
@@ -0,0 +1,88 @@
|
||||
#include "dl_fft.h"
|
||||
#include "esp_log.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
static const char *TAG = "dl fft";
|
||||
|
||||
// Create a new FFT handle
|
||||
dl_fft_f32_t *dl_fft_f32_init(int fft_point, uint32_t caps)
|
||||
{
|
||||
if (!dl_is_power_of_two(fft_point)) {
|
||||
ESP_LOGE(TAG, "FFT point must be power of two");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dl_fft_f32_t *handle = (dl_fft_f32_t *)heap_caps_malloc(sizeof(dl_fft_f32_t), caps);
|
||||
if (!handle) {
|
||||
ESP_LOGE(TAG, "Failed to allocate FFT handle");
|
||||
return NULL;
|
||||
}
|
||||
handle->fft_table = NULL;
|
||||
handle->rfft_table = NULL;
|
||||
handle->bitrev_table = NULL;
|
||||
handle->fft_point = fft_point;
|
||||
handle->log2n = dl_power_of_two(fft_point);
|
||||
|
||||
// Allocate and generate FFT table
|
||||
handle->fft_table = dl_gen_fftr2_table_f32(fft_point, caps);
|
||||
if (!handle->fft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_fft_f32_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
handle->bitrev_table = dl_gen_bitrev2r_table(fft_point, caps, &handle->bitrev_size);
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
// Free FFT handle
|
||||
/*
 * Release a float32 FFT handle together with its fft, rfft and bit-reversal
 * tables. Passing NULL is a no-op.
 */
void dl_fft_f32_deinit(dl_fft_f32_t *handle)
{
    if (handle == NULL) {
        return;
    }

    /* free(NULL) does nothing, so tables that were never created need no guard. */
    free(handle->fft_table);
    free(handle->rfft_table);
    free(handle->bitrev_table);
    free(handle);
}
|
||||
|
||||
// Perform FFT
|
||||
/*
 * In-place forward complex FFT on `data`: radix-2 pass followed by the
 * bit-reversal reordering. Returns ESP_FAIL when `handle` or `data` is NULL,
 * ESP_OK otherwise.
 */
esp_err_t dl_fft_f32_run(dl_fft_f32_t *handle, float *data)
{
    if (handle == NULL || data == NULL) {
        return ESP_FAIL;
    }

    const int n = handle->fft_point;

    dl_fft2r_fc32(data, n, handle->fft_table);
    dl_bitrev2r_fc32_ansi(data, n, handle->bitrev_table, handle->bitrev_size);
    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place inverse complex FFT on `data`: inverse radix-2 pass, bit-reversal
 * reordering, then every one of the fft_point * 2 floats is multiplied by
 * 1 / fft_point. Returns ESP_FAIL when `handle` or `data` is NULL, ESP_OK
 * otherwise.
 */
esp_err_t dl_ifft_f32_run(dl_fft_f32_t *handle, float *data)
{
    if (handle == NULL || data == NULL) {
        return ESP_FAIL;
    }

    const int n = handle->fft_point;
    const float inv_n = 1.0f / n;

    dl_ifft2r_fc32(data, n, handle->fft_table);
    dl_bitrev2r_fc32_ansi(data, n, handle->bitrev_table, handle->bitrev_size);

    /* Normalise the whole buffer by 1/N. */
    float *const end = data + n * 2;
    for (float *p = data; p < end; ++p) {
        *p *= inv_n;
    }

    return ESP_OK;
}
|
||||
106
managed_components/espressif__dl_fft/dl_fft_s16.c
Normal file
106
managed_components/espressif__dl_fft/dl_fft_s16.c
Normal file
@@ -0,0 +1,106 @@
|
||||
#include "dl_fft.h"
|
||||
#include "esp_log.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
static const char *TAG = "dl fft";
|
||||
|
||||
// Create a new FFT handle
|
||||
dl_fft_s16_t *dl_fft_s16_init(int fft_point, uint32_t caps)
|
||||
{
|
||||
if (!dl_is_power_of_two(fft_point)) {
|
||||
ESP_LOGE(TAG, "FFT point must be power of two");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dl_fft_s16_t *handle = (dl_fft_s16_t *)heap_caps_malloc(sizeof(dl_fft_s16_t), caps);
|
||||
if (!handle) {
|
||||
ESP_LOGE(TAG, "Failed to allocate FFT handle");
|
||||
return NULL;
|
||||
}
|
||||
handle->fft_table = NULL;
|
||||
handle->rfft_table = NULL;
|
||||
handle->fft_point = fft_point;
|
||||
handle->log2n = dl_power_of_two(fft_point);
|
||||
|
||||
// Allocate and generate FFT table
|
||||
handle->fft_table = dl_gen_fft_table_sc16(fft_point, caps);
|
||||
if (!handle->fft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_fft_s16_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
// Free FFT handle
|
||||
/*
 * Release an int16 FFT handle together with its tables. Passing NULL is a
 * no-op.
 *
 * rfft_table is released as well: dl_fft_s16_init() leaves it NULL, but the
 * same dl_fft_s16_t type is also produced by dl_rfft_s16_init(), and such a
 * handle would otherwise leak its rfft table if it were released here.
 */
void dl_fft_s16_deinit(dl_fft_s16_t *handle)
{
    if (handle) {
        if (handle->fft_table) {
            free(handle->fft_table);
        }
        if (handle->rfft_table) {
            free(handle->rfft_table);
        }
        free(handle);
    }
}
|
||||
|
||||
// Perform FFT
|
||||
/*
 * In-place forward complex int16 FFT on `data`: radix-2 pass followed by the
 * bit-reversal reordering.
 *
 * *out_exponent is set to in_exponent + log2(fft_point).
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_fft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int fft_point = handle->fft_point;
    dl_fft2r_sc16(data, fft_point, handle->fft_table);
    dl_bitrev2r_sc16_ansi(data, fft_point);
    *out_exponent = in_exponent + handle->log2n;

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place inverse complex int16 FFT on `data`: inverse radix-2 pass followed
 * by the bit-reversal reordering.
 *
 * *out_exponent is set to in_exponent (unchanged).
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_ifft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int fft_point = handle->fft_point;
    dl_ifft2r_sc16(data, fft_point, handle->fft_table);
    dl_bitrev2r_sc16_ansi(data, fft_point);

    *out_exponent = in_exponent;

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place forward complex int16 FFT on `data` using the high-precision
 * radix-2 kernel, followed by the bit-reversal reordering.
 *
 * The kernel is handed an exponent that starts at 0; *out_exponent is set to
 * in_exponent plus the value the kernel leaves there.
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_fft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int fft_point = handle->fft_point;
    int kernel_exp = 0;
    dl_fft2r_sc16_hp(data, fft_point, handle->fft_table, &kernel_exp);
    dl_bitrev2r_sc16_ansi(data, fft_point);
    *out_exponent = in_exponent + kernel_exp;

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place inverse complex int16 FFT on `data` using the high-precision
 * radix-2 kernel, followed by the bit-reversal reordering.
 *
 * The kernel is handed an exponent that starts at 0; *out_exponent is set to
 * in_exponent plus the value the kernel leaves there, minus log2(fft_point).
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_ifft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int fft_point = handle->fft_point;
    int kernel_exp = 0;
    dl_ifft2r_sc16_hp(data, fft_point, handle->fft_table, &kernel_exp);
    dl_bitrev2r_sc16_ansi(data, fft_point);
    *out_exponent = in_exponent + kernel_exp - handle->log2n;

    return ESP_OK;
}
|
||||
146
managed_components/espressif__dl_fft/dl_rfft.h
Normal file
146
managed_components/espressif__dl_fft/dl_rfft.h
Normal file
@@ -0,0 +1,146 @@
|
||||
#pragma once
|
||||
#include "dl_fft.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
 * Data format for in-place rfft
 *
 * input:  real data, size = fft_point
 * output: only one side of the spectrum is returned, because the real-to-complex
 *         Fourier transform satisfies conjugate symmetry:
 *   x[0] = real part of the DC component
 *   x[1] = real part of the fft_point/2 component
 *   x[2] = real part of the 1st component
 *   x[3] = imaginary part of the 1st component
 *   ......
 *   x[fft_point-2] = real part of the (fft_point/2 - 1)th component
 *   x[fft_point-1] = imaginary part of the (fft_point/2 - 1)th component
 */
|
||||
|
||||
/**
|
||||
* @brief Initialize a single-precision floating-point real FFT instance
|
||||
* @param fft_point Number of FFT points (must be power of two)
|
||||
* @param caps Memory allocation capability flags, the same as those accepted by ESP-IDF heap_caps_malloc()
*             (e.g., MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL, MALLOC_CAP_SPIRAM)
|
||||
* @return dl_fft_f32_t* Handle to FFT instance
|
||||
*/
|
||||
dl_fft_f32_t *dl_rfft_f32_init(int fft_point, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Deinitialize a single-precision floating-point real FFT instance
|
||||
* @param handle FFT instance handle created by dl_rfft_f32_init()
|
||||
*/
|
||||
void dl_rfft_f32_deinit(dl_fft_f32_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Execute single-precision floating-point real FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_rfft_f32_run(dl_fft_f32_t *handle, float *data);
|
||||
|
||||
/**
|
||||
* @brief Execute single-precision floating-point real inverse FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_irfft_f32_run(dl_fft_f32_t *handle, float *data);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_fft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute inverse 16-bit fixed-point FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_ifft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point FFT with high-precision scaling
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_fft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute inverse 16-bit fixed-point FFT with high-precision scaling
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_ifft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Initialize a 16-bit fixed-point real FFT instance
|
||||
* @param fft_point Number of FFT points (must be power of two)
|
||||
* @param caps Memory allocation capability flags, the same as those accepted by ESP-IDF heap_caps_malloc()
*             (e.g., MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL, MALLOC_CAP_SPIRAM)
|
||||
* @return dl_fft_s16_t* Handle to FFT instance
|
||||
*/
|
||||
dl_fft_s16_t *dl_rfft_s16_init(int fft_point, uint32_t caps);
|
||||
|
||||
/**
|
||||
* @brief Deinitialize a 16-bit fixed-point real FFT instance
|
||||
* @param handle FFT instance handle created by dl_rfft_s16_init()
|
||||
*/
|
||||
void dl_rfft_s16_deinit(dl_fft_s16_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point real FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_rfft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point real FFT with high-precision scaling
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_rfft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point real inverse FFT transform
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_irfft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
|
||||
/**
|
||||
* @brief Execute 16-bit fixed-point real inverse FFT with high-precision scaling
|
||||
* @param handle FFT instance handle
|
||||
* @param data Input/output buffer, in-place fft calculation
|
||||
* @param in_exponent Input data exponent (2^in_exponent scaling factor)
|
||||
* @param out_exponent Output data exponent (2^out_exponent scaling factor)
|
||||
* @return esp_err_t ESP_OK on success, error code otherwise
|
||||
*/
|
||||
esp_err_t dl_irfft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
120
managed_components/espressif__dl_fft/dl_rfft_f32.c
Normal file
120
managed_components/espressif__dl_fft/dl_rfft_f32.c
Normal file
@@ -0,0 +1,120 @@
|
||||
#include "dl_rfft.h"
|
||||
#include "esp_attr.h"
|
||||
#include "esp_log.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
static const char *TAG = "dl rfft";
|
||||
|
||||
dl_fft_f32_t *dl_rfft_f32_init(int fft_point, uint32_t caps)
|
||||
{
|
||||
if (!dl_is_power_of_two(fft_point)) {
|
||||
ESP_LOGE(TAG, "FFT point must be power of two");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dl_fft_f32_t *handle = (dl_fft_f32_t *)heap_caps_malloc(sizeof(dl_fft_f32_t), caps);
|
||||
if (!handle) {
|
||||
ESP_LOGE(TAG, "Failed to allocate FFT handle");
|
||||
return NULL;
|
||||
}
|
||||
handle->fft_table = NULL;
|
||||
handle->rfft_table = NULL;
|
||||
handle->bitrev_table = NULL;
|
||||
handle->fft_point = fft_point;
|
||||
handle->log2n = dl_power_of_two(fft_point);
|
||||
|
||||
// rfft table
|
||||
handle->rfft_table = dl_gen_rfft_table_f32(fft_point, caps);
|
||||
if (!handle->rfft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_rfft_f32_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (handle->log2n % 2 == 1) {
|
||||
handle->bitrev_table = dl_gen_bitrev4r_table(fft_point, caps, &handle->bitrev_size);
|
||||
handle->fft_table = dl_gen_fft4r_table_f32(fft_point, caps);
|
||||
if (!handle->fft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_rfft_f32_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
handle->bitrev_table = dl_gen_bitrev2r_table(fft_point >> 1, caps, &handle->bitrev_size);
|
||||
handle->fft_table = dl_gen_fftr2_table_f32(fft_point >> 1, caps);
|
||||
if (!handle->fft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_rfft_f32_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
/*
 * Release a float32 real-FFT handle together with its fft, rfft and
 * bit-reversal tables. Passing NULL is a no-op.
 */
void dl_rfft_f32_deinit(dl_fft_f32_t *handle)
{
    if (handle == NULL) {
        return;
    }

    /* free(NULL) does nothing, so tables that were never created need no guard. */
    free(handle->fft_table);
    free(handle->rfft_table);
    free(handle->bitrev_table);
    free(handle);
}
|
||||
/*
 * In-place forward real float32 FFT on `data`.
 *
 * A complex FFT over fft_point / 2 points is run first (radix-4 when
 * log2(fft_point) is odd, radix-2 otherwise), followed by the matching
 * bit-reversal and the rfft post-processing step.
 * Returns ESP_FAIL when `handle` or `data` is NULL, ESP_OK otherwise.
 */
esp_err_t dl_rfft_f32_run(dl_fft_f32_t *handle, float *data)
{
    if (handle == NULL || data == NULL) {
        return ESP_FAIL;
    }

    const int full_point = handle->fft_point;
    const int half_point = full_point >> 1;
    const int use_radix4 = (handle->log2n % 2 == 1);

    if (use_radix4) {
        dl_fft4r_fc32(data, half_point, handle->fft_table, full_point);
        dl_bitrev4r_fc32_ansi(data, half_point, handle->bitrev_table, handle->bitrev_size);
    } else {
        dl_fft2r_fc32(data, half_point, handle->fft_table);
        dl_bitrev2r_fc32_ansi(data, half_point, handle->bitrev_table, handle->bitrev_size);
    }

    /* Turn the N/2-point complex result into the real-input spectrum. */
    dl_rfft_post_proc_fc32_ansi(data, half_point, handle->rfft_table);

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place inverse real float32 FFT on `data`.
 *
 * The rfft pre-processing step is applied, then an inverse complex FFT over
 * fft_point / 2 points (radix-4 when log2(fft_point) is odd, radix-2
 * otherwise) with the matching bit-reversal, and finally all fft_point floats
 * are scaled.
 * Returns ESP_FAIL when `handle` or `data` is NULL, ESP_OK otherwise.
 */
esp_err_t dl_irfft_f32_run(dl_fft_f32_t *handle, float *data)
{
    if (!handle || !data) {
        return ESP_FAIL;
    }
    int fft_point = handle->fft_point;
    float *fft_table = handle->fft_table;
    float *rfft_table = handle->rfft_table;
    /* The inverse transform runs over fft_point / 2 complex points, so the
     * normalisation factor is 1 / (fft_point / 2) = 2 / fft_point. A float
     * literal keeps the division in single precision. */
    float scale = 2.0f / fft_point;

    dl_rfft_pre_proc_fc32_ansi(data, fft_point >> 1, rfft_table);

    if (handle->log2n % 2 == 1) {
        dl_ifft4r_fc32(data, fft_point >> 1, fft_table, fft_point);
        dl_bitrev4r_fc32_ansi(data, fft_point >> 1, handle->bitrev_table, handle->bitrev_size);
    } else {
        dl_ifft2r_fc32(data, fft_point >> 1, fft_table);
        dl_bitrev2r_fc32_ansi(data, fft_point >> 1, handle->bitrev_table, handle->bitrev_size);
    }

    /* Scale by 2/N */
    for (int i = 0; i < fft_point; i++) {
        data[i] *= scale;
    }

    return ESP_OK;
}
|
||||
119
managed_components/espressif__dl_fft/dl_rfft_s16.c
Normal file
119
managed_components/espressif__dl_fft/dl_rfft_s16.c
Normal file
@@ -0,0 +1,119 @@
|
||||
#include "dl_rfft.h"
|
||||
#include "esp_log.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
static const char *TAG = "dl rfft";
|
||||
|
||||
dl_fft_s16_t *dl_rfft_s16_init(int fft_point, uint32_t caps)
|
||||
{
|
||||
dl_fft_s16_t *handle = (dl_fft_s16_t *)heap_caps_malloc(sizeof(dl_fft_s16_t), caps);
|
||||
if (!handle) {
|
||||
ESP_LOGE(TAG, "Failed to allocate FFT handle");
|
||||
return NULL;
|
||||
}
|
||||
handle->fft_table = NULL;
|
||||
handle->rfft_table = NULL;
|
||||
handle->fft_point = fft_point;
|
||||
handle->log2n = dl_power_of_two(fft_point);
|
||||
|
||||
// rfft table
|
||||
handle->rfft_table = dl_gen_rfft_table_s16(fft_point, caps);
|
||||
if (!handle->rfft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_rfft_s16_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// fft table
|
||||
handle->fft_table = dl_gen_fft_table_sc16(fft_point >> 1, caps);
|
||||
if (!handle->fft_table) {
|
||||
ESP_LOGE(TAG, "Failed to generate FFT table");
|
||||
dl_rfft_s16_deinit(handle);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
// Free FFT handle
|
||||
/*
 * Release an int16 real-FFT handle together with its fft and rfft tables.
 * Passing NULL is a no-op.
 */
void dl_rfft_s16_deinit(dl_fft_s16_t *handle)
{
    if (handle == NULL) {
        return;
    }

    /* free(NULL) does nothing, so tables that were never created need no guard. */
    free(handle->fft_table);
    free(handle->rfft_table);
    free(handle);
}
|
||||
|
||||
// Perform FFT
|
||||
/*
 * In-place forward real int16 FFT on `data`: complex radix-2 FFT over
 * fft_point / 2 points, bit-reversal reordering, then the rfft
 * post-processing step.
 *
 * *out_exponent is set to in_exponent + log2(fft_point).
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_rfft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int cpx_point = handle->fft_point >> 1;
    dl_fft2r_sc16(data, cpx_point, handle->fft_table);
    dl_bitrev2r_sc16_ansi(data, cpx_point);
    dl_rfft_post_proc_sc16_ansi(data, cpx_point, handle->rfft_table);
    *out_exponent = in_exponent + handle->log2n;

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place forward real int16 FFT on `data` using the high-precision radix-2
 * kernel over fft_point / 2 points, followed by the bit-reversal reordering
 * and the rfft post-processing step.
 *
 * The kernel is handed an exponent that starts at 0; *out_exponent is set to
 * in_exponent plus the value the kernel leaves there, plus 1.
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_rfft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int cpx_point = handle->fft_point >> 1;
    int kernel_exp = 0;
    dl_fft2r_sc16_hp(data, cpx_point, handle->fft_table, &kernel_exp);
    dl_bitrev2r_sc16_ansi(data, cpx_point);
    dl_rfft_post_proc_sc16_ansi(data, cpx_point, handle->rfft_table);
    *out_exponent = in_exponent + kernel_exp + 1;

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place inverse real int16 FFT on `data`: rfft pre-processing step, inverse
 * complex radix-2 FFT over fft_point / 2 points, then the bit-reversal
 * reordering.
 *
 * *out_exponent is set to in_exponent + 1.
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_irfft_s16_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int cpx_point = handle->fft_point >> 1;

    dl_rfft_pre_proc_sc16_ansi(data, cpx_point, handle->rfft_table);
    dl_ifft2r_sc16(data, cpx_point, handle->fft_table);
    dl_bitrev2r_sc16_ansi(data, cpx_point);

    *out_exponent = in_exponent + 1;

    return ESP_OK;
}
|
||||
|
||||
/*
 * In-place inverse real int16 FFT on `data`: rfft pre-processing step, inverse
 * high-precision radix-2 kernel over fft_point / 2 points, then the
 * bit-reversal reordering.
 *
 * The kernel is handed an exponent that starts at 0; *out_exponent is set to
 * in_exponent plus the value the kernel leaves there, plus 2, minus
 * log2(fft_point).
 * Returns ESP_FAIL when `handle`, `data` or `out_exponent` is NULL (the output
 * pointer is always written, so it must be valid), ESP_OK otherwise.
 */
esp_err_t dl_irfft_s16_hp_run(dl_fft_s16_t *handle, int16_t *data, int in_exponent, int *out_exponent)
{
    if (!handle || !data || !out_exponent) {
        return ESP_FAIL;
    }

    int cpx_point = handle->fft_point >> 1;
    int kernel_exp = 0;

    dl_rfft_pre_proc_sc16_ansi(data, cpx_point, handle->rfft_table);
    dl_ifft2r_sc16_hp(data, cpx_point, handle->fft_table, &kernel_exp);
    dl_bitrev2r_sc16_ansi(data, cpx_point);

    *out_exponent = in_exponent + kernel_exp + 2 - handle->log2n;

    return ESP_OK;
}
|
||||
10
managed_components/espressif__dl_fft/idf_component.yml
Normal file
10
managed_components/espressif__dl_fft/idf_component.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
dependencies:
|
||||
idf: '>=5.0'
|
||||
description: dl_fft is a lightweight and efficient fft library for all espressif chips.
|
||||
license: MIT
|
||||
repository: git://github.com/espressif/esp-dl.git
|
||||
repository_info:
|
||||
commit_sha: 48f53066553a3483d6c710998609aedff0ea20bc
|
||||
path: tools/dl_fft
|
||||
url: https://github.com/espressif/esp-dl/tree/master/esp-dl/tools/dl_fft
|
||||
version: 0.3.1
|
||||
Reference in New Issue
Block a user