add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir_platform.h"
#if (dsps_fir_f32_ae32_enabled == 1)
#include "dsps_dotprod_f32_m_ae32.S"
// This is FIR filter for ESP32 processor.
// Every input sample is written into a circular delay line, then one output
// sample is produced as the dot product of the coefficients with the delay
// line, read from the current position to the end and then from the start
// up to the current position.
.text
.align 4
.global dsps_fir_f32_ae32
.type dsps_fir_f32_ae32,@function
// The function implements the following C code:
//esp_err_t dsps_fir_f32_ae32(fir_f32_t* fir, const float* input, float* output, int len);
dsps_fir_f32_ae32:
// Arguments:
// fir - a2 (fields read below: +0 coeffs, +4 delay, +8 N, +12 pos)
// input - a3
// output - a4
// len - a5
// Returns 0 (ESP_OK) in a2.
// Working registers:
// a6 - N, number of coefficients
// a7 - pos, element index in the delay line
// a8, a12 - constant 4, pointer step operands of the lsxp loads
// a9 - constant 0, used to clear the accumulator f2
// a10 - coeffs base pointer
// a11 - delay line pointer
// a13 - pos*4, byte offset in the delay line
// a14 - iteration count of the first MAC loop
// a15 - running coeffs pointer
entry a1, 16
// Array increment for floating point data should be 4
l32i a7, a2, 12 // a7 - pos
movi a10, 4
mull a13, a7, a10// a13 = a7*4 (byte offset of pos)
l32i a6, a2, 8 // a6 - N
// NOTE(review): the product below is overwritten by the reload of a6 two
// lines further down, so it has no effect on the result.
mull a6, a6, a10// a6 = a6*4
l32i a10, a2, 0 // a10 - coeffs
l32i a6, a2, 8 // a6 - N (element count used by the wrap test and loop counts)
movi.n a9, 0
movi.n a8, 4
movi.n a12, 4
// a13 - delay index (in bytes)
// NOTE(review): len is only tested at the bottom of the loop, after the
// first pass; the code presumably relies on len >= 1 - confirm with callers.
fir_loop_len:
// Store to delay line
l32i a11, a2, 4 // a11 - delay line
lsi f0, a3, 0 // f0 = x[i]
addi a3, a3, 4 // x++
ssx f0, a11, a13 // delay[pos] = f0 (a13 is the byte offset)
addi a13, a13, 4 // a13 += 4, next float
addi a7, a7, 1 // a7++
// verify delay line: wrap to the start when pos reaches N
blt a7, a6, do_not_reset_a13
movi a13, 0
movi a7, 0
do_not_reset_a13:
// Calc amount for delay line before end
mov a15, a10 // a15 - coeffs
wfr f2, a9 // f2 = 0;
sub a14, a6, a7 // a14 = N-pos
// a11 = &delay[pos]
add a11, a11, a13
// First part: delay[pos..N-1] against the first N-pos coefficients
loopnez a14, first_fir_loop // pos...N-1
lsxp f1, a15, a8 // f1 = *coeffs, then coeffs pointer advances by a8 (4 bytes)
lsxp f0, a11, a12 // load delay f0 = *(delay++)
madd.s f2, f0, f1 // f2 += f0*f1
first_fir_loop:
// Second part: delay[0..pos-1] against the remaining coefficients
l32i a11, a2, 4 // a11 - delay line
loopnez a7, second_fir_loop // 0..pos
lsxp f1, a15, a8 // f1 = *coeffs, then coeffs pointer advances by a8 (4 bytes)
lsxp f0, a11, a12 // load delay f0 = *(delay++)
madd.s f2, f0, f1 // f2 += f0*f1
second_fir_loop:
// and after end
// Store result
ssi f2, a4, 0
addi a4, a4, 4 // y++ - increment output pointer
// Check loop
addi a5, a5, -1
bnez a5, fir_loop_len
// store state
s32i a7, a2, 12 // pos = a7
movi.n a2, 0 // return status ESP_OK
retw.n
#endif // dsps_fir_f32_ae32_enabled

View File

@@ -0,0 +1,233 @@
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir_platform.h"
#if (dsps_fir_f32_aes3_enabled == 1)
// This is FIR filter for Esp32s3 processor.
// Every input sample is stored in a circular delay line; the output sample
// is the dot product of the coefficients with the delay line, accumulated
// four products at a time in f4..f7 and summed at .store_fir_result.
// Four code paths (.offset_0 .. .offset_3) cover the four possible byte
// offsets (0, 4, 8, 12) of the current delay pointer inside a 16-byte group.
.text
.align 4
.global dsps_fir_f32_aes3
.type dsps_fir_f32_aes3,@function
// The function implements the following C code:
//esp_err_t dsps_fir_f32_aes3(fir_f32_t* fir, const float* input, float* output, int len);
dsps_fir_f32_aes3:
// fir - a2
// input - a3
// output - a4
// len - a5
// a2 - fir structure (fields read below: +0 coeffs, +4 delay, +8 N, +12 pos)
// a3 - input
// a4 - output
// a5 - length
// a6 - fir length
// a7 - position in delay line
// a8 - temp
// a9 - const 0 (loaded below, not referenced again in this function)
// a10 - coeffs ptr
// a11 - delay line ptr
// a12 - const -16, mask that rounds an address down to 16 bytes
// a13 - const 15, mask that extracts the offset inside a 16-byte group
// a14 - temp for loops
// a15 - delay line rounded to 16
// Returns 0 (ESP_OK) in a2.
entry a1, 16
// Array increment for floating point data should be 4
l32i a7, a2, 12 // a7 - pos
l32i a6, a2, 8 // a6 - N - amount of coefficients
l32i a10, a2, 0 // a10 - coeffs
l32i a11, a2, 4 // a11 - delay line
addx4 a11, a7, a11 // a11 = a11 + a7*4
l32i a6, a2, 8 // a6 - N
movi.n a9, 0
// NOTE(review): the next value of a12 is overwritten immediately by the
// following instruction, so the load of 3 has no effect.
movi.n a12, 3
movi.n a12, -16
movi.n a13, 15
// Main loop for input samples
// NOTE(review): len is only tested at the bottom of the loop, after the
// first pass; the code presumably relies on len >= 1 - confirm with callers.
.fir_loop_len:
// Store to delay line
lsip f15, a3, 4 // f15 = input[n], then a3 += 4
ssip f15, a11, 4 // *a11 = f15, then a11 += 4
addi a7, a7, 1 // a7++ - position in delay line
// Wrap to the start of the delay line when the position reaches N
blt a7, a6, .do_not_reset_a11
l32i a11, a2, 4 // Load delay line
movi a7, 0
.do_not_reset_a11:
// Load rounded delay line address
and a15, a11, a12
l32i a10, a2, 0 // a10 - coeffs
// Clear f4..f7 for multiplications
const.s f4, 0
const.s f5, 0
const.s f6, 0
const.s f7, 0
// Dispatch on the byte offset of the delay pointer inside its 16-byte group
and a8, a11, a13 // a8 = a11 & 15
beqz a8, .offset_0
addi a8, a8, -4
beqz a8, .offset_1
addi a8, a8, -4
beqz a8, .offset_2
addi a8, a8, -4
beqz a8, .offset_3
// a10 - coeffs
// a11 - delay line
// Offset 0: the delay pointer is 16-byte aligned, so coefficient and delay
// groups line up one to one.
.offset_0:
sub a14, a6, a7 // a14 = N-pos
srli a14, a14, 2 // number of 4-float groups
loopnez a14, .first_fir_loop_0 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f4, f0, f8
madd.s f5, f1, f9
madd.s f6, f2, f10
madd.s f7, f3, f11
.first_fir_loop_0:
l32i a15, a2, 4 // a15 - delay line [0]
srli a14, a7, 2 // number of 4-float groups before pos
loopnez a14, .second_fir_loop_0 // 0..pos
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f4, f0, f8
madd.s f5, f1, f9
madd.s f6, f2, f10
madd.s f7, f3, f11
.second_fir_loop_0:
j .store_fir_result;
// Offset 1: the aligned group holds one element that precedes the current
// position; it is kept in f12 and multiplied with the last coefficient after
// both loops.
// NOTE(review): on the last pass of the first loop the delay load appears to
// fetch the group starting at delay[N]; the product with f8 only vanishes if
// that padding is zero - presumably why dsps_fir_init_f32 clears
// coeffs_len + 4 elements. Confirm. The same applies to offsets 2 and 3.
.offset_1:
sub a14, a6, a7 // a14 = N-pos
addi a14, a14, 3
srli a14, a14, 2 // a14 = (N-pos+3)/4, rounded-up group count
EE.LDF.128.IP f11, f10, f9, f12, a15, 16 // Load data from delay line
// f12 - delay[N-1], store for the last operation
// f9..f11 - delay[0..2]
loopnez a14, .first_fir_loop_1 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f4, f0, f9
madd.s f5, f1, f10
madd.s f6, f2, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f7, f3, f8
.first_fir_loop_1:
l32i a15, a2, 4 // a15 - delay line [0]
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
srli a14, a7, 2
loopnez a14, .second_fir_loop_1 // 0..pos
madd.s f4, f3, f8 // uses f3 from the previous coefficient load
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f5, f0, f9
madd.s f6, f1, f10
madd.s f7, f2, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
.second_fir_loop_1:
madd.s f4, f3, f12 // last coefficient times the element saved in f12
j .store_fir_result;
// Offset 2: two elements of the aligned group precede the current position;
// they are kept in f12, f13 for the final two products.
.offset_2:
sub a14, a6, a7 // a14 = N-pos
addi a14, a14, 3
srli a14, a14, 2 // a14 = (N-pos+3)/4, rounded-up group count
EE.LDF.128.IP f11, f10, f13, f12, a15, 16 // Load data from delay line
// f12, f13 - delay[N-1], delay[N-2], store for the last operation
// f10..f11 - delay[0..1]
loopnez a14, .first_fir_loop_2 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f4, f0, f10
madd.s f5, f1, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f6, f2, f8
madd.s f7, f3, f9
.first_fir_loop_2:
l32i a15, a2, 4 // a15 - delay line [0]
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
srli a14, a7, 2
loopnez a14, .second_fir_loop_2 // 0..pos
madd.s f4, f2, f8 // uses f2, f3 from the previous coefficient load
madd.s f5, f3, f9
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f6, f0, f10
madd.s f7, f1, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
.second_fir_loop_2:
madd.s f4, f2, f12 // last two coefficients times the saved elements
madd.s f5, f3, f13
j .store_fir_result;
// Offset 3: three elements of the aligned group precede the current
// position; they are kept in f12..f14 for the final three products.
.offset_3:
sub a14, a6, a7 // a14 = N-pos
addi a14, a14, 3
srli a14, a14, 2 // a14 = (N-pos+3)/4, rounded-up group count
EE.LDF.128.IP f11, f14, f13, f12, a15, 16 // Load data from delay line
// f12, f13, f14 - delay[N-1], delay[N-2], delay[N-3], store for the last operation
// f11 - delay[0]
loopnez a14, .first_fir_loop_3 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f4, f0, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f5, f1, f8
madd.s f6, f2, f9
madd.s f7, f3, f10
.first_fir_loop_3:
l32i a15, a2, 4 // a15 - delay line [0]
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
srli a14, a7, 2
loopnez a14, .second_fir_loop_3 // 0..pos
madd.s f4, f1, f8 // uses f1..f3 from the previous coefficient load
madd.s f5, f2, f9
madd.s f6, f3, f10
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f7, f0, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
.second_fir_loop_3:
madd.s f4, f1, f12 // last three coefficients times the saved elements
madd.s f5, f2, f13
madd.s f4, f3, f14
// Combine the four partial accumulators into f4
.store_fir_result:
add.s f4, f4, f5
add.s f6, f6, f7
add.s f4, f4, f6
// Store result
ssip f4, a4, 4 // y++ - save result and increment output pointer
// Check loop length
addi a5, a5, -1
bnez a5, .fir_loop_len
// store state
s32i a7, a2, 12 // pos = a7
movi.n a2, 0 // return status ESP_OK
retw.n
#endif // dsps_fir_f32_aes3_enabled

View File

@@ -0,0 +1,36 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir.h"
// Portable reference FIR filter.
// For each of the len input samples the newest sample is written into the
// circular delay line at fir->pos, the position is advanced (wrapping to 0
// when it reaches fir->N), and one output sample is produced as the sum of
// coeffs[k] * delay[...] taken from the new position to the end of the delay
// line and then from its start up to the position.
// Always returns ESP_OK.
esp_err_t dsps_fir_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
{
    for (int sample = 0; sample < len; ++sample) {
        // Push the newest sample and advance the circular write position.
        fir->delay[fir->pos] = input[sample];
        fir->pos = fir->pos + 1;
        if (fir->pos >= fir->N) {
            fir->pos = 0;
        }

        float sum = 0;
        int tap = 0;
        int idx = fir->pos;

        // Part 1: from the current position to the end of the delay line.
        while (idx < fir->N) {
            sum += fir->coeffs[tap] * fir->delay[idx];
            ++tap;
            ++idx;
        }

        // Part 2: from the start of the delay line up to the position.
        idx = 0;
        while (idx < fir->pos) {
            sum += fir->coeffs[tap] * fir->delay[idx];
            ++tap;
            ++idx;
        }

        output[sample] = sum;
    }
    return ESP_OK;
}

View File

@@ -0,0 +1,67 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir.h"
#include "malloc.h"
// Initialize a floating-point FIR filter structure.
//
// fir        - structure to fill in.
// coeffs     - coefficient array with coeffs_len elements.
// delay      - delay line with room for coeffs_len + 4 floats (all of them are
//              cleared here), or NULL to have it allocated by this function.
// coeffs_len - number of coefficients.
//
// Returns ESP_OK on success, or ESP_ERR_NO_MEM when delay is NULL and the
// allocation fails (fir->delay is then NULL and fir->use_delay is 0).
// On ESP32-S3 it additionally returns ESP_ERR_DSP_INVALID_LENGTH when
// coeffs_len is not a multiple of 4 and ESP_ERR_DSP_ARRAY_NOT_ALIGNED when
// coeffs or delay is not 16-byte aligned. In those cases the structure is
// still filled in, so a delay line allocated here stays owned by fir and can
// be released with dsps_fir_f32_free().
esp_err_t dsps_fir_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int coeffs_len)
{
    // The delay line carries 4 extra elements beyond the coefficient count.
    const int delay_len = coeffs_len + 4;

    // Allocate delay line in case if it's NULL
    if (delay == NULL) {
#ifdef CONFIG_IDF_TARGET_ESP32S3
        delay = (float *)memalign(16, delay_len * sizeof(float));
#else
        delay = (float *)malloc(delay_len * sizeof(float));
#endif // CONFIG_IDF_TARGET_ESP32S3
        if (delay == NULL) {
            // Allocation failed: leave the structure in a state that is safe
            // to pass to dsps_fir_f32_free() instead of writing through NULL.
            fir->delay = NULL;
            fir->use_delay = 0;
            return ESP_ERR_NO_MEM;
        }
        fir->use_delay = 1;
    } else {
        fir->use_delay = 0;
    }

    // Clear the whole delay line, padding included. This single pass replaces
    // the two overlapping clearing loops of the previous implementation.
    for (int i = 0; i < delay_len; i++) {
        delay[i] = 0;
    }

    fir->coeffs = coeffs;
    fir->delay = delay;
    fir->N = coeffs_len;
    fir->pos = 0;

#ifdef CONFIG_IDF_TARGET_ESP32S3
    // The amount of coefficients should be a multiple of 4
    if (fir->N % 4 != 0) {
        return ESP_ERR_DSP_INVALID_LENGTH;
    }
    // The coeffs array should be aligned to 16
    if (((uint32_t)coeffs) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
    // The delay array should be aligned to 16
    if (((uint32_t)delay) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
#endif // CONFIG_IDF_TARGET_ESP32S3

    return ESP_OK;
}
// Release the delay line of a FIR filter, but only when fir->use_delay marks
// it as allocated by the init function. The flag is cleared before the
// buffer is freed, so a second call does nothing. Always returns ESP_OK.
esp_err_t dsps_fir_f32_free(fir_f32_t *fir)
{
    // A delay line supplied by the caller is not ours to free.
    if (fir->use_delay == 0) {
        return ESP_OK;
    }

    fir->use_delay = 0;
    free(fir->delay);
    return ESP_OK;
}

View File

@@ -0,0 +1,98 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir_platform.h"
#if (dsps_fird_f32_ae32_enabled == 1)
#include "dsps_dotprod_f32_m_ae32.S"
// This is the decimating FIR filter for ESP32 processor.
// For every output sample, "decimation" input samples are written into the
// circular delay line and then one dot product of the coefficients with the
// delay line is computed.
.text
.align 4
.global dsps_fird_f32_ae32
.type dsps_fird_f32_ae32,@function
// The function implements the following C code:
//esp_err_t dsps_fird_f32_ae32(fir_f32_t* fir, const float* input, float* output, int len);
// NOTE(review): the value placed in a2 on return is the count of output
// samples written (a8), not an error code - the prototype above presumably
// should return int; confirm against the header.
dsps_fird_f32_ae32:
// fir - a2 (fields read below: +0 coeffs, +4 delay, +8 N, +12 pos, +16 decimation)
// input - a3
// output - a4
// len - a5 (number of output samples to produce)
// Working registers:
// a6 - N, number of coefficients
// a7 - pos, element index in the delay line
// a8 - number of output samples produced so far
// a10 - coeffs base pointer
// a11 - delay line base pointer
// a12 - decimation factor
// a13 - byte offset in the delay line
// a14 - iteration count of the first MAC loop
// a15 - running coeffs pointer
entry a1, 16
// Array increment for floating point data should be 4
l32i a7, a2, 12 // a7 - pos
movi a10, 4
mull a13, a7, a10// a13 = a7*4 (byte offset of pos)
l32i a6, a2, 8 // a6 - N
// NOTE(review): the product below is overwritten by the reload of a6 three
// lines further down, so it has no effect on the result.
mull a6, a6, a10// a6 = a6*4
l32i a10, a2, 0 // a10 - coeffs
l32i a11, a2, 4 // a11 - delay line
l32i a6, a2, 8 // a6 - N (element count used by the wrap test and loop counts)
l32i a12, a2, 16 // a12 - decimation
movi a8, 0 // result = 0;
// a13 - delay index (in bytes)
// NOTE(review): len is only tested at the bottom of the loop, after the
// first pass; the code presumably relies on len >= 1 - confirm with callers.
fird_loop_len:
// Store to delay line
loopnez a12, .fird_load_data // K loops
lsip f0, a3, 4 // f0 = x[i++]
ssx f0, a11, a13 // delay[pos] = f0 (a13 is the byte offset)
addi a13, a13, 4 // a13 += 4, next float
addi a7, a7, 1 // a7++
// verify delay line: wrap to the start when pos reaches N
blt a7, a6, do_not_reset_a13
movi a13, 0
movi a7, 0
do_not_reset_a13:
const.s f2, 0 // clear the accumulator (executed on every pass of the load loop)
.fird_load_data:
addi a8, a8, 1 // result++
// Calc amount for delay line before end
mov a15, a10 // a15 - coeffs
sub a14, a6, a7 // a14 = N-pos
// First part: delay[pos..N-1] against the first N-pos coefficients
loopnez a14, first_fird_loop // pos...N-1
lsip f1, a15, 4 // f1 = *coeffs, a15++
lsx f0, a11, a13 // load delay f0 = delay[pos]
addi a13, a13, 4 // a13 += 4, next delay element
madd.s f2, f0, f1 // f2 += f0*f1
first_fird_loop:
// Second part: delay[0..pos-1] against the remaining coefficients.
// After this loop a13 equals pos*4 again, ready for the next store.
movi a13, 0 // load delay line counter to 0
loopnez a7, second_fird_loop // 0..pos
lsip f1, a15, 4 // f1 = *coeffs, a15++
lsx f0, a11, a13 // load delay f0 = delay[index]
addi a13, a13, 4 // a13 += 4, next delay element
madd.s f2, f0, f1 // f2 += f0*f1
second_fird_loop:
// and after end
// Store result
ssi f2, a4, 0
addi a4, a4, 4 // y++ - increment output pointer
// The label below is not referenced anywhere in this function.
next_itt_fir32:
// Check loop
addi a5, a5, -1
bnez a5, fird_loop_len
// store state
s32i a7, a2, 12 // pos = a7
mov a2, a8 // return the number of output samples produced
retw.n
#endif // dsps_fird_f32_ae32_enabled

View File

@@ -0,0 +1,239 @@
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir_platform.h"
#if (dsps_fird_f32_aes3_enabled == 1)
// This is the decimating FIR filter for Esp32s3 processor.
// For every output sample, "decimation" input samples are stored in the
// circular delay line, then the dot product of the coefficients with the
// delay line is accumulated four products at a time in f4..f7 and summed at
// .store_fir_result. Four code paths (.offset_0 .. .offset_3) cover the four
// possible byte offsets (0, 4, 8, 12) of the current delay pointer inside a
// 16-byte group.
.text
.align 4
.global dsps_fird_f32_aes3
.type dsps_fird_f32_aes3,@function
// The function implements the following C code:
//esp_err_t dsps_fird_f32_aes3(fir_f32_t* fir, const float* input, float* output, int len);
// NOTE(review): the value placed in a2 on return is the saved len (a9), not
// an error code - the prototype above presumably should return int; confirm
// against the header.
dsps_fird_f32_aes3:
// fir - a2
// input - a3
// output - a4
// len - a5
// a2 - fir structure (fields read below: +0 coeffs, +4 delay, +8 N, +12 pos, +16 decimation)
// a3 - input
// a4 - output
// a5 - length (number of output samples, counted down)
// a6 - fir length
// a7 - position in delay line
// a8 - temp
// a9 - copy of the original len, returned in a2
// a10 - coeffs ptr
// a11 - delay line ptr
// a12 - const -16, mask that rounds an address down to 16 bytes
// a13 - const 15, mask that extracts the offset inside a 16-byte group
// a14 - temp for loops
// a15 - delay line rounded to 16
entry a1, 16
// Array increment for floating point data should be 4
l32i a7, a2, 12 // a7 - pos
l32i a6, a2, 8 // a6 - N - amount of coefficients
l32i a10, a2, 0 // a10 - coeffs
l32i a11, a2, 4 // a11 - delay line
addx4 a11, a7, a11 // a11 = a11 + a7*4
l32i a6, a2, 8 // a6 - N
mov.n a9, a5 // keep len for the return value
// NOTE(review): the next value of a12 is overwritten immediately by the
// following instruction, so the load of 3 has no effect.
movi.n a12, 3
movi.n a12, -16
movi.n a13, 15
// Main loop for input samples
// NOTE(review): len is only tested at the bottom of the loop, after the
// first pass; the code presumably relies on len >= 1 - confirm with callers.
.fird_loop_len:
// Store K values from input to delay line:
l32i a14, a2, 16 // a14 - decimation
loopnez a14, .fird_load_data // K loops
// Store to delay line
lsip f15, a3, 4 // f15 = input[n], then a3 += 4
ssip f15, a11, 4 // *a11 = f15, then a11 += 4
addi a7, a7, 1 // a7++ - position in delay line
// Wrap to the start of the delay line when the position reaches N
blt a7, a6, .do_not_reset_a11
l32i a11, a2, 4 // Load delay line
movi a7, 0
.do_not_reset_a11:
and a15, a11, a12 // a15 = delay pointer rounded down to 16 bytes
.fird_load_data:
//
// Process data
//
// Load rounded delay line address
l32i a10, a2, 0 // a10 - coeffs
// Clear f4..f7 for multiplications
const.s f4, 0
const.s f5, 0
const.s f6, 0
const.s f7, 0
// Dispatch on the byte offset of the delay pointer inside its 16-byte group
and a8, a11, a13 // a8 = a11 & 15
beqz a8, .offset_0
addi a8, a8, -4
beqz a8, .offset_1
addi a8, a8, -4
beqz a8, .offset_2
addi a8, a8, -4
beqz a8, .offset_3
// a10 - coeffs
// a11 - delay line
// Offset 0: the delay pointer is 16-byte aligned, so coefficient and delay
// groups line up one to one.
.offset_0:
sub a14, a6, a7 // a14 = N-pos
srli a14, a14, 2 // number of 4-float groups
loopnez a14, .first_fir_loop_0 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f4, f0, f8
madd.s f5, f1, f9
madd.s f6, f2, f10
madd.s f7, f3, f11
.first_fir_loop_0:
l32i a15, a2, 4 // a15 - delay line [0]
srli a14, a7, 2 // number of 4-float groups before pos
loopnez a14, .second_fir_loop_0 // 0..pos
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f4, f0, f8
madd.s f5, f1, f9
madd.s f6, f2, f10
madd.s f7, f3, f11
.second_fir_loop_0:
j .store_fir_result;
// Offset 1: the aligned group holds one element that precedes the current
// position; it is kept in f12 and multiplied with the last coefficient after
// both loops.
// NOTE(review): on the last pass of the first loop the delay load appears to
// fetch the group starting at delay[N]; the product with f8 only vanishes if
// those elements exist and are zero - confirm that callers provide a zeroed
// N + 4 element delay line. The same applies to offsets 2 and 3.
.offset_1:
sub a14, a6, a7 // a14 = N-pos
addi a14, a14, 3
srli a14, a14, 2 // a14 = (N-pos+3)/4, rounded-up group count
EE.LDF.128.IP f11, f10, f9, f12, a15, 16 // Load data from delay line
// f12 - delay[N-1], store for the last operation
// f9..f11 - delay[0..2]
loopnez a14, .first_fir_loop_1 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f4, f0, f9
madd.s f5, f1, f10
madd.s f6, f2, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f7, f3, f8
.first_fir_loop_1:
l32i a15, a2, 4 // a15 - delay line [0]
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
srli a14, a7, 2
loopnez a14, .second_fir_loop_1 // 0..pos
madd.s f4, f3, f8 // uses f3 from the previous coefficient load
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f5, f0, f9
madd.s f6, f1, f10
madd.s f7, f2, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
.second_fir_loop_1:
madd.s f4, f3, f12 // last coefficient times the element saved in f12
j .store_fir_result;
// Offset 2: two elements of the aligned group precede the current position;
// they are kept in f12, f13 for the final two products.
.offset_2:
sub a14, a6, a7 // a14 = N-pos
addi a14, a14, 3
srli a14, a14, 2 // a14 = (N-pos+3)/4, rounded-up group count
EE.LDF.128.IP f11, f10, f13, f12, a15, 16 // Load data from delay line
// f12, f13 - delay[N-1], delay[N-2], store for the last operation
// f10..f11 - delay[0..1]
loopnez a14, .first_fir_loop_2 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f4, f0, f10
madd.s f5, f1, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f6, f2, f8
madd.s f7, f3, f9
.first_fir_loop_2:
l32i a15, a2, 4 // a15 - delay line [0]
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
srli a14, a7, 2
loopnez a14, .second_fir_loop_2 // 0..pos
madd.s f4, f2, f8 // uses f2, f3 from the previous coefficient load
madd.s f5, f3, f9
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f6, f0, f10
madd.s f7, f1, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
.second_fir_loop_2:
madd.s f4, f2, f12 // last two coefficients times the saved elements
madd.s f5, f3, f13
j .store_fir_result;
// Offset 3: three elements of the aligned group precede the current
// position; they are kept in f12..f14 for the final three products.
.offset_3:
sub a14, a6, a7 // a14 = N-pos
addi a14, a14, 3
srli a14, a14, 2 // a14 = (N-pos+3)/4, rounded-up group count
EE.LDF.128.IP f11, f14, f13, f12, a15, 16 // Load data from delay line
// f12, f13, f14 - delay[N-1], delay[N-2], delay[N-3], store for the last operation
// f11 - delay[0]
loopnez a14, .first_fir_loop_3 // pos...N-1
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f4, f0, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
madd.s f5, f1, f8
madd.s f6, f2, f9
madd.s f7, f3, f10
.first_fir_loop_3:
l32i a15, a2, 4 // a15 - delay line [0]
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
srli a14, a7, 2
loopnez a14, .second_fir_loop_3 // 0..pos
madd.s f4, f1, f8 // uses f1..f3 from the previous coefficient load
madd.s f5, f2, f9
madd.s f6, f3, f10
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
madd.s f7, f0, f11
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
.second_fir_loop_3:
madd.s f4, f1, f12 // last three coefficients times the saved elements
madd.s f5, f2, f13
madd.s f4, f3, f14
// Combine the four partial accumulators into f4
.store_fir_result:
add.s f4, f4, f5
add.s f6, f6, f7
add.s f4, f4, f6
// Store result
ssip f4, a4, 4 // y++ - save result and increment output pointer
// Check loop length
addi a5, a5, -1
bnez a5, .fird_loop_len
// store state
s32i a7, a2, 12 // pos = a7
mov.n a2, a9 // return the original len (number of output samples)
retw.n
#endif // dsps_fird_f32_aes3_enabled

View File

@@ -0,0 +1,38 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir.h"
// Portable reference decimating FIR filter.
// For each of the len output samples, fir->decim input samples are consumed
// and written into the circular delay line (the position wraps to 0 when it
// reaches fir->N). The output sample is then the sum of coeffs[k] * delay[...]
// taken from the current position to the end of the delay line and then from
// its start up to the position.
// Returns the number of samples written to output.
int dsps_fird_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
{
    int produced = 0;

    for (int out_idx = 0; out_idx < len; ++out_idx) {
        // Feed one decimation step worth of input into the delay line.
        for (int fed = 0; fed < fir->decim; ++fed) {
            fir->delay[fir->pos] = *input;
            ++input;
            fir->pos = fir->pos + 1;
            if (fir->pos >= fir->N) {
                fir->pos = 0;
            }
        }

        float sum = 0;
        int tap = 0;
        int idx = fir->pos;

        // Part 1: from the current position to the end of the delay line.
        while (idx < fir->N) {
            sum += fir->coeffs[tap] * fir->delay[idx];
            ++tap;
            ++idx;
        }

        // Part 2: from the start of the delay line up to the position.
        idx = 0;
        while (idx < fir->pos) {
            sum += fir->coeffs[tap] * fir->delay[idx];
            ++tap;
            ++idx;
        }

        output[produced] = sum;
        ++produced;
    }
    return produced;
}

View File

@@ -0,0 +1,99 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir_platform.h"
#if (dsps_fird_f32_arp4_enabled == 1)
// This is the decimating FIR filter for esp32p4 processor.
// For every output sample, "decim" input samples are written into the
// circular delay line, then the dot product of the coefficients with the
// delay line is computed in two hardware loops: from the current position to
// the end of the delay line and from its start up to the position.
.text
.align 4
.global dsps_fird_f32_arp4
.type dsps_fird_f32_arp4,@function
// The function implements the following C code:
//esp_err_t dsps_fird_f32_arp4(fir_f32_t* fir, const float* input, float* output, int len);
// NOTE(review): the value placed in a0 on return is the original len (saved
// in a6), not an error code - the prototype above presumably should return
// int; confirm against the header.
// Arguments: a0 - fir, a1 - input, a2 - output, a3 - len
// Fields of fir read below: 0 coeffs, 4 delay, 8 N, 12 pos, 16 decim
dsps_fird_f32_arp4:
// Reserve 16 bytes of stack; nothing is stored in them in this function.
add sp,sp,-16
mv a6, a3 // a6 = len, kept for the return value
lw t1, 4(a0) // t1 - delay
lw a4, 4(a0) // a4 - delay
lw t2, 8(a0) // t2 - N :FIR filter coefficients amount
lw t3, 12(a0) // t3 - pos
lw t4, 16(a0) // t4 - decim
slli t3, t3, 2 // t3 = pos*4 (bytes)
add t1, t1, t3 // t1 = &delay[pos]
slli t6, t2, 2 // t6 = N*4 (bytes)
add t3, a4, t6 // t3 = &delay[N], end of the delay line
nop
.fird_loop_len:
// p.lw a1, 4(a1)
//fmv.w.x fa5,zero
flw fa0, 0(a1) // f0 = x[i], first load
// Hardware loop, t4 (decim) passes: store the sample already held in fa0,
// advance with wrap-around, then load the next input sample.
// NOTE(review): the load at the end of the last pass reads the input element
// that follows the ones consumed; on the final output sample this is
// presumably one float past the consumed input - confirm that this is safe.
esp.lp.setup 0, t4, .fird_load_data // label to the last executed instruction
add a1, a1, 4 // i++
fsw fa0, 0(t1) // delay[pos]
add t1, t1, 4
// Skip the reset while the delay pointer t1 is still below the end t3
blt t1, t3, .do_not_reset_pos # if t0 < t1 then target
lw t1, 4(a0) // t1 - delay
.do_not_reset_pos:
.fird_load_data: flw fa0, 0(a1) // f0 = x[i]
lw t0, 0(a0) // t0 - coeffs (not referenced again in this function)
sub t5, t3, t1 // (last_pos - pos)*4
srli t5, t5, 2 // N-pos
sub t6, t1, a4
srli t6, t6, 2 // pos
fmv.w.x fa2,zero // fa2 = 0, accumulator
lw a5, 0(a0) // a5 - coeffs
// First part: delay[pos..N-1] against the first N-pos coefficients
esp.lp.setup 0, t5, .first_fird_loop
flw fa1, 0(a5)
flw fa0, 0(t1)
addi a5, a5, 4
fmadd.s fa2, fa1, fa0, fa2
.first_fird_loop: addi t1, t1, 4
// Second part: delay[0..pos-1] against the remaining coefficients; skipped
// when pos is 0.
lw t1, 4(a0) // t1 - delay
beqz t6, .skeep_loop
esp.lp.setup 0, t6, .second_fird_loop
flw fa1, 0(a5)
flw fa0, 0(t1)
addi a5, a5, 4
fmadd.s fa2, fa1, fa0, fa2
.second_fird_loop: addi t1, t1, 4
.skeep_loop:
// Here t1 points at delay[pos] again, ready for the next store.
// Store result
fsw fa2, 0(a2)
addi a2, a2, 4
addi a3, a3, -1
BNEZ a3, .fird_loop_len// Jump while the remaining count is not 0
// Store state: convert the delay pointer back to an element index
sub t6, t1, a4
srli t6, t6, 2 // pos
sw t6, 12(a0) // fir->pos = pos
mv a0, a6 // return the original len
add sp,sp,16
ret
#endif // dsps_fird_f32_arp4_enabled

View File

@@ -0,0 +1,46 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_fir.h"
// Initialize a decimating floating-point FIR filter structure.
//
// fir    - structure to fill in.
// coeffs - coefficient array with N elements.
// delay  - caller-provided delay line; its first N elements are cleared.
// N      - number of coefficients.
// decim  - decimation factor.
//
// Returns ESP_OK on success. On ESP32-S3 it returns
// ESP_ERR_DSP_INVALID_LENGTH when N is not a multiple of 4 and
// ESP_ERR_DSP_ARRAY_NOT_ALIGNED when coeffs or delay is not 16-byte aligned;
// in those cases the structure is filled in but the delay line is not cleared.
//
// NOTE(review): only N elements are cleared here, whereas dsps_fir_init_f32
// clears coeffs_len + 4 - confirm whether the ESP32-S3 kernel needs the extra
// padding to be present and zeroed.
esp_err_t dsps_fird_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int N, int decim)
{
    fir->coeffs = coeffs;
    fir->delay = delay;
    fir->N = N;
    fir->pos = 0;
    fir->decim = decim;
    // The delay line always belongs to the caller here. Without this the flag
    // stayed uninitialized, so dsps_fir_f32_free() could free caller memory.
    fir->use_delay = 0;

#ifdef CONFIG_IDF_TARGET_ESP32S3
    // The number of coefficients must be a multiple of 4;
    // if it is not, pad the coefficients with zeros up to a multiple of 4
    if (fir->N % 4 != 0) {
        return ESP_ERR_DSP_INVALID_LENGTH;
    }
    // The coeffs array should be aligned to 16
    if (((uint32_t)coeffs) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
    // The delay array should be aligned to 16
    if (((uint32_t)delay) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
#endif // CONFIG_IDF_TARGET_ESP32S3

    // Start from an empty (all-zero) delay line.
    for (int i = 0 ; i < N; i++) {
        fir->delay[i] = 0;
    }
    return ESP_OK;
}