add some code
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fir_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fir_f32_ae32
|
||||
.type dsps_fir_f32_ae32,@function
|
||||
// Assembly implementation of the C function
//   esp_err_t dsps_fir_f32_ae32(fir_f32_t *fir, const float *input, float *output, int len);
//
// For every input sample the routine stores the sample at delay[pos], advances
// pos (wrapping to 0 when it reaches N) and writes one output sample: the sum
// of coeffs[k] * delay[n], where n runs from pos up to N-1 and then from 0 up
// to pos-1 while k counts up from 0.
//
// fir_f32_t fields read through a2 (byte offsets):
//   0 - coeffs, 4 - delay, 8 - N, 12 - pos
//
// Arguments (after `entry`):
//   a2 - fir, a3 - input, a4 - output, a5 - len
// Returns:
//   a2 = 0 (ESP_OK); fir->pos is written back when at least one sample was processed.
//
// Register usage:
//   a6  - N
//   a7  - pos (element index)
//   a8  - constant 4, post-increment step for the coefficient pointer
//   a9  - constant 0, bit pattern used to clear the accumulator
//   a10 - coeffs base
//   a11 - delay line pointer
//   a12 - constant 4, post-increment step for the delay pointer
//   a13 - pos * 4 (byte index into the delay line)
//   a14 - N - pos
//   a15 - running coefficient pointer
//   f0 - sample, f1 - coefficient, f2 - accumulator

dsps_fir_f32_ae32:
    entry   a1, 16

    // Nothing to do for len <= 0. Without this check the count-down loop
    // below (addi -1 / bnez) would keep running until a5 wrapped around.
    bgei    a5, 1, .fir_f32_ae32_start
    movi.n  a2, 0                       // return status ESP_OK
    retw.n

.fir_f32_ae32_start:
    l32i    a7, a2, 12                  // a7  = pos
    slli    a13, a7, 2                  // a13 = pos * 4 (float elements are 4 bytes)
    l32i    a6, a2, 8                   // a6  = N
    l32i    a10, a2, 0                  // a10 = coeffs

    movi.n  a9, 0
    movi.n  a8, 4
    movi.n  a12, 4

fir_loop_len:
    // Store the new sample into the delay line
    l32i    a11, a2, 4                  // a11 = delay line base
    lsi     f0, a3, 0                   // f0 = *input
    addi    a3, a3, 4                   // input++
    ssx     f0, a11, a13                // delay[pos] = f0
    addi    a13, a13, 4                 // advance byte index
    addi    a7, a7, 1                   // pos++
    // Wrap the write position at the end of the delay line
    blt     a7, a6, do_not_reset_a13
    movi    a13, 0
    movi    a7, 0
do_not_reset_a13:
    mov     a15, a10                    // a15 = coeffs (restart at coeffs[0])
    wfr     f2, a9                      // f2 = 0
    sub     a14, a6, a7                 // a14 = N - pos

    add     a11, a11, a13               // a11 = &delay[pos]

    // First part: delay[pos] .. delay[N-1]
    loopnez a14, first_fir_loop
    lsxp    f1, a15, a8                 // f1 = *coeffs, coeff pointer += 4
    lsxp    f0, a11, a12                // f0 = *delay,  delay pointer += 4
    madd.s  f2, f0, f1                  // f2 += f0 * f1
first_fir_loop:

    // Second part: delay[0] .. delay[pos-1]; the coefficient pointer continues
    l32i    a11, a2, 4                  // a11 = delay line base
    loopnez a7, second_fir_loop
    lsxp    f1, a15, a8                 // f1 = *coeffs, coeff pointer += 4
    lsxp    f0, a11, a12                // f0 = *delay,  delay pointer += 4
    madd.s  f2, f0, f1                  // f2 += f0 * f1
second_fir_loop:

    // Store result
    ssi     f2, a4, 0
    addi    a4, a4, 4                   // output++
    // Next input sample
    addi    a5, a5, -1
    bnez    a5, fir_loop_len

    // Store state
    s32i    a7, a2, 12                  // fir->pos = a7
    movi.n  a2, 0                       // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_fir_f32_ae32_enabled
|
||||
@@ -0,0 +1,233 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fir_f32_aes3_enabled == 1)
|
||||
|
||||
// This is FIR filter for Esp32s3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fir_f32_aes3
|
||||
.type dsps_fir_f32_aes3,@function
|
||||
// Assembly implementation of the C function
//   esp_err_t dsps_fir_f32_aes3(fir_f32_t *fir, const float *input, float *output, int len);
//
// Per input sample: store the sample at the delay-line write pointer, advance
// the position (wrapping to 0 when it reaches N), then accumulate the
// coefficient * sample products four at a time using 128-bit loads and write
// the sum of the four partial accumulators to the output.
//
// The delay-line loads start at the write pointer rounded down to 16 bytes, so
// four code paths (.offset_0 ... .offset_3) handle the four possible positions
// of the write pointer inside its 16-byte group.
//
// NOTE(review): the kernel presumably requires N to be a multiple of 4 and the
// coeffs/delay arrays to be 16-byte aligned (loop counts are computed with
// `srli ..., 2` and data is fetched in 16-byte groups) - confirm against the
// init routine.
// NOTE(review): in .offset_1 ... .offset_3 the first loop runs
// (N - pos + 3) / 4 times and its last 16-byte load appears to extend past
// delay[N-1]; those words are multiplied and accumulated, so the floats that
// follow the delay line presumably have to be zero - confirm.
// NOTE(review): the comments below assume EE.LDF.128.IP places the word at the
// lowest address into its last register operand - confirm against the ISA
// documentation.
//
// fir_f32_t fields read through a2 (byte offsets):
//   0 - coeffs, 4 - delay, 8 - N, 12 - pos
//
// Arguments (after `entry`):
//   a2 - fir, a3 - input, a4 - output, a5 - len
// Returns:
//   a2 = 0 (ESP_OK); fir->pos is written back when at least one sample was processed.
//
// Register usage:
//   a6  - N
//   a7  - pos (element index)
//   a8  - temp: byte offset of the write pointer inside its 16-byte group
//   a10 - running coefficient pointer (reloaded for every output sample)
//   a11 - delay line write pointer
//   a12 - constant -16 (mask that rounds an address down to 16 bytes)
//   a13 - constant 15  (mask that extracts the offset inside a 16-byte group)
//   a14 - loop counter
//   a15 - delay line read pointer, 16-byte granular
//   f0..f3   - coefficients
//   f4..f7   - partial accumulators
//   f8..f11  - delay line samples
//   f12..f14 - samples from the first group kept for the final products
//   f15      - input sample

dsps_fir_f32_aes3:
    entry   a1, 16

    // Nothing to do for len <= 0. Without this check the count-down loop
    // (addi -1 / bnez) would keep running until a5 wrapped around.
    bgei    a5, 1, .fir_f32_aes3_start
    movi.n  a2, 0                       // return status ESP_OK
    retw.n

.fir_f32_aes3_start:
    l32i    a7, a2, 12                  // a7  = pos
    l32i    a6, a2, 8                   // a6  = N
    l32i    a11, a2, 4                  // a11 = delay line base
    addx4   a11, a7, a11                // a11 = &delay[pos]

    movi.n  a12, -16
    movi.n  a13, 15

// Main loop over the input samples
.fir_loop_len:
    // Store the new sample into the delay line
    lsip    f15, a3, 4                  // f15 = *input, input += 4 bytes
    ssip    f15, a11, 4                 // *a11 = f15, a11 += 4 bytes
    addi    a7, a7, 1                   // pos++

    // Wrap the write position at the end of the delay line
    blt     a7, a6, .do_not_reset_a11
    l32i    a11, a2, 4                  // a11 = delay line base
    movi    a7, 0
.do_not_reset_a11:
    // Read pointer = write pointer rounded down to 16 bytes
    and     a15, a11, a12

    l32i    a10, a2, 0                  // a10 = coeffs (restart at coeffs[0])

    // Clear the four partial accumulators
    const.s f4, 0
    const.s f5, 0
    const.s f6, 0
    const.s f7, 0

    // Dispatch on the byte offset (0, 4, 8 or 12) of a11 inside its group.
    // Any other value falls through to .offset_0.
    and     a8, a11, a13                // a8 = a11 & 15
    beqz    a8, .offset_0
    addi    a8, a8, -4
    beqz    a8, .offset_1
    addi    a8, a8, -4
    beqz    a8, .offset_2
    addi    a8, a8, -4
    beqz    a8, .offset_3

// Write pointer is 16-byte aligned: coefficients and samples pair up directly.
.offset_0:
    sub     a14, a6, a7                 // a14 = N - pos
    srli    a14, a14, 2                 // groups of four
    loopnez a14, .first_fir_loop_0      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load 4 delay samples
    madd.s  f4, f0, f8
    madd.s  f5, f1, f9
    madd.s  f6, f2, f10
    madd.s  f7, f3, f11
.first_fir_loop_0:

    l32i    a15, a2, 4                  // a15 = delay line base
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_0     // 0 ... pos-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load 4 delay samples
    madd.s  f4, f0, f8
    madd.s  f5, f1, f9
    madd.s  f6, f2, f10
    madd.s  f7, f3, f11
.second_fir_loop_0:
    j       .store_fir_result

// Write pointer is 4 bytes into its group: the first word of the group (f12)
// lies below the write pointer and is multiplied last.
.offset_1:
    sub     a14, a6, a7                 // a14 = N - pos
    addi    a14, a14, 3
    srli    a14, a14, 2                 // (N - pos + 3) / 4
    EE.LDF.128.IP f11, f10, f9, f12, a15, 16    // f12 kept for the end, f9..f11 used first
    loopnez a14, .first_fir_loop_1      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f4, f0, f9
    madd.s  f5, f1, f10
    madd.s  f6, f2, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
    madd.s  f7, f3, f8
.first_fir_loop_1:

    l32i    a15, a2, 4                  // a15 = delay line base
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load first 4 delay samples
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_1     // 0 ... pos-1
    madd.s  f4, f3, f8                  // f3 still holds the last loaded coefficient
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f5, f0, f9
    madd.s  f6, f1, f10
    madd.s  f7, f2, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
.second_fir_loop_1:

    madd.s  f4, f3, f12                 // final product with the saved sample
    j       .store_fir_result

// Write pointer is 8 bytes into its group: the first two words (f12, f13) lie
// below the write pointer and are multiplied last.
.offset_2:
    sub     a14, a6, a7                 // a14 = N - pos
    addi    a14, a14, 3
    srli    a14, a14, 2                 // (N - pos + 3) / 4
    EE.LDF.128.IP f11, f10, f13, f12, a15, 16   // f12, f13 kept for the end, f10..f11 used first
    loopnez a14, .first_fir_loop_2      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f4, f0, f10
    madd.s  f5, f1, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
    madd.s  f6, f2, f8
    madd.s  f7, f3, f9
.first_fir_loop_2:

    l32i    a15, a2, 4                  // a15 = delay line base
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load first 4 delay samples
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_2     // 0 ... pos-1
    madd.s  f4, f2, f8                  // f2, f3 still hold the last loaded coefficients
    madd.s  f5, f3, f9
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f6, f0, f10
    madd.s  f7, f1, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
.second_fir_loop_2:

    madd.s  f4, f2, f12                 // final products with the saved samples
    madd.s  f5, f3, f13
    j       .store_fir_result

// Write pointer is 12 bytes into its group: the first three words
// (f12, f13, f14) lie below the write pointer and are multiplied last.
.offset_3:
    sub     a14, a6, a7                 // a14 = N - pos
    addi    a14, a14, 3
    srli    a14, a14, 2                 // (N - pos + 3) / 4
    EE.LDF.128.IP f11, f14, f13, f12, a15, 16   // f12..f14 kept for the end, f11 used first
    loopnez a14, .first_fir_loop_3      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f4, f0, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
    madd.s  f5, f1, f8
    madd.s  f6, f2, f9
    madd.s  f7, f3, f10
.first_fir_loop_3:

    l32i    a15, a2, 4                  // a15 = delay line base
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load first 4 delay samples
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_3     // 0 ... pos-1
    madd.s  f4, f1, f8                  // f1..f3 still hold the last loaded coefficients
    madd.s  f5, f2, f9
    madd.s  f6, f3, f10
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f7, f0, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
.second_fir_loop_3:

    madd.s  f4, f1, f12                 // final products with the saved samples
    madd.s  f5, f2, f13
    madd.s  f4, f3, f14

.store_fir_result:
    // Reduce the four partial accumulators into f4
    add.s   f4, f4, f5
    add.s   f6, f6, f7
    add.s   f4, f4, f6

    // Store result
    ssip    f4, a4, 4                   // *output = f4, output += 4 bytes
    // Next input sample
    addi    a5, a5, -1
    bnez    a5, .fir_loop_len

    // Store state
    s32i    a7, a2, 12                  // fir->pos = a7
    movi.n  a2, 0                       // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_fir_f32_aes3_enabled
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
esp_err_t dsps_fir_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
{
    // Reference (plain C) FIR filter: one output sample per input sample.
    // The delay line is circular; fir->pos is the slot the next sample goes to.
    for (int sample = 0; sample < len; sample++) {
        // Store the new sample and advance the write position, wrapping at N.
        fir->delay[fir->pos] = input[sample];
        fir->pos++;
        if (fir->pos >= fir->N) {
            fir->pos = 0;
        }

        // Walk the coefficients from coeffs[0] while reading the delay line
        // from pos up to N-1 and then from 0 up to pos-1.
        const float *coeff = fir->coeffs;
        float acc = 0;

        int n = fir->pos;
        while (n < fir->N) {
            acc += *coeff++ * fir->delay[n];
            n++;
        }
        n = 0;
        while (n < fir->pos) {
            acc += *coeff++ * fir->delay[n];
            n++;
        }

        output[sample] = acc;
    }
    return ESP_OK;
}
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "malloc.h"
|
||||
|
||||
|
||||
// Initialise a FIR filter structure.
//
// fir        - structure to fill in
// coeffs     - filter coefficients, coeffs_len entries
// delay      - delay line with room for coeffs_len + 4 floats (all of them are
//              cleared here), or NULL to let this function allocate one
// coeffs_len - number of coefficients
//
// Returns ESP_OK on success, ESP_ERR_NO_MEM when the delay line could not be
// allocated and, on ESP32-S3 builds, ESP_ERR_DSP_INVALID_LENGTH when
// coeffs_len is not a multiple of 4 or ESP_ERR_DSP_ARRAY_NOT_ALIGNED when
// coeffs or delay is not 16-byte aligned.
//
// When one of the ESP32-S3 checks fails the structure has already been filled
// in, so a delay line allocated here can still be released with
// dsps_fir_f32_free().
esp_err_t dsps_fir_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int coeffs_len)
{
    // The delay line holds coeffs_len samples followed by 4 extra floats.
    const int delay_len = coeffs_len + 4;

    // Allocate the delay line if the caller did not supply one
    if (delay == NULL) {
#ifdef CONFIG_IDF_TARGET_ESP32S3
        delay = (float *)memalign(16, delay_len * sizeof(float));
#else
        delay = (float *)malloc(delay_len * sizeof(float));
#endif // CONFIG_IDF_TARGET_ESP32S3
        if (delay == NULL) {
            // Allocation failed: leave the structure in a state that is safe
            // to pass to dsps_fir_f32_free() and report the error instead of
            // writing through a NULL pointer below.
            fir->delay = NULL;
            fir->use_delay = 0;
            return ESP_ERR_NO_MEM;
        }
        fir->use_delay = 1;
    } else {
        fir->use_delay = 0;
    }

    // Clear the whole delay line, including the 4 extra floats
    for (int i = 0; i < delay_len; i++) {
        delay[i] = 0;
    }

    fir->coeffs = coeffs;
    fir->delay = delay;
    fir->N = coeffs_len;
    fir->pos = 0;

#ifdef CONFIG_IDF_TARGET_ESP32S3
    // The number of coefficients must be a multiple of 4
    if (fir->N % 4 != 0) {
        return ESP_ERR_DSP_INVALID_LENGTH;
    }
    // The coeffs array should be aligned to 16
    if (((uint32_t)coeffs) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
    // The delay array should be aligned to 16
    if (((uint32_t)delay) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
#endif // CONFIG_IDF_TARGET_ESP32S3

    return ESP_OK;
}
|
||||
|
||||
// Release the delay line if it was allocated by dsps_fir_init_f32()
// (fir->use_delay != 0). A caller-supplied delay line is left untouched.
// Always returns ESP_OK.
esp_err_t dsps_fir_f32_free(fir_f32_t *fir)
{
    if (fir->use_delay != 0) {
        fir->use_delay = 0;
        free(fir->delay);
        // Do not leave a dangling pointer to the released buffer behind
        fir->delay = NULL;
    }
    return ESP_OK;
}
|
||||
@@ -0,0 +1,98 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is the decimating FIR filter for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_ae32
|
||||
.type dsps_fird_f32_ae32,@function
|
||||
// Assembly implementation of a decimating FIR filter:
//   dsps_fird_f32_ae32(fir_f32_t *fir, const float *input, float *output, int len);
//
// For each of the `len` output samples the routine first moves `decim` input
// samples into the circular delay line (advancing pos and wrapping it to 0 at
// N), then writes one output: the sum of coeffs[k] * delay[n], where n runs
// from pos up to N-1 and then from 0 up to pos-1 while k counts up from 0.
//
// NOTE(review): the value returned in a2 is the number of output samples
// written (a8 is incremented once per output), not an esp_err_t status as the
// previous comments stated - confirm against the prototype in dsps_fir.h.
// NOTE(review): decim is expected to be >= 1; the accumulator clear is the last
// instruction of the input-copy loop and is skipped when the loop count is 0.
//
// fir_f32_t fields read through a2 (byte offsets):
//   0 - coeffs, 4 - delay, 8 - N, 12 - pos, 16 - decimation factor
//
// Arguments (after `entry`):
//   a2 - fir, a3 - input, a4 - output, a5 - len (number of output samples)
// Returns:
//   a2 = number of output samples written (0 when len <= 0);
//   fir->pos is written back when at least one output was produced.
//
// Register usage:
//   a6  - N
//   a7  - pos (element index)
//   a8  - output counter (return value)
//   a10 - coeffs base
//   a11 - delay line base
//   a12 - decimation factor
//   a13 - byte index into the delay line
//   a14 - N - pos
//   a15 - running coefficient pointer
//   f0 - sample, f1 - coefficient, f2 - accumulator

dsps_fird_f32_ae32:
    entry   a1, 16

    // Nothing to do for len <= 0. Without this check the count-down loop
    // below (addi -1 / bnez) would keep running until a5 wrapped around.
    bgei    a5, 1, .fird_f32_ae32_start
    movi.n  a2, 0                       // no output samples written
    retw.n

.fird_f32_ae32_start:
    l32i    a7, a2, 12                  // a7  = pos
    slli    a13, a7, 2                  // a13 = pos * 4 (float elements are 4 bytes)
    l32i    a10, a2, 0                  // a10 = coeffs
    l32i    a11, a2, 4                  // a11 = delay line base
    l32i    a6, a2, 8                   // a6  = N
    l32i    a12, a2, 16                 // a12 = decimation factor
    movi    a8, 0                       // result = 0

fird_loop_len:
    // Move `decim` input samples into the delay line
    loopnez a12, .fird_load_data
    lsip    f0, a3, 4                   // f0 = *input, input += 4 bytes
    ssx     f0, a11, a13                // delay[pos] = f0
    addi    a13, a13, 4                 // advance byte index
    addi    a7, a7, 1                   // pos++
    // Wrap the write position at the end of the delay line
    blt     a7, a6, do_not_reset_a13
    movi    a13, 0
    movi    a7, 0
do_not_reset_a13:
    const.s f2, 0                       // clear accumulator (also the last loop instruction)
.fird_load_data:

    addi    a8, a8, 1                   // result++

    mov     a15, a10                    // a15 = coeffs (restart at coeffs[0])
    sub     a14, a6, a7                 // a14 = N - pos
    // First part: delay[pos] .. delay[N-1]
    loopnez a14, first_fird_loop
    lsip    f1, a15, 4                  // f1 = *coeffs, coeff pointer += 4
    lsx     f0, a11, a13                // f0 = delay[a13 / 4]
    addi    a13, a13, 4                 // advance byte index
    madd.s  f2, f0, f1                  // f2 += f0 * f1
first_fird_loop:
    movi    a13, 0                      // restart at delay[0]
    // Second part: delay[0] .. delay[pos-1]; a13 ends at pos * 4 again
    loopnez a7, second_fird_loop
    lsip    f1, a15, 4                  // f1 = *coeffs, coeff pointer += 4
    lsx     f0, a11, a13                // f0 = delay[a13 / 4]
    addi    a13, a13, 4                 // advance byte index
    madd.s  f2, f0, f1                  // f2 += f0 * f1
second_fird_loop:

    // Store result
    ssi     f2, a4, 0
    addi    a4, a4, 4                   // output++
next_itt_fir32:
    // Next output sample
    addi    a5, a5, -1
    bnez    a5, fird_loop_len

    // Store state
    s32i    a7, a2, 12                  // fir->pos = a7

    mov     a2, a8                      // return the number of output samples
    retw.n
|
||||
|
||||
#endif // dsps_fird_f32_ae32_enabled
|
||||
@@ -0,0 +1,239 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_aes3_enabled == 1)
|
||||
|
||||
// This is the decimating FIR filter for the ESP32-S3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_aes3
|
||||
.type dsps_fird_f32_aes3,@function
|
||||
// Assembly implementation of a decimating FIR filter:
//   dsps_fird_f32_aes3(fir_f32_t *fir, const float *input, float *output, int len);
//
// For each of the `len` output samples the routine first moves `decim` input
// samples into the circular delay line (advancing pos and wrapping it to 0 at
// N), then accumulates the coefficient * sample products four at a time using
// 128-bit loads and writes the sum of the four partial accumulators.
//
// The delay-line loads start at the write pointer rounded down to 16 bytes, so
// four code paths (.offset_0 ... .offset_3) handle the four possible positions
// of the write pointer inside its 16-byte group.
//
// NOTE(review): the value returned in a2 is `len`, i.e. the number of output
// samples written, not an esp_err_t status as the previous comment stated -
// confirm against the prototype in dsps_fir.h.
// NOTE(review): the kernel presumably requires N to be a multiple of 4 and the
// coeffs/delay arrays to be 16-byte aligned (loop counts are computed with
// `srli ..., 2` and data is fetched in 16-byte groups) - confirm against the
// init routine.
// NOTE(review): in .offset_1 ... .offset_3 the first loop runs
// (N - pos + 3) / 4 times and its last 16-byte load appears to extend past
// delay[N-1]; those words are multiplied and accumulated, so the floats that
// follow the delay line presumably have to be zero - confirm.
// NOTE(review): decim is expected to be >= 1; a15 is only computed inside the
// input-copy loop, which is skipped when the loop count is 0.
// NOTE(review): the comments below assume EE.LDF.128.IP places the word at the
// lowest address into its last register operand - confirm against the ISA
// documentation.
//
// fir_f32_t fields read through a2 (byte offsets):
//   0 - coeffs, 4 - delay, 8 - N, 12 - pos, 16 - decimation factor
//
// Arguments (after `entry`):
//   a2 - fir, a3 - input, a4 - output, a5 - len (number of output samples)
// Returns:
//   a2 = number of output samples written (0 when len <= 0);
//   fir->pos is written back when at least one output was produced.
//
// Register usage:
//   a6  - N
//   a7  - pos (element index)
//   a8  - temp: byte offset of the write pointer inside its 16-byte group
//   a9  - copy of len, used as the return value
//   a10 - running coefficient pointer (reloaded for every output sample)
//   a11 - delay line write pointer
//   a12 - constant -16 (mask that rounds an address down to 16 bytes)
//   a13 - constant 15  (mask that extracts the offset inside a 16-byte group)
//   a14 - loop counter
//   a15 - delay line read pointer, 16-byte granular
//   f0..f3   - coefficients
//   f4..f7   - partial accumulators
//   f8..f11  - delay line samples
//   f12..f14 - samples from the first group kept for the final products
//   f15      - input sample

dsps_fird_f32_aes3:
    entry   a1, 16

    // Nothing to do for len <= 0. Without this check the count-down loop
    // (addi -1 / bnez) would keep running until a5 wrapped around.
    bgei    a5, 1, .fird_f32_aes3_start
    movi.n  a2, 0                       // no output samples written
    retw.n

.fird_f32_aes3_start:
    l32i    a7, a2, 12                  // a7  = pos
    l32i    a6, a2, 8                   // a6  = N
    l32i    a11, a2, 4                  // a11 = delay line base
    addx4   a11, a7, a11                // a11 = &delay[pos]

    mov.n   a9, a5                      // remember len for the return value

    movi.n  a12, -16
    movi.n  a13, 15

// Main loop over the output samples
.fird_loop_len:
    // Move `decim` input samples into the delay line
    l32i    a14, a2, 16                 // a14 = decimation factor
    loopnez a14, .fird_load_data
    lsip    f15, a3, 4                  // f15 = *input, input += 4 bytes
    ssip    f15, a11, 4                 // *a11 = f15, a11 += 4 bytes
    addi    a7, a7, 1                   // pos++

    // Wrap the write position at the end of the delay line
    blt     a7, a6, .do_not_reset_a11
    l32i    a11, a2, 4                  // a11 = delay line base
    movi    a7, 0
.do_not_reset_a11:
    // Read pointer = write pointer rounded down to 16 bytes
    and     a15, a11, a12
.fird_load_data:

    // Process data
    l32i    a10, a2, 0                  // a10 = coeffs (restart at coeffs[0])

    // Clear the four partial accumulators
    const.s f4, 0
    const.s f5, 0
    const.s f6, 0
    const.s f7, 0

    // Dispatch on the byte offset (0, 4, 8 or 12) of a11 inside its group.
    // Any other value falls through to .offset_0.
    and     a8, a11, a13                // a8 = a11 & 15
    beqz    a8, .offset_0
    addi    a8, a8, -4
    beqz    a8, .offset_1
    addi    a8, a8, -4
    beqz    a8, .offset_2
    addi    a8, a8, -4
    beqz    a8, .offset_3

// Write pointer is 16-byte aligned: coefficients and samples pair up directly.
.offset_0:
    sub     a14, a6, a7                 // a14 = N - pos
    srli    a14, a14, 2                 // groups of four
    loopnez a14, .first_fir_loop_0      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load 4 delay samples
    madd.s  f4, f0, f8
    madd.s  f5, f1, f9
    madd.s  f6, f2, f10
    madd.s  f7, f3, f11
.first_fir_loop_0:

    l32i    a15, a2, 4                  // a15 = delay line base
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_0     // 0 ... pos-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load 4 delay samples
    madd.s  f4, f0, f8
    madd.s  f5, f1, f9
    madd.s  f6, f2, f10
    madd.s  f7, f3, f11
.second_fir_loop_0:
    j       .store_fir_result

// Write pointer is 4 bytes into its group: the first word of the group (f12)
// lies below the write pointer and is multiplied last.
.offset_1:
    sub     a14, a6, a7                 // a14 = N - pos
    addi    a14, a14, 3
    srli    a14, a14, 2                 // (N - pos + 3) / 4
    EE.LDF.128.IP f11, f10, f9, f12, a15, 16    // f12 kept for the end, f9..f11 used first
    loopnez a14, .first_fir_loop_1      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f4, f0, f9
    madd.s  f5, f1, f10
    madd.s  f6, f2, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
    madd.s  f7, f3, f8
.first_fir_loop_1:

    l32i    a15, a2, 4                  // a15 = delay line base
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load first 4 delay samples
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_1     // 0 ... pos-1
    madd.s  f4, f3, f8                  // f3 still holds the last loaded coefficient
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f5, f0, f9
    madd.s  f6, f1, f10
    madd.s  f7, f2, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
.second_fir_loop_1:

    madd.s  f4, f3, f12                 // final product with the saved sample
    j       .store_fir_result

// Write pointer is 8 bytes into its group: the first two words (f12, f13) lie
// below the write pointer and are multiplied last.
.offset_2:
    sub     a14, a6, a7                 // a14 = N - pos
    addi    a14, a14, 3
    srli    a14, a14, 2                 // (N - pos + 3) / 4
    EE.LDF.128.IP f11, f10, f13, f12, a15, 16   // f12, f13 kept for the end, f10..f11 used first
    loopnez a14, .first_fir_loop_2      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f4, f0, f10
    madd.s  f5, f1, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
    madd.s  f6, f2, f8
    madd.s  f7, f3, f9
.first_fir_loop_2:

    l32i    a15, a2, 4                  // a15 = delay line base
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load first 4 delay samples
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_2     // 0 ... pos-1
    madd.s  f4, f2, f8                  // f2, f3 still hold the last loaded coefficients
    madd.s  f5, f3, f9
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f6, f0, f10
    madd.s  f7, f1, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
.second_fir_loop_2:

    madd.s  f4, f2, f12                 // final products with the saved samples
    madd.s  f5, f3, f13
    j       .store_fir_result

// Write pointer is 12 bytes into its group: the first three words
// (f12, f13, f14) lie below the write pointer and are multiplied last.
.offset_3:
    sub     a14, a6, a7                 // a14 = N - pos
    addi    a14, a14, 3
    srli    a14, a14, 2                 // (N - pos + 3) / 4
    EE.LDF.128.IP f11, f14, f13, f12, a15, 16   // f12..f14 kept for the end, f11 used first
    loopnez a14, .first_fir_loop_3      // pos ... N-1
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f4, f0, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
    madd.s  f5, f1, f8
    madd.s  f6, f2, f9
    madd.s  f7, f3, f10
.first_fir_loop_3:

    l32i    a15, a2, 4                  // a15 = delay line base
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load first 4 delay samples
    srli    a14, a7, 2                  // pos / 4 groups
    loopnez a14, .second_fir_loop_3     // 0 ... pos-1
    madd.s  f4, f1, f8                  // f1..f3 still hold the last loaded coefficients
    madd.s  f5, f2, f9
    madd.s  f6, f3, f10
    EE.LDF.128.IP f3, f2, f1, f0, a10, 16       // load 4 coefficients
    madd.s  f7, f0, f11
    EE.LDF.128.IP f11, f10, f9, f8, a15, 16     // load next 4 delay samples
.second_fir_loop_3:

    madd.s  f4, f1, f12                 // final products with the saved samples
    madd.s  f5, f2, f13
    madd.s  f4, f3, f14

.store_fir_result:
    // Reduce the four partial accumulators into f4
    add.s   f4, f4, f5
    add.s   f6, f6, f7
    add.s   f4, f4, f6

    // Store result
    ssip    f4, a4, 4                   // *output = f4, output += 4 bytes
    // Next output sample
    addi    a5, a5, -1
    bnez    a5, .fird_loop_len

    // Store state
    s32i    a7, a2, 12                  // fir->pos = a7
    mov.n   a2, a9                      // return the number of output samples
    retw.n
|
||||
|
||||
#endif // dsps_fird_f32_aes3_enabled
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
int dsps_fird_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
{
    // Reference (plain C) decimating FIR filter: consumes fir->decim input
    // samples for every output sample and returns the number of outputs written.
    int produced = 0;

    while (produced < len) {
        // Move fir->decim new samples into the circular delay line.
        for (int k = 0; k < fir->decim; k++) {
            fir->delay[fir->pos++] = *input++;
            if (fir->pos >= fir->N) {
                fir->pos = 0;
            }
        }

        // Walk the coefficients from coeffs[0] while reading the delay line
        // from pos up to N-1 and then from 0 up to pos-1.
        const float *coeff = fir->coeffs;
        float acc = 0;

        int n = fir->pos;
        while (n < fir->N) {
            acc += *coeff++ * fir->delay[n];
            n++;
        }
        n = 0;
        while (n < fir->pos) {
            acc += *coeff++ * fir->delay[n];
            n++;
        }

        output[produced] = acc;
        produced++;
    }
    return produced;
}
|
||||
@@ -0,0 +1,99 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_arp4_enabled == 1)
|
||||
|
||||
// This is the decimating FIR filter for the ESP32-P4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_arp4
|
||||
.type dsps_fird_f32_arp4,@function
|
||||
// Assembly implementation of a decimating FIR filter:
//   dsps_fird_f32_arp4(fir_f32_t *fir, const float *input, float *output, int len);
//
// For each of the `len` output samples the routine first moves `decim` input
// samples into the circular delay line (wrapping the write pointer back to
// the start when it reaches &delay[N]), then writes one output: the sum of
// coeffs[k] * delay[n], where n runs from pos up to N-1 and then from 0 up to
// pos-1 while k counts up from 0.
//
// NOTE(review): the value returned in a0 is `len`, i.e. the number of output
// samples written, not an esp_err_t status as the previous comment stated -
// confirm against the prototype in dsps_fir.h.
// NOTE(review): decim is used directly as a hardware loop count and is
// presumably expected to be >= 1 - confirm.
//
// fir_f32_t fields read through a0 (byte offsets):
//   0 - coeffs, 4 - delay, 8 - N, 12 - pos, 16 - decimation factor
//
// Arguments:
//   a0 - fir, a1 - input, a2 - output, a3 - len (number of output samples)
// Returns:
//   a0 = number of output samples written (0 when len <= 0);
//   fir->pos is written back when at least one output was produced.
//
// Register usage:
//   a4 - delay line base
//   a5 - running coefficient pointer
//   a6 - copy of len, used as the return value
//   t1 - delay line pointer (write pointer, then read pointer)
//   t2 - N
//   t3 - &delay[N], end of the delay line
//   t4 - decimation factor
//   t5 - N - pos, length of the first accumulation loop
//   t6 - pos,     length of the second accumulation loop
//   fa0 - sample, fa1 - coefficient, fa2 - accumulator

dsps_fird_f32_arp4:
    // Nothing to do for len <= 0. Without this check the count-down loop
    // (addi -1 / bnez) would keep running until a3 wrapped around.
    blez    a3, .fird_arp4_no_output

    mv      a6, a3                      // remember len for the return value
    lw      a4, 4(a0)                   // a4 = delay line base
    lw      t2, 8(a0)                   // t2 = N
    lw      t3, 12(a0)                  // t3 = pos
    lw      t4, 16(a0)                  // t4 = decim
    slli    t3, t3, 2                   // t3 = pos * 4 (bytes)
    add     t1, a4, t3                  // t1 = &delay[pos]
    slli    t6, t2, 2                   // t6 = N * 4 (bytes)
    add     t3, a4, t6                  // t3 = &delay[N]

    nop                                 // NOTE(review): kept from the original; purpose not visible here
.fird_loop_len:
    // Move `decim` input samples into the delay line. The load for the next
    // iteration is the last instruction of the hardware loop.
    // NOTE(review): this means one extra float is read from a1 after the last
    // sample of each group has been stored; for the final output that read is
    // one element past the samples consumed - confirm this is acceptable.
    flw     fa0, 0(a1)                  // fa0 = *input, first load
    esp.lp.setup 0, t4, .fird_load_data // label marks the last executed instruction
    add     a1, a1, 4                   // input++
    fsw     fa0, 0(t1)                  // delay[pos] = fa0
    add     t1, t1, 4
    blt     t1, t3, .do_not_reset_pos   // still inside the delay line?
    lw      t1, 4(a0)                   // wrap: t1 = delay line base
.do_not_reset_pos:
.fird_load_data: flw fa0, 0(a1)         // fa0 = *input for the next iteration

    sub     t5, t3, t1                  // (&delay[N] - &delay[pos]) bytes
    srli    t5, t5, 2                   // t5 = N - pos
    sub     t6, t1, a4
    srli    t6, t6, 2                   // t6 = pos

    fmv.w.x fa2, zero                   // accumulator = 0

    lw      a5, 0(a0)                   // a5 = coeffs (restart at coeffs[0])
    // First part: delay[pos] .. delay[N-1]
    esp.lp.setup 0, t5, .first_fird_loop
    flw     fa1, 0(a5)
    flw     fa0, 0(t1)
    addi    a5, a5, 4
    fmadd.s fa2, fa1, fa0, fa2          // fa2 += fa1 * fa0
.first_fird_loop: addi t1, t1, 4

    lw      t1, 4(a0)                   // t1 = delay line base

    // Second part: delay[0] .. delay[pos-1], skipped when pos == 0
    beqz    t6, .skeep_loop
    esp.lp.setup 0, t6, .second_fird_loop
    flw     fa1, 0(a5)
    flw     fa0, 0(t1)
    addi    a5, a5, 4
    fmadd.s fa2, fa1, fa0, fa2          // fa2 += fa1 * fa0
.second_fird_loop: addi t1, t1, 4

.skeep_loop:
    // Store result; t1 is back at &delay[pos] here
    fsw     fa2, 0(a2)
    addi    a2, a2, 4                   // output++

    // Next output sample
    addi    a3, a3, -1
    bnez    a3, .fird_loop_len

    // Store state
    sub     t6, t1, a4
    srli    t6, t6, 2                   // t6 = pos
    sw      t6, 12(a0)                  // fir->pos = t6

    mv      a0, a6                      // return the number of output samples
    ret

.fird_arp4_no_output:
    li      a0, 0                       // no output samples written
    ret
|
||||
|
||||
#endif // dsps_fird_f32_arp4_enabled
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
|
||||
// Initialise a decimating FIR filter structure.
//
// fir    - structure to fill in
// coeffs - filter coefficients, N entries
// delay  - caller-supplied delay line; the first N floats are cleared here
// N      - number of coefficients
// decim  - decimation factor (input samples consumed per output sample)
//
// Returns ESP_OK on success and, on ESP32-S3 builds,
// ESP_ERR_DSP_INVALID_LENGTH when N is not a multiple of 4 or
// ESP_ERR_DSP_ARRAY_NOT_ALIGNED when coeffs or delay is not 16-byte aligned.
esp_err_t dsps_fird_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int N, int decim)
{
    fir->coeffs = coeffs;
    fir->delay = delay;
    fir->N = N;
    fir->pos = 0;
    fir->decim = decim;
    // The delay line always belongs to the caller here. Mark it as such so
    // that dsps_fir_f32_free() never tries to free() it based on an
    // uninitialised flag.
    fir->use_delay = 0;

#ifdef CONFIG_IDF_TARGET_ESP32S3
    // The number of coefficients must be a multiple of 4; if it is not, the
    // caller has to append zero coefficients to round the length up.
    if (fir->N % 4 != 0) {
        return ESP_ERR_DSP_INVALID_LENGTH;
    }
    // The coeffs array should be aligned to 16
    if (((uint32_t)coeffs) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
    // The delay array should be aligned to 16
    if (((uint32_t)delay) & 0x0f) {
        return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
    }
#endif // CONFIG_IDF_TARGET_ESP32S3

    // NOTE(review): only N entries are cleared. dsps_fir_init_f32() clears
    // coeffs_len + 4 entries; if the ESP32-S3 decimator reads past delay[N-1]
    // in 16-byte groups, the caller's buffer presumably needs the same zeroed
    // padding - confirm the required delay length.
    for (int i = 0; i < N; i++) {
        fir->delay[i] = 0;
    }
    return ESP_OK;
}
|
||||
Reference in New Issue
Block a user