add some code
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
.macro fir_s16_ae32_mul x1, x2, count, ID
|
||||
// This macro calculates fixed point dot product for ((count + 1)*4) int16 samples
|
||||
// x1 - input array1 register (samples)
|
||||
// x2 - input array2 register (coefficients) the array is inverted and is being decremented
|
||||
// count - counter register (for example a7)
|
||||
// count - (samples_count / 4) - 1
|
||||
// acc += x1[i + 0]*x2[N - i - 1] + x1[i + 1]*x2[N - i - 2] + x1[i + 2]*x2[N - i - 3] + x1[i + 3]*x2[N - i - 4]; i: 0..count
|
||||
// acchi, and acclo have to be initialized before
|
||||
// Result - acchi || acclo
|
||||
// Modifies:
|
||||
// m0, m1, m2, m3
|
||||
// acchi || acclo - must be loaded before (for example 0x3fff to acclo).
|
||||
|
||||
/*
|
||||
* Data schedule. Each line represents instruction and columns represent
|
||||
* register contents. Last column (MUL) shows the multiplication which
|
||||
* takes place. Values loaded in the given cycle are shown in square brackets.
|
||||
*
|
||||
* m0 m1 m2 m3 MUL
|
||||
* ----------------- pre-load --------------------------
|
||||
*[x0 x1] (no MULs in the first 3 instructions)
|
||||
* x0 x1 [y(N-1) y(N-2)]
|
||||
* x0 x1 [x2 x3] y(N-1) y(N-2)
|
||||
* x0 x1 x2 x3 y(N-1) y(N-2) [y(N-3) y(N-4)] x0*y(N-1)
|
||||
* -------------------- loop ------------------------ (the following 4 instructions are
|
||||
*[x4 x5] x2 x3 y(N-1) y(N-2) y(N-3) y(N-4) x1*y(N-2) repeated as much as needed)
|
||||
* x4 x5 x2 x3 [y(N-5) y(M-6)] y(N-3) y(N-4) x2*y(N-3)
|
||||
* x4 x5 [x6 x7] y(N-5) y(M-6) y(N-3) y(N-4) x3*y(N-4)
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) [y(N-7) y(M-8)] x4*y(N-5)
|
||||
* ------------------- finalize ----------------------
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) y(N-7) y(M-8) x5*y(N-6) (nothing is load)
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) y(N-7) y(M-8) x6*y(N-7)
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) y(N-7) y(M-8) x7*y(N-8)
|
||||
*/
|
||||
|
||||
ldinc m0, \x1
|
||||
lddec m2, \x2
|
||||
ldinc m1, \x1
|
||||
|
||||
mula.dd.lh.lddec m3, \x2, m0, m2
|
||||
loopnez \count, .loop_end_\ID
|
||||
.loop_\ID:
|
||||
mula.dd.hl.ldinc m0, \x1, m0, m2
|
||||
mula.dd.lh.lddec m2, \x2, m1, m3
|
||||
mula.dd.hl.ldinc m1, \x1, m1, m3
|
||||
mula.dd.lh.lddec m3, \x2, m0, m2
|
||||
.loop_end_\ID:
|
||||
|
||||
mula.dd.hl m0, m2
|
||||
mula.dd.lh m1, m3
|
||||
mula.dd.hl m1, m3
|
||||
|
||||
.endm // fir_s16_ae32_mul
|
||||
|
||||
.macro fir_s16_ae32_full x1, x2, count, full_count, ID
|
||||
// This macro calculates fixed point dot product for ((count + 1)*4) int16 samples
|
||||
// x1 - input array1 register (for example a2)
|
||||
// x2 - input array2 register (for example a3)
|
||||
// count - counter register (for example a7)
|
||||
// count - samples_count / 4 - 1
|
||||
// full_count - samples_count
|
||||
// acc += x1[i + 0]*x2[N - i - 1] + x1[i + 1]*x2[N - i - 2] + x1[i + 2]*x2[N - i - 3] + x1[i + 3]*x2[N - i - 4]; i: 0..count
|
||||
// acchi, and acclo have to be initialized before
|
||||
// Result - acchi || acclo
|
||||
// Modifies:
|
||||
// m0, m1, m2, m3
|
||||
// acchi || acclo - must be loaded before (for example 0x3fff to acclo).
|
||||
|
||||
// the main mac16 multiplication loop is skipped for cases with less than 4 samples
|
||||
blti \full_count, 4, .less_than_4_operands_\ID
|
||||
fir_s16_ae32_mul \x1, \x2, \count, \ID
|
||||
|
||||
.less_than_4_operands_\ID:
|
||||
|
||||
bbci \full_count, 1, .mod2chk_\ID
|
||||
ldinc m0, \x1
|
||||
lddec m2, \x2
|
||||
mula.dd.hl m0, m2
|
||||
mula.dd.lh m0, m2
|
||||
.mod2chk_\ID:
|
||||
|
||||
bbci \full_count, 0, .mod1chk_\ID
|
||||
ldinc m0, \x1
|
||||
lddec m2, \x2
|
||||
mula.dd.lh m0, m2
|
||||
.mod1chk_\ID:
|
||||
|
||||
.endm // fir_s16_ae32_full
|
||||
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "malloc.h"
|
||||
#include <string.h>
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#define ROUNDING_VALUE 0x7fff
|
||||
|
||||
esp_err_t dsps_fird_init_s16(fir_s16_t *fir, int16_t *coeffs, int16_t *delay, int16_t coeffs_len, int16_t decim, int16_t start_pos, int16_t shift)
|
||||
{
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->coeffs_len = coeffs_len;
|
||||
fir->pos = 0;
|
||||
fir->decim = decim;
|
||||
fir->d_pos = start_pos;
|
||||
fir->shift = shift;
|
||||
fir->rounding_val = (int16_t)(ROUNDING_VALUE);
|
||||
fir->free_status = 0;
|
||||
|
||||
if (fir->coeffs_len < 2) { // number of coeffcients must be higer than 1
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
|
||||
if ((fir->shift > 40) || (fir->shift < -40)) { // shift amount must be within a range from -40 to 40
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (fir->d_pos >= fir->decim) { // start position must be lower than decimation
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
// Rounding value buffer primary for a purpose of ee.ld.accx.ip, but used for both the esp32 and esp32s3
|
||||
// dsps_fird_s16_aexx_free() must be called to free the memory after the FIR function is finished
|
||||
int32_t *aexx_rounding_buff = (int32_t *)memalign(16, 2 * sizeof(int32_t));
|
||||
|
||||
long long rounding = (long long)(fir->rounding_val);
|
||||
|
||||
if (fir->shift >= 0) {
|
||||
rounding = (rounding >> fir->shift);
|
||||
} else {
|
||||
rounding = (rounding << (-fir->shift));
|
||||
}
|
||||
#if dsps_fird_s16_arp4_enabled
|
||||
fir->pos = start_pos;
|
||||
|
||||
int16_t *new_delay_buff = (int16_t *)memalign(16, (coeffs_len + 8 * 2) * sizeof(int16_t));
|
||||
for (int i = 0 ; i < (coeffs_len + 8 * 2) ; i++) {
|
||||
new_delay_buff[i] = 0;
|
||||
}
|
||||
fir->delay = &new_delay_buff[8];
|
||||
fir->free_status |= 0x0001;
|
||||
|
||||
#endif // dsps_fird_s16_arp4_enabled
|
||||
|
||||
|
||||
aexx_rounding_buff[0] = (int32_t)(rounding); // 32 lower bits (acclo) type reassignment to 32-bit
|
||||
aexx_rounding_buff[1] = (int32_t)((rounding >> 32) & 0xFF); // 8 higher bits (acchi) shift by 32 and apply the mask
|
||||
fir->rounding_buff = aexx_rounding_buff;
|
||||
fir->free_status |= 0x0004;
|
||||
|
||||
#if dsps_fird_s16_aes3_enabled
|
||||
|
||||
if (fir->delay == NULL) { // New delay buffer is allocated if the current delay line is NULL
|
||||
int16_t *new_delay_buff = (int16_t *)memalign(16, coeffs_len * sizeof(int16_t));
|
||||
fir->delay = new_delay_buff;
|
||||
fir->free_status |= 0x0001;
|
||||
} else {
|
||||
if ((int)fir->delay & 0xf) { // Delay line array must be aligned
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
}
|
||||
|
||||
if ((int)fir->coeffs & 0xf) { // Coefficients array must be aligned
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
|
||||
// If the number of coefficients is not divisible by 8, a new delay line a new coefficients arrays are allocated
|
||||
// the newly allocated arrays are divisible by 8. Coefficients are copied from the original fir structure to
|
||||
// the new coeffs array and the remaining space is filled with zeroes
|
||||
// dsps_fird_s16_free_coeffs_delay must be called to free the memory after the FIR function is finished
|
||||
if (fir->coeffs_len % 8) { // Number of coefficients must be devisible by 8
|
||||
int16_t zero_coeffs = (8 - (fir->coeffs_len % 8));
|
||||
int16_t new_coeffs_len = fir->coeffs_len + zero_coeffs;
|
||||
int16_t *aes3_delay_buff = (int16_t *)memalign(16, new_coeffs_len * sizeof(int16_t));
|
||||
int16_t *aes3_coeffs_buff = (int16_t *)memalign(16, new_coeffs_len * sizeof(int16_t));
|
||||
|
||||
for (int i = 0; i < fir->coeffs_len; i++) { // copy fir->coeffs to aes3_coeffs_buff
|
||||
aes3_coeffs_buff[i] = fir->coeffs[i];
|
||||
}
|
||||
|
||||
for (int i = fir->coeffs_len; i < new_coeffs_len; i++) { // add zeroes to the end
|
||||
aes3_coeffs_buff[i] = 0;
|
||||
}
|
||||
|
||||
fir->delay = aes3_delay_buff;
|
||||
fir->coeffs = aes3_coeffs_buff;
|
||||
fir->coeffs_len = new_coeffs_len;
|
||||
fir->free_status |= 0x0002;
|
||||
}
|
||||
|
||||
#endif // dsps_fird_s16_aes3_enabled
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
for (int i = 0; i < fir->coeffs_len; i++) { // Initialize the dealy line to zero
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dsps_fird_s16_aexx_free(fir_s16_t *fir)
|
||||
{
|
||||
|
||||
if (fir->free_status == 0) {
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
if (fir->free_status & 0x0003) {
|
||||
|
||||
if (fir->free_status & 0x0002) {
|
||||
free(fir->coeffs);
|
||||
}
|
||||
#if dsps_fird_s16_arp4_enabled
|
||||
fir->delay = &fir->delay[-8];
|
||||
#endif
|
||||
free(fir->delay);
|
||||
}
|
||||
|
||||
if (fir->free_status & 0x0004) {
|
||||
free(fir->rounding_buff);
|
||||
}
|
||||
fir->free_status = 0;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
|
||||
esp_err_t dsps_16_array_rev(int16_t *arr, int16_t len)
|
||||
{
|
||||
|
||||
int16_t temp;
|
||||
|
||||
for (int i = 0; i < (int)(len / 2); i++) {
|
||||
temp = arr[i];
|
||||
arr[i] = arr[len - 1 - i];
|
||||
arr[len - 1 - i] = temp;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,181 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_s16_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_fir_s16_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_s16_ae32
|
||||
.type dsps_fird_s16_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len)
|
||||
|
||||
|
||||
dsps_fird_s16_ae32:
|
||||
// Input params Variables
|
||||
//
|
||||
// fir - a2 N - a6
|
||||
// input - a3 pos - a7
|
||||
// output - a4 rounding_lo - a8
|
||||
// len - a5 d_pos - a9
|
||||
// &coeffs[N] - a10
|
||||
// delay - a11
|
||||
// decim - a12
|
||||
// rounding_hi - a13
|
||||
// final_shift - a14 (shift)
|
||||
|
||||
entry a1, 32
|
||||
|
||||
l16si a7, a2, 10 // a7 - pos
|
||||
l16si a6, a2, 8 // a6 - N
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
addx2 a10, a6, a10 // a10 - coeffs[N+1]
|
||||
addi a10, a10, -4 // a10 - coeffs[N]
|
||||
s32i a10, a1, 0 // save pointer to a1
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
l16si a12, a2, 12 // a12 - decimation
|
||||
l16si a9, a2, 14 // a9 - d_pos
|
||||
l16si a14, a2, 16 // a14 - shift
|
||||
|
||||
// prepare rounding value
|
||||
l32i a15, a2, 20 // get address of rounding array to a15
|
||||
l32i a8, a15, 0 // a8 = lower 32 bits of the rounding value (acclo)
|
||||
l32i a13, a15, 4 // a13 = higher 8 bits of the rounding value (acchi), offset 4 (32 bits)
|
||||
|
||||
// prepare final_shift value
|
||||
addi a14, a14, -15 // shift - 15
|
||||
abs a15, a14
|
||||
blti a15, 32, _shift_lower_than_32_init // check if lower than 32
|
||||
|
||||
// greater than 32 could only be negative shift ((-40 to +40) - 15) -> -55 to +25
|
||||
addi a14, a14, 32 // if greater than 32, add 32 (SRC is not defined for SAR greater than 32)
|
||||
_shift_lower_than_32_init:
|
||||
|
||||
bltz a14, _shift_negative_init // branch if lower than zero (not including zero)
|
||||
beqz a14, _shift_negative_init // branch if equal to zero (add zero to the previous statement)
|
||||
ssl a14 // if positive, set SAR register to left shift value (SAR = 32 - shift)
|
||||
|
||||
j _end_of_shift_init
|
||||
|
||||
_shift_negative_init: // negative shift
|
||||
abs a14, a14 // absolute value
|
||||
ssr a14 // SAR = -shift
|
||||
// final_shift is saved to SAR register, SAR is not being changed during the execution
|
||||
|
||||
_end_of_shift_init:
|
||||
l16si a14, a2, 16 // a14 - load shift value
|
||||
addi a14, a14, -15 // shift - 15
|
||||
|
||||
s32i a5, a1, 4 // save len to a1, used as the return value
|
||||
|
||||
|
||||
// first delay line load (decim - d_pos) when d_pos is not 0
|
||||
beqz a9, _fird_loop_len
|
||||
sub a15, a12, a9 // a15 = decim - d_pos
|
||||
|
||||
loopnez a15, ._loop_d_pos
|
||||
|
||||
blt a7, a6, reset_fir_pos_d_pos // branch if fir->pos >= fir->N
|
||||
movi.n a7, 0 // fir->pos = 0
|
||||
l32i a11, a2, 4 // reset delay line to the beginning
|
||||
reset_fir_pos_d_pos:
|
||||
|
||||
l16si a15, a3, 0 // load 16 bits from input (a3) to a15
|
||||
addi a7, a7, 1 // fir->pos++
|
||||
s16i a15, a11, 0 // save 16 bits from a15 to delay line (a11)
|
||||
addi a3, a3, 2 // increment input pointer
|
||||
addi a11, a11, 2 // increment delay line pointer
|
||||
._loop_d_pos:
|
||||
|
||||
j .fill_delay_line // skip the first iteration of the delay line filling routine
|
||||
|
||||
// outer loop
|
||||
_fird_loop_len:
|
||||
|
||||
loopnez a12, .fill_delay_line
|
||||
|
||||
blt a7, a6, reset_fir_pos // branch if fir->pos >= fir->N
|
||||
movi.n a7, 0 // fir->pos = 0
|
||||
l32i a11, a2, 4 // reset delay line to the beginning
|
||||
reset_fir_pos:
|
||||
|
||||
l16si a15, a3, 0 // load 16 bits from input (a3) to a15
|
||||
addi a7, a7, 1 // fir->pos++
|
||||
s16i a15, a11, 0 // save 16 bits from a15 to delay line (a11)
|
||||
addi a3, a3, 2 // increment input pointer
|
||||
addi a11, a11, 2 // increment delay line pointer
|
||||
.fill_delay_line:
|
||||
|
||||
// prepare MAC unit
|
||||
wsr a8, acclo // acclo = a8
|
||||
wsr a13, acchi // acchi = a13
|
||||
|
||||
addi a11, a11, -4 // preset delay line pointer, samples (array is being incremented)
|
||||
sub a9, a6, a7 // a9 = full_count = fir->N - fir->pos
|
||||
|
||||
// (Count / 4) - 1
|
||||
srli a15, a9, 2 // a15 = count = full_count /4
|
||||
addi a10, a10, 4 // preset coeffs pointer, samples (array is being decremented)
|
||||
addi a15, a15, -1 // count - 1
|
||||
|
||||
// x1, x2, count, full_count, ID
|
||||
fir_s16_ae32_full a11, a10, a15, a9, __LINE__
|
||||
|
||||
l32i a10, a2, 0 // load coeffs
|
||||
l32i a11, a2, 4 // reset delay line to the beginning
|
||||
addx2 a10, a7, a10 // move coeffs pointer to the end
|
||||
|
||||
srli a15, a7, 2 // a15 = count = full_count (fir->pos) / 4
|
||||
addi a11, a11, -4 // preset delay line pointer, samples (array is being incremented)
|
||||
addi a15, a15, -1 // count - 1
|
||||
|
||||
// x1, x2, count, full_count, ID
|
||||
fir_s16_ae32_full a11, a10, a15, a7, __LINE__
|
||||
|
||||
// SAR already set from the beginning to final_shift value
|
||||
abs a15, a14 // absolute value of shift
|
||||
l32i a10, a1, 0 // reset coefficient pointer
|
||||
blti a15, 32, _shift_lower_than_32
|
||||
rsr a9, acchi // get only higher 8 bits of the acc register
|
||||
movi.n a15, 0xFF // higher 8 bits mask
|
||||
and a9, a9, a15 // apply mask
|
||||
srl a15, a9
|
||||
j _shift_set
|
||||
|
||||
_shift_lower_than_32:
|
||||
rsr a9, acchi // get higher 8 bits of the acc register
|
||||
movi.n a11, 0xFF // higher 8 bits mask
|
||||
rsr a15, acclo // get lower 32 bits of the acc register
|
||||
and a9, a9, a11 // apply mask
|
||||
|
||||
|
||||
bltz a14, _shift_negative // branch if lower than zero (if negative)
|
||||
beqz a14, _shift_negative
|
||||
src a15, a15, a9 // funnel shift left
|
||||
j _shift_set
|
||||
|
||||
_shift_negative: // negative shift
|
||||
src a15, a9, a15 // funnel shift right
|
||||
|
||||
_shift_set:
|
||||
|
||||
l32i a11, a2, 4 // Load initial position of the delay line
|
||||
s16i a15, a4, 0 // save the shifted value to the output array (a4)
|
||||
addi a5, a5, -1 // len--
|
||||
addi a4, a4, 2 // increase pointer of the output array
|
||||
addx2 a11, a7, a11 // p_delay[fir->pos] - (two times the fir->pos)
|
||||
|
||||
// counter
|
||||
bnez a5, _fird_loop_len // break if a5 == 0
|
||||
|
||||
l32i.n a2, a1, 4 // load return value to a2
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fird_s16_ae32_enabled
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len)
|
||||
{
|
||||
int32_t result = 0;
|
||||
int32_t input_pos = 0;
|
||||
long long rounding = 0;
|
||||
const int32_t final_shift = fir->shift - 15;
|
||||
|
||||
rounding = (long long)(fir->rounding_val);
|
||||
|
||||
if (fir->shift >= 0) {
|
||||
rounding = (rounding >> fir->shift) & 0xFFFFFFFFFF; // 40-bit mask
|
||||
} else {
|
||||
rounding = (rounding << (-fir->shift)) & 0xFFFFFFFFFF; // 40-bit mask
|
||||
}
|
||||
|
||||
// len is already a length of the *output array, calculated as (length of the input array / decimation)
|
||||
for (int i = 0; i < len; i++) {
|
||||
|
||||
for (int j = 0; j < fir->decim - fir->d_pos; j++) {
|
||||
|
||||
if (fir->pos >= fir->coeffs_len) {
|
||||
fir->pos = 0;
|
||||
}
|
||||
fir->delay[fir->pos++] = input[input_pos++];
|
||||
}
|
||||
fir->d_pos = 0;
|
||||
|
||||
long long acc = rounding;
|
||||
int16_t coeff_pos = fir->coeffs_len - 1;
|
||||
|
||||
for (int n = fir->pos; n < fir->coeffs_len ; n++) {
|
||||
acc += (int32_t)fir->coeffs[coeff_pos--] * (int32_t)fir->delay[n];
|
||||
}
|
||||
for (int n = 0; n < fir->pos ; n++) {
|
||||
acc += (int32_t)fir->coeffs[coeff_pos--] * (int32_t)fir->delay[n];
|
||||
}
|
||||
|
||||
if (final_shift > 0) {
|
||||
output[result++] = (int16_t)(acc << final_shift);
|
||||
} else {
|
||||
output[result++] = (int16_t)(acc >> (-final_shift));
|
||||
}
|
||||
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_s16_arp4_enabled == 1)
|
||||
|
||||
// This is FIR filter for esp32p4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_s16_arp4
|
||||
.global dsps_fird_s16_ansi
|
||||
.type dsps_fird_s16_arp4,@function
|
||||
// The function implements the following C code:
|
||||
// int32_t dsps_fird_s16_arp4(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
|
||||
dsps_fird_s16_arp4:
|
||||
// In case of filter length different then 8*K
|
||||
lh t2, 8(a0) // t2 - coeffs_len
|
||||
andi t2, t2, 7
|
||||
beqz t2, .dsps_fird_s16_arp4_body
|
||||
j dsps_fird_s16_ansi
|
||||
|
||||
.dsps_fird_s16_arp4_body:
|
||||
add sp,sp,-48
|
||||
sw s0, 0(sp)
|
||||
sw s1, 4(sp)
|
||||
sw s2, 8(sp)
|
||||
sw s3, 12(sp)
|
||||
sw s4, 16(sp)
|
||||
sw s5, 20(sp)
|
||||
sw s6, 24(sp)
|
||||
sw s7, 28(sp)
|
||||
sw s8, 32(sp)
|
||||
sw s9, 36(sp)
|
||||
sw s10, 40(sp)
|
||||
sw s11, 44(sp)
|
||||
|
||||
// Enable analigned data access
|
||||
esp.movx.r.cfg t6
|
||||
or t6, t6, 2
|
||||
esp.movx.w.cfg t6
|
||||
|
||||
lw t1, 4(a0) // t1 - delay_line
|
||||
lh t2, 8(a0) // t2 - coeffs_len
|
||||
lh t3, 10(a0) // t3 - pos
|
||||
lh t6, 16(a0) // t6 - shift
|
||||
add t6, t6, -15
|
||||
neg t6, t6
|
||||
lw t5, 20(a0) // t5 - rounding_buff
|
||||
lw s2, 4(a0) // s2 - delay_line* current position
|
||||
add s2, s2, t3 // s2 = delay_line + pos*2
|
||||
add s2, s2, t3 //
|
||||
add s4, t2, t2 // s4 = coeff_len*2
|
||||
add s0, t1, s4 // s0 - &delay[coeffs_len]
|
||||
|
||||
lh a4, 0(t1)
|
||||
.loop_len:
|
||||
lh t4, 12(a0) // t4 - decim
|
||||
.loop_decim_copy:
|
||||
lh s1, 0(a1) // load input data
|
||||
add a1, a1, 2
|
||||
|
||||
sh s1, 0(s2)
|
||||
add s2, s2, 2 // preincrement of delay line
|
||||
bgt s0, s2, .skeep_reset
|
||||
lw s2, 4(a0) // s2 - delay_line
|
||||
.skeep_reset:
|
||||
add t4, t4, -1
|
||||
bgtz t4, .loop_decim_copy
|
||||
|
||||
// s5 - count1 = length - pos
|
||||
// s6 = count1 >> 3 :
|
||||
sub t3, s2, t1
|
||||
srli t3, t3, 1 // t3 = (pos*2)>>1
|
||||
sub s5, t2, t3
|
||||
srli s6, s5, 3 // s6 = (coeff_len - pos)>>3
|
||||
|
||||
srli s7, t3, 3 // s7 = pos>>3
|
||||
and s8, t3, 0x07 // s8 = pos&0x07
|
||||
|
||||
esp.ld.xacc.ip t5, 0 // load rounding value to accx
|
||||
|
||||
lw s10, 0(a0) // s10 - coeffs
|
||||
esp.vld.128.ip q0, s10, 16 //q0 - coeffs
|
||||
mv s9, s2 // s9 - pointer to delay line
|
||||
esp.vld.128.ip q1, s9, 16 // q1 - delay line data
|
||||
|
||||
beqz s6, .skip_main_loop1
|
||||
esp.lp.setup 0, s6, .main_loop1
|
||||
esp.vmulas.s16.xacc.ld.ip q0, s10, 16, q0, q1 // q0 - coeffs, q1 - data
|
||||
.main_loop1: esp.vld.128.ip q1, s9, 16 // Load delay line
|
||||
.skip_main_loop1: nop
|
||||
|
||||
|
||||
add s9, s9, -16
|
||||
sub s9, s9, s4
|
||||
beqz s8, .skip_rest_add
|
||||
esp.vld.128.ip q2, s9, 16
|
||||
esp.vadd.s16 q1, q2, q1
|
||||
esp.vmulas.s16.xacc.ld.ip q0, s10, 16, q0, q1 // q0 - coeffs, q1 - data
|
||||
.skip_rest_add:
|
||||
esp.vld.128.ip q1, s9, 16
|
||||
|
||||
beqz s7, .skip_main_loop3
|
||||
esp.lp.setup 1, s7, .main_loop3
|
||||
esp.vmulas.s16.xacc.ld.ip q0, s10, 16, q0, q1 // q0 - coeffs, q1 - data
|
||||
esp.vld.128.ip q1, s9, 16
|
||||
.main_loop3: nop
|
||||
.skip_main_loop3: nop
|
||||
|
||||
// Shift and Store result
|
||||
esp.srs.s.xacc s11, t6 // shift accx register by final_shift amount (a6), save the lower 32bits to a15
|
||||
sh s11, 0(a2) // store result to output buffer
|
||||
add a2, a2, 2
|
||||
|
||||
add a3, a3, -1
|
||||
bgtz a3, .loop_len
|
||||
sh t3, 10(a0)
|
||||
|
||||
.fast_exit:
|
||||
mv a0, a6
|
||||
|
||||
lw s0, 0(sp)
|
||||
lw s1, 4(sp)
|
||||
lw s2, 8(sp)
|
||||
lw s3, 12(sp)
|
||||
lw s4, 16(sp)
|
||||
lw s5, 20(sp)
|
||||
lw s6, 24(sp)
|
||||
lw s7, 28(sp)
|
||||
lw s8, 32(sp)
|
||||
lw s9, 36(sp)
|
||||
lw s10, 40(sp)
|
||||
lw s11, 44(sp)
|
||||
|
||||
add sp,sp,48
|
||||
ret
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,95 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fir_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fir_f32_ae32
|
||||
.type dsps_fir_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fir_f32_ae32(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fir_f32_ae32:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
movi a10, 4
|
||||
mull a13, a7, a10// a13 - a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
mull a6, a6, a10// a6 = a6*4
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
|
||||
movi.n a9, 0
|
||||
movi.n a8, 4
|
||||
movi.n a12, 4
|
||||
|
||||
// a13 - delay index
|
||||
fir_loop_len:
|
||||
// Store to delay line
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
lsi f0, a3, 0 // f0 = x[i]
|
||||
addi a3, a3, 4 // x++
|
||||
ssx f0, a11, a13 // delay[a13] = f0;
|
||||
addi a13, a13, 4 // a13++
|
||||
addi a7, a7, 1 // a7++
|
||||
// verify deley line
|
||||
blt a7, a6, do_not_reset_a13
|
||||
movi a13, 0
|
||||
movi a7, 0
|
||||
do_not_reset_a13:
|
||||
// Calc amount for delay line before end
|
||||
mov a15, a10 // a15 - coeffs
|
||||
wfr f2, a9 // f2 = 0;
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
|
||||
// a11 = &delay[pos]
|
||||
add a11, a11, a13
|
||||
|
||||
loopnez a14, first_fir_loop // pos...N-1
|
||||
lsxp f1, a15, a8 // f1 = *(coeffs--)
|
||||
lsxp f0, a11, a12 // load delay f0 = *(delay++)
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
first_fir_loop:
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
loopnez a7, second_fir_loop // 0..pos
|
||||
lsxp f1, a15, a8 // f1 = *(coeffs--)
|
||||
lsxp f0, a11, a12 // load delay f0 = *(delay++)
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
second_fir_loop:
|
||||
|
||||
// and after end
|
||||
// Store result
|
||||
ssi f2, a4, 0
|
||||
addi a4, a4, 4 // y++ - increment output pointer
|
||||
// Check loop
|
||||
addi a5, a5, -1
|
||||
bnez a5, fir_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fir_f32_ae32_enabled
|
||||
@@ -0,0 +1,233 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fir_f32_aes3_enabled == 1)
|
||||
|
||||
// This is FIR filter for Esp32s3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fir_f32_aes3
|
||||
.type dsps_fir_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fir_f32_aes3(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fir_f32_aes3:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
// a2 - fir structure
|
||||
// a3 - input
|
||||
// a4 - output
|
||||
// a5 - length
|
||||
|
||||
// a6 - fir length
|
||||
// a7 - position in delay line
|
||||
// a8 - temp
|
||||
// a9 - const 0
|
||||
// a10 - coeffs ptr
|
||||
// a11 - delay line ptr
|
||||
// a12 - const
|
||||
// a13 -
|
||||
// a14 - temp for loops
|
||||
// a15 - delay line rounded to 16
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
|
||||
l32i a6, a2, 8 // a6 - N - amount of coefficients
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
addx4 a11, a7, a11 // a11 = a11 + a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
|
||||
movi.n a9, 0
|
||||
movi.n a12, 3
|
||||
|
||||
movi.n a12, -16
|
||||
movi.n a13, 15
|
||||
// Main loop for input samples
|
||||
.fir_loop_len:
|
||||
// Store to delay line
|
||||
lsip f15, a3, 4 // a3 += 4, f15 = input[n]
|
||||
ssip f15, a11, 4 // a11 += 4, *a11 = f15
|
||||
addi a7, a7, 1 // a7++ - position in delay line
|
||||
|
||||
//
|
||||
blt a7, a6, .do_not_reset_a11
|
||||
l32i a11, a2, 4 // Load delay line
|
||||
movi a7, 0
|
||||
.do_not_reset_a11:
|
||||
// Load rounded delay line address
|
||||
and a15, a11, a12
|
||||
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
|
||||
// Clear f4, f5 for multiplications
|
||||
const.s f4, 0
|
||||
const.s f5, 0
|
||||
const.s f6, 0
|
||||
const.s f7, 0
|
||||
|
||||
and a8, a11, a13 // a8 = a11 & 15
|
||||
beqz a8, .offset_0
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_1
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_2
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_3
|
||||
|
||||
// a10 - coeffs
|
||||
// a11 - delay line
|
||||
.offset_0:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
srli a14, a14, 2
|
||||
loopnez a14, .first_fir_loop_0 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8
|
||||
madd.s f5, f1, f9
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.first_fir_loop_0:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_0 // 0..pos
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8
|
||||
madd.s f5, f1, f9
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.second_fir_loop_0:
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_1:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f10, f9, f12, a15, 16 // Load data from delay line
|
||||
// f12 - delay[N-1], store for the last operation
|
||||
// f9..f11 - delay[0..2]
|
||||
loopnez a14, .first_fir_loop_1 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f9
|
||||
madd.s f5, f1, f10
|
||||
madd.s f6, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f7, f3, f8
|
||||
.first_fir_loop_1:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_1 // 0..pos
|
||||
madd.s f4, f3, f8
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f5, f0, f9
|
||||
madd.s f6, f1, f10
|
||||
madd.s f7, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_1:
|
||||
|
||||
madd.s f4, f3, f12
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_2:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f10, f13, f12, a15, 16 // Load data from delay line
|
||||
// f12, f13 - delay[N-1], delay[N-2], store for the last operation
|
||||
// f10..f11 - delay[0..1]
|
||||
loopnez a14, .first_fir_loop_2 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f10
|
||||
madd.s f5, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f6, f2, f8
|
||||
madd.s f7, f3, f9
|
||||
.first_fir_loop_2:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_2 // 0..pos
|
||||
madd.s f4, f2, f8
|
||||
madd.s f5, f3, f9
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f6, f0, f10
|
||||
madd.s f7, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_2:
|
||||
|
||||
madd.s f4, f2, f12
|
||||
madd.s f5, f3, f13
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_3:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f14, f13, f12, a15, 16 // Load data from delay line
|
||||
// f12, f13, f14 - delay[N-1], delay[N-2], delay[N-3], store for the last operation
|
||||
// f11 - delay[0]
|
||||
loopnez a14, .first_fir_loop_3 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f5, f1, f8
|
||||
madd.s f6, f2, f9
|
||||
madd.s f7, f3, f10
|
||||
.first_fir_loop_3:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_3 // 0..pos
|
||||
madd.s f4, f1, f8
|
||||
madd.s f5, f2, f9
|
||||
madd.s f6, f3, f10
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f7, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_3:
|
||||
|
||||
madd.s f4, f1, f12
|
||||
madd.s f5, f2, f13
|
||||
madd.s f4, f3, f14
|
||||
|
||||
.store_fir_result:
|
||||
|
||||
add.s f4, f4, f5
|
||||
add.s f6, f6, f7
|
||||
add.s f4, f4, f6
|
||||
|
||||
// Store result
|
||||
ssip f4, a4, 4 // y++ - save result and increment output pointer
|
||||
// Check loop length
|
||||
addi a5, a5, -1
|
||||
bnez a5, .fir_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fir_f32_aes3_enabled
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
esp_err_t dsps_fir_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
|
||||
{
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
float acc = 0;
|
||||
int coeff_pos = 0;
|
||||
fir->delay[fir->pos] = input[i];
|
||||
fir->pos++;
|
||||
if (fir->pos >= fir->N) {
|
||||
fir->pos = 0;
|
||||
}
|
||||
for (int n = fir->pos; n < fir->N ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
for (int n = 0; n < fir->pos ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
output[i] = acc;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "malloc.h"
|
||||
|
||||
|
||||
esp_err_t dsps_fir_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int coeffs_len)
|
||||
{
|
||||
// Allocate delay line in case if it's NULL
|
||||
if (delay == NULL) {
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
delay = (float *)memalign(16, (coeffs_len + 4) * sizeof(float));
|
||||
#else
|
||||
delay = (float *)malloc((coeffs_len + 4) * sizeof(float));
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
fir->use_delay = 1;
|
||||
} else {
|
||||
fir->use_delay = 0;
|
||||
}
|
||||
for (int i = 0; i < (coeffs_len + 4); i++) {
|
||||
delay[i] = 0;
|
||||
}
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->N = coeffs_len;
|
||||
fir->pos = 0;
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
if (fir->N % 4 != 0) {
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
// The coeffs array should be aligned to 16
|
||||
if (((uint32_t)coeffs) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
// The delay array should be aligned to 16
|
||||
if (((uint32_t)delay) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
|
||||
for (int i = 0 ; i < coeffs_len; i++) {
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dsps_fir_f32_free(fir_f32_t *fir)
|
||||
{
|
||||
if (fir->use_delay != 0) {
|
||||
fir->use_delay = 0;
|
||||
free(fir->delay);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_ae32
|
||||
.type dsps_fird_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fird_f32_ae32(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fird_f32_ae32:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
movi a10, 4
|
||||
mull a13, a7, a10// a13 - a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
mull a6, a6, a10// a6 = a6*4
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
l32i a12, a2, 16 // a12 - decimation
|
||||
movi a8, 0 // result = 0;
|
||||
|
||||
// a13 - delay index
|
||||
fird_loop_len:
|
||||
// Store to delay line
|
||||
|
||||
loopnez a12, .fird_load_data // K loops
|
||||
lsip f0, a3, 4 // f0 = x[i++]
|
||||
ssx f0, a11, a13 // delay[a13] = f0;
|
||||
addi a13, a13, 4 // a13++
|
||||
addi a7, a7, 1 // a7++
|
||||
// verify deley line
|
||||
blt a7, a6, do_not_reset_a13
|
||||
movi a13, 0
|
||||
movi a7, 0
|
||||
do_not_reset_a13:
|
||||
const.s f2, 0
|
||||
.fird_load_data:
|
||||
|
||||
addi a8, a8, 1
|
||||
|
||||
// Calc amount for delay line before end
|
||||
mov a15, a10 // a15 - coeffs
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
loopnez a14, first_fird_loop // pos...N-1
|
||||
lsip f1, a15, 4 // a15++
|
||||
lsx f0, a11, a13 // load delay f0 = delay[pos]
|
||||
addi a13, a13, 4 // a13++, pos++
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
first_fird_loop:
|
||||
movi a13, 0 // load delay line counter to 0
|
||||
loopnez a7, second_fird_loop // 0..pos
|
||||
lsip f1, a15, 4 // a15++
|
||||
lsx f0, a11, a13 // load delay f0 = delay[pos]
|
||||
addi a13, a13, 4 // a13++, pos++
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
second_fird_loop:
|
||||
|
||||
// and after end
|
||||
// Store result
|
||||
ssi f2, a4, 0
|
||||
addi a4, a4, 4 // y++ - increment output pointer
|
||||
next_itt_fir32:
|
||||
// Check loop
|
||||
addi a5, a5, -1
|
||||
bnez a5, fird_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
|
||||
mov a2, a8 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fird_f32_ae32_enabled
|
||||
@@ -0,0 +1,239 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_aes3_enabled == 1)
|
||||
|
||||
// This is FIR filter for Esp32s3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_aes3
|
||||
.type dsps_fird_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fird_f32_aes3(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fird_f32_aes3:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
// a2 - fir structure
|
||||
// a3 - input
|
||||
// a4 - output
|
||||
// a5 - length
|
||||
|
||||
// a6 - fir length
|
||||
// a7 - position in delay line
|
||||
// a8 - temp
|
||||
// a10 - coeffs ptr
|
||||
// a11 - delay line ptr
|
||||
// a12 - const
|
||||
// a13 -
|
||||
// a14 - temp for loops
|
||||
// a15 - delay line rounded to 16
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
|
||||
l32i a6, a2, 8 // a6 - N - amount of coefficients
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
addx4 a11, a7, a11 // a11 = a11 + a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
|
||||
mov.n a9, a5
|
||||
movi.n a12, 3
|
||||
|
||||
movi.n a12, -16
|
||||
movi.n a13, 15
|
||||
// Main loop for input samples
|
||||
.fird_loop_len:
|
||||
// Store K values from input to delay line:
|
||||
|
||||
l32i a14, a2, 16 // a14 - decimation
|
||||
loopnez a14, .fird_load_data // K loops
|
||||
// Store to delay line
|
||||
lsip f15, a3, 4 // a3 += 4, f15 = input[n]
|
||||
ssip f15, a11, 4 // a11 += 4, *a11 = f15
|
||||
addi a7, a7, 1 // a7++ - position in delay line
|
||||
|
||||
blt a7, a6, .do_not_reset_a11
|
||||
l32i a11, a2, 4 // Load delay line
|
||||
movi a7, 0
|
||||
.do_not_reset_a11:
|
||||
and a15, a11, a12
|
||||
.fird_load_data:
|
||||
//
|
||||
// Process data
|
||||
//
|
||||
// Load rounded delay line address
|
||||
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
|
||||
// Clear f4, f5 for multiplications
|
||||
const.s f4, 0
|
||||
const.s f5, 0
|
||||
const.s f6, 0
|
||||
const.s f7, 0
|
||||
|
||||
and a8, a11, a13 // a8 = a11 & 15
|
||||
beqz a8, .offset_0
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_1
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_2
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_3
|
||||
|
||||
// a10 - coeffs
|
||||
// a11 - delay line
|
||||
.offset_0:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
srli a14, a14, 2
|
||||
loopnez a14, .first_fir_loop_0 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8
|
||||
madd.s f5, f1, f9
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.first_fir_loop_0:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_0 // 0..pos
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8
|
||||
madd.s f5, f1, f9
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.second_fir_loop_0:
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_1:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f10, f9, f12, a15, 16 // Load data from delay line
|
||||
// f12 - delay[N-1], store for the last operation
|
||||
// f9..f11 - delay[0..2]
|
||||
loopnez a14, .first_fir_loop_1 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f9
|
||||
madd.s f5, f1, f10
|
||||
madd.s f6, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f7, f3, f8
|
||||
.first_fir_loop_1:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_1 // 0..pos
|
||||
madd.s f4, f3, f8
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f5, f0, f9
|
||||
madd.s f6, f1, f10
|
||||
madd.s f7, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_1:
|
||||
|
||||
madd.s f4, f3, f12
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_2:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f10, f13, f12, a15, 16 // Load data from delay line
|
||||
// f12, f13 - delay[N-1], delay[N-2], store for the last operation
|
||||
// f10..f11 - delay[0..1]
|
||||
loopnez a14, .first_fir_loop_2 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f10
|
||||
madd.s f5, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f6, f2, f8
|
||||
madd.s f7, f3, f9
|
||||
.first_fir_loop_2:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_2 // 0..pos
|
||||
madd.s f4, f2, f8
|
||||
madd.s f5, f3, f9
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f6, f0, f10
|
||||
madd.s f7, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_2:
|
||||
|
||||
madd.s f4, f2, f12
|
||||
madd.s f5, f3, f13
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_3:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f14, f13, f12, a15, 16 // Load data from delay line
|
||||
// f12, f13, f14 - delay[N-1], delay[N-2], delay[N-3], store for the last operation
|
||||
// f11 - delay[0]
|
||||
loopnez a14, .first_fir_loop_3 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f5, f1, f8
|
||||
madd.s f6, f2, f9
|
||||
madd.s f7, f3, f10
|
||||
.first_fir_loop_3:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_3 // 0..pos
|
||||
madd.s f4, f1, f8
|
||||
madd.s f5, f2, f9
|
||||
madd.s f6, f3, f10
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f7, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_3:
|
||||
|
||||
madd.s f4, f1, f12
|
||||
madd.s f5, f2, f13
|
||||
madd.s f4, f3, f14
|
||||
|
||||
.store_fir_result:
|
||||
|
||||
add.s f4, f4, f5
|
||||
add.s f6, f6, f7
|
||||
add.s f4, f4, f6
|
||||
|
||||
// Store result
|
||||
ssip f4, a4, 4 // y++ - save result and increment output pointer
|
||||
// Check loop length
|
||||
addi a5, a5, -1
|
||||
bnez a5, .fird_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
mov.n a2, a9
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fir_f32_aes3_enabled
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
int dsps_fird_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
|
||||
{
|
||||
int result = 0;
|
||||
for (int i = 0; i < len ; i++) {
|
||||
for (int k = 0 ; k < fir->decim ; k++) {
|
||||
fir->delay[fir->pos++] = *input++;
|
||||
if (fir->pos >= fir->N) {
|
||||
fir->pos = 0;
|
||||
}
|
||||
}
|
||||
float acc = 0;
|
||||
int coeff_pos = 0;
|
||||
for (int n = fir->pos; n < fir->N ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
for (int n = 0; n < fir->pos ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
output[result++] = acc;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_arp4_enabled == 1)
|
||||
|
||||
// This is FIR filter for esp32p4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_arp4
|
||||
.type dsps_fird_f32_arp4,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fird_f32_arp4(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fird_f32_arp4:
|
||||
add sp,sp,-16
|
||||
|
||||
mv a6, a3
|
||||
lw t1, 4(a0) // t1 - delay
|
||||
lw a4, 4(a0) // a4 - delay
|
||||
lw t2, 8(a0) // t2 - N :FIR filter coefficients amount
|
||||
lw t3, 12(a0) // t3 - pos
|
||||
lw t4, 16(a0) // t4 - decim
|
||||
slli t3, t3, 2 // t5 = pos*4 (bytes)
|
||||
add t1, t1, t3 // delay[pos]
|
||||
slli t6, t2, 2 // t6 = N*4 (bytes)
|
||||
add t3, a4, t6 // last position for the daly[N]
|
||||
|
||||
nop
|
||||
.fird_loop_len:
|
||||
// p.lw a1, 4(a1)
|
||||
//fmv.w.x fa5,zero
|
||||
flw fa0, 0(a1) // f0 = x[i], first load
|
||||
esp.lp.setup 0, t4, .fird_load_data // label to the last executed instruction
|
||||
add a1, a1, 4 // i++
|
||||
fsw fa0, 0(t1) // delay[pos]
|
||||
add t1, t1, 4
|
||||
blt t1, t3, .do_not_reset_pos # if t0 < t1 then target
|
||||
lw t1, 4(a0) // t1 - delay
|
||||
.do_not_reset_pos:
|
||||
.fird_load_data: flw fa0, 0(a1) // f0 = x[i]
|
||||
|
||||
lw t0, 0(a0) // t0 - coeffs
|
||||
sub t5, t3, t1 // (last_pos - pos)*4
|
||||
srli t5, t5, 2 // N-pos
|
||||
sub t6, t1, a4
|
||||
srli t6, t6, 2 // pos
|
||||
|
||||
fmv.w.x fa2,zero
|
||||
|
||||
lw a5, 0(a0) // a5 - coeffs
|
||||
esp.lp.setup 0, t5, .first_fird_loop
|
||||
flw fa1, 0(a5)
|
||||
flw fa0, 0(t1)
|
||||
addi a5, a5, 4
|
||||
fmadd.s fa2, fa1, fa0, fa2
|
||||
.first_fird_loop: addi t1, t1, 4
|
||||
|
||||
|
||||
lw t1, 4(a0) // t1 - delay
|
||||
|
||||
beqz t6, .skeep_loop
|
||||
esp.lp.setup 0, t6, .second_fird_loop
|
||||
flw fa1, 0(a5)
|
||||
flw fa0, 0(t1)
|
||||
addi a5, a5, 4
|
||||
fmadd.s fa2, fa1, fa0, fa2
|
||||
.second_fird_loop: addi t1, t1, 4
|
||||
|
||||
.skeep_loop:
|
||||
// Store result
|
||||
|
||||
fsw fa2, 0(a2)
|
||||
addi a2, a2, 4
|
||||
|
||||
addi a3, a3, -1
|
||||
BNEZ a3, .fird_loop_len// Jump if > 0
|
||||
|
||||
sub t6, t1, a4
|
||||
srli t6, t6, 2 // pos
|
||||
|
||||
sw t6, 12(a0) // t3 - pos
|
||||
|
||||
mv a0, a6
|
||||
add sp,sp,16
|
||||
ret
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
|
||||
esp_err_t dsps_fird_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int N, int decim)
|
||||
{
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->N = N;
|
||||
fir->pos = 0;
|
||||
fir->decim = decim;
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
// The amount of coefficients should be divided to 4,
|
||||
// if not, add zero coefficients to round length to 0
|
||||
if (fir->N % 4 != 0) {
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
// The coeffs array should be aligned to 16
|
||||
if (((uint32_t)coeffs) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
// The delay array should be aligned to 16
|
||||
if (((uint32_t)delay) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
|
||||
for (int i = 0 ; i < N; i++) {
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,279 @@
|
||||
// Copyright 2018-2022 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_fir_H_
|
||||
#define _dsps_fir_H_
|
||||
|
||||
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#include "dsp_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Data struct of f32 fir filter
|
||||
*
|
||||
* This structure is used by a filter internally. A user should access this structure only in case of
|
||||
* extensions for the DSP Library.
|
||||
* All fields of this structure are initialized by the dsps_fir_init_f32(...) function.
|
||||
*/
|
||||
typedef struct fir_f32_s {
|
||||
float *coeffs; /*!< Pointer to the coefficient buffer.*/
|
||||
float *delay; /*!< Pointer to the delay line buffer.*/
|
||||
int N; /*!< FIR filter coefficients amount.*/
|
||||
int pos; /*!< Position in delay line.*/
|
||||
int decim; /*!< Decimation factor.*/
|
||||
int16_t use_delay; /*!< The delay line was allocated by init function.*/
|
||||
} fir_f32_t;
|
||||
|
||||
/**
|
||||
* @brief Data struct of s16 fir filter
|
||||
*
|
||||
* This structure is used by a filter internally. A user should access this structure only in case of
|
||||
* extensions for the DSP Library.
|
||||
* All fields of this structure are initialized by the dsps_fir_init_s16(...) function.
|
||||
*/
|
||||
typedef struct fir_s16_s {
|
||||
int16_t *coeffs; /*!< Pointer to the coefficient buffer.*/
|
||||
int16_t *delay; /*!< Pointer to the delay line buffer.*/
|
||||
int16_t coeffs_len; /*!< FIR filter coefficients amount.*/
|
||||
int16_t pos; /*!< Position in delay line.*/
|
||||
int16_t decim; /*!< Decimation factor.*/
|
||||
int16_t d_pos; /*!< Actual decimation counter.*/
|
||||
int16_t shift; /*!< Shift value of the result.*/
|
||||
int32_t *rounding_buff; /*!< Rounding buffer for the purposes of esp32s3 ee.ld.accx.ip assembly instruction */
|
||||
int32_t rounding_val; /*!< Rounding value*/
|
||||
int16_t free_status; /*!< Indicator for dsps_fird_s16_aes3_free() function*/
|
||||
} fir_s16_t;
|
||||
|
||||
/**
|
||||
* @brief initialize structure for 32 bit FIR filter
|
||||
*
|
||||
* Function initialize structure for 32 bit floating point FIR filter
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must have a length = coeffs_len + 4
|
||||
* @param coeffs_len: FIR filter length. Length of coeffs array. For esp32s3 length should be divided by 4 and aligned to 16.
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fir_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int coeffs_len);
|
||||
|
||||
/**
|
||||
* @brief initialize structure for 32 bit Decimation FIR filter
|
||||
* Function initialize structure for 32 bit floating point FIR filter with decimation
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must be length N
|
||||
* @param N: FIR filter length. Length of coeffs and delay arrays.
|
||||
* @param decim: decimation factor.
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fird_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int N, int decim);
|
||||
|
||||
/**
|
||||
* @brief initialize structure for 16 bit Decimation FIR filter
|
||||
* Function initialize structure for 16 bit signed fixed point FIR filter with decimation
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must be length N
|
||||
* @param coeffs_len: FIR filter length. Length of coeffs and delay arrays.
|
||||
* @param decim: decimation factor.
|
||||
* @param start_pos: initial value of decimation counter. Must be [0..d)
|
||||
* @param shift: shift position of the result
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fird_init_s16(fir_s16_t *fir, int16_t *coeffs, int16_t *delay, int16_t coeffs_len, int16_t decim, int16_t start_pos, int16_t shift);
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 32 bit floating point FIR filter
|
||||
*
|
||||
* Function implements FIR filter
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param[in] input: input array
|
||||
* @param[out] output: array with the result of FIR filter
|
||||
* @param[in] len: length of input and result arrays
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fir_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
esp_err_t dsps_fir_f32_ae32(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
esp_err_t dsps_fir_f32_aes3(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 32 bit floating point Decimation FIR filter
|
||||
*
|
||||
* Function implements FIR filter with decimation
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param input: input array
|
||||
* @param output: array with the result of FIR filter
|
||||
* @param len: length of result array
|
||||
*
|
||||
* @return: function returns the number of samples stored in the output array
|
||||
* depends on the previous state value could be [0..len/decimation]
|
||||
*/
|
||||
int dsps_fird_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
int dsps_fird_f32_ae32(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
int dsps_fird_f32_aes3(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
int dsps_fird_f32_arp4(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 16 bit signed fixed point Decimation FIR filter
|
||||
*
|
||||
* Function implements FIR filter with decimation
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param input: input array
|
||||
* @param output: array with the result of the FIR filter
|
||||
* @param len: length of the result array
|
||||
*
|
||||
* @return: function returns the number of samples stored in the output array
|
||||
* depends on the previous state value could be [0..len/decimation]
|
||||
*/
|
||||
int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
int32_t dsps_fird_s16_ae32(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
int32_t dsps_fird_s16_aes3(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
int32_t dsps_fird_s16_arp4(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief support arrays freeing function
|
||||
*
|
||||
* Function frees all the arrays, which were created during the initialization of the fir_s16_t structure
|
||||
* 1. frees allocated memory for rounding buffer, for the purposes of esp32s3 ee.ld.accx.ip assembly instruction
|
||||
* 2. frees allocated memory in case the delay line is NULL
|
||||
* 3. frees allocated memory in case the length of the filter (and the delay line) is not divisible by 8
|
||||
* and new delay line and filter coefficients arrays are created for the purpose of the esp32s3 assembly
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
*/
|
||||
esp_err_t dsps_fird_s16_aexx_free(fir_s16_t *fir);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief support arrays freeing function
|
||||
*
|
||||
* Function frees the delay line arrays, if it was allocated by the init functions.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
*/
|
||||
esp_err_t dsps_fir_f32_free(fir_f32_t *fir);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Array reversal
|
||||
*
|
||||
* Function reverses 16-bit long array members for the purpose of the dsps_fird_s16_aes3 implementation
|
||||
* The function has to be called either during the fir struct initialization or every time the coefficients change
|
||||
*
|
||||
* @param arr: pointer to the array to be reversed
|
||||
* @param len: length of the array to be reversed
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
*/
|
||||
esp_err_t dsps_16_array_rev(int16_t *arr, int16_t len);
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#if (dsps_fir_f32_ae32_enabled == 1)
|
||||
#define dsps_fir_f32 dsps_fir_f32_ae32
|
||||
#elif (dsps_fir_f32_aes3_enabled == 1)
|
||||
#define dsps_fir_f32 dsps_fir_f32_aes3
|
||||
#else
|
||||
#define dsps_fir_f32 dsps_fir_f32_ansi
|
||||
#endif
|
||||
|
||||
#if (dsps_fird_f32_aes3_enabled == 1)
|
||||
#define dsps_fird_f32 dsps_fird_f32_aes3
|
||||
#elif (dsps_fird_f32_ae32_enabled == 1)
|
||||
#define dsps_fird_f32 dsps_fird_f32_ae32
|
||||
#elif (dsps_fird_f32_arp4_enabled == 1)
|
||||
#define dsps_fird_f32 dsps_fird_f32_arp4
|
||||
#else
|
||||
#define dsps_fird_f32 dsps_fird_f32_ansi
|
||||
#endif
|
||||
|
||||
#if (dsps_fird_s16_ae32_enabled == 1)
|
||||
#define dsps_fird_s16 dsps_fird_s16_ae32
|
||||
#elif (dsps_fird_s16_aes3_enabled == 1)
|
||||
#define dsps_fird_s16 dsps_fird_s16_aes3
|
||||
#elif (dsps_fird_s16_arp4_enabled == 1)
|
||||
#define dsps_fird_s16 dsps_fird_s16_arp4
|
||||
#else
|
||||
#define dsps_fird_s16 dsps_fird_s16_ansi
|
||||
#endif
|
||||
|
||||
#else // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#define dsps_fir_f32 dsps_fir_f32_ansi
|
||||
#define dsps_fird_f32 dsps_fird_f32_ansi
|
||||
#define dsps_fird_s16 dsps_fird_s16_ansi
|
||||
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif // _dsps_fir_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
#ifndef _dsps_fir_platform_H_
|
||||
#define _dsps_fir_platform_H_
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#ifdef __XTENSA__
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#include <xtensa/config/core-matmap.h>
|
||||
|
||||
|
||||
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32S3
|
||||
#define dsps_fird_f32_aes3_enabled 1
|
||||
#define dsps_fird_f32_ae32_enabled 1
|
||||
#define dsps_fird_s16_aes3_enabled 1
|
||||
#define dsps_fird_s16_ae32_enabled 0
|
||||
#define dsps_fir_f32_aes3_enabled 1
|
||||
#define dsps_fir_f32_ae32_enabled 0
|
||||
#else
|
||||
#define dsps_fird_f32_ae32_enabled 1
|
||||
#define dsps_fird_s16_aes3_enabled 0
|
||||
#define dsps_fird_s16_ae32_enabled 1
|
||||
#define dsps_fir_f32_aes3_enabled 0
|
||||
#define dsps_fir_f32_ae32_enabled 1
|
||||
#endif
|
||||
|
||||
#endif //
|
||||
#endif // __XTENSA__
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32P4
|
||||
#ifdef CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_fird_f32_arp4_enabled 1
|
||||
#define dsps_fird_s16_arp4_enabled 1
|
||||
#else
|
||||
#define dsps_fird_f32_arp4_enabled 0
|
||||
#define dsps_fird_s16_arp4_enabled 0
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
#endif
|
||||
#endif // _dsps_fir_platform_H_
|
||||
@@ -0,0 +1,134 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fir_f32_aexx";
|
||||
|
||||
__attribute__((aligned(16)))
|
||||
static float x[1024];
|
||||
__attribute__((aligned(16)))
|
||||
static float y[1024];
|
||||
__attribute__((aligned(16)))
|
||||
static float y_compare[1024];
|
||||
|
||||
__attribute__((aligned(16)))
|
||||
static float coeffs[32];
|
||||
__attribute__((aligned(16)))
|
||||
static float delay[32 + 4];
|
||||
__attribute__((aligned(16)))
|
||||
static float delay_compare[32];
|
||||
|
||||
TEST_CASE("dsps_fir_f32_aexx functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
|
||||
fir_f32_t fir1;
|
||||
fir_f32_t fir2;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = (fir_len - i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
esp_err_t err = dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
TEST_ESP_OK(err);
|
||||
err = dsps_fir_f32(&fir1, x, y, len);
|
||||
TEST_ESP_OK(err);
|
||||
|
||||
for (int i = 0 ; i < fir_len * 3 ; i++) {
|
||||
ESP_LOGD(TAG, "fir[%i] = %f", i, y[i]);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
if (y[i] != i) {
|
||||
TEST_ASSERT_EQUAL(y[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
x[0] = 1;
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
dsps_fir_init_f32(&fir2, coeffs, delay_compare, fir_len);
|
||||
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
if (y[i] != y_compare[i]) {
|
||||
TEST_ASSERT_EQUAL(y[i], y_compare[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fir_f32_aexx benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_aexx - %f per sample for for %i coefficients, %f per tap \n", cycles, fir_len, cycles / (float)fir_len);
|
||||
|
||||
float min_exec = 3;
|
||||
float max_exec = 800;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,124 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fir_f32_ansi";
|
||||
|
||||
static float x[1024];
|
||||
static float y[1024];
|
||||
|
||||
static float coeffs[32];
|
||||
static float delay[32 + 4];
|
||||
|
||||
TEST_CASE("dsps_fir_f32_ansi functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = (fir_len - i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
dsps_fir_f32_ansi(&fir1, x, y, len);
|
||||
|
||||
for (int i = 0 ; i < fir_len * 3 ; i++) {
|
||||
ESP_LOGD(TAG, "fir[%i] = %f", i, y[i]);
|
||||
}
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
if (y[i] != i) {
|
||||
TEST_ASSERT_EQUAL(y[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
// Check even length
|
||||
#ifndef CONFIG_IDF_TARGET_ESP32S3
|
||||
fir_len--;
|
||||
#endif
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = (fir_len - i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
dsps_fir_f32_ansi(&fir1, x, y, len);
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
if (y[i] != i) {
|
||||
TEST_ASSERT_EQUAL(y[i], i);
|
||||
}
|
||||
}
|
||||
for (int i = fir_len ; i < len ; i++) {
|
||||
if (y[i] != 0) {
|
||||
TEST_ASSERT_EQUAL(y[i], 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fir_f32_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fir_f32_ansi(&fir1, x, y, len);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_ansi - %f per sample for for %i coefficients, %f per tap \n", cycles, fir_len, cycles / (float)fir_len);
|
||||
|
||||
float min_exec = 10;
|
||||
float max_exec = 800;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fird_f32";
|
||||
|
||||
static float x[1024];
|
||||
static float y[1024];
|
||||
static float y_compare[1024];
|
||||
|
||||
static float coeffs[32];
|
||||
static float delay[32];
|
||||
static float delay_compare[32];
|
||||
|
||||
TEST_CASE("dsps_fird_f32_aexx functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
fir_f32_t fir2;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
coeffs[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
dsps_fird_init_f32(&fir2, coeffs, delay_compare, fir_len, decim);
|
||||
int total1 = dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
int total2 = dsps_fird_f32_ansi(&fir2, x, y_compare, len / decim);
|
||||
total1 += dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
total2 += dsps_fird_f32_ansi(&fir2, x, y_compare, len / decim);
|
||||
total1 += dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
total2 += dsps_fird_f32_ansi(&fir2, x, y_compare, len / decim);
|
||||
ESP_LOGI(TAG, "Total result = %i, expected %i from %i", total1, total2, len);
|
||||
TEST_ASSERT_EQUAL(total1, total2);
|
||||
for (int i = 0 ; i < total1 ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %f expected %f\n", i, y[i], y_compare[i]);
|
||||
}
|
||||
for (int i = 0 ; i < total1 ; i++) {
|
||||
if (y[i] != y_compare[i]) {
|
||||
TEST_ASSERT_EQUAL(y[i], y_compare[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_f32_aexx benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_ae32 - %f per sample for for %i coefficients, %f per decim tap\n", cycles, fir_len, cycles / (float)fir_len * decim);
|
||||
ESP_LOGI(TAG, "Total cycles = %i", end_b - start_b);
|
||||
float min_exec = 3;
|
||||
float max_exec = 300;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fird_f32_ansi";
|
||||
|
||||
static float x[1024];
|
||||
static float y[1024];
|
||||
|
||||
static float coeffs[32];
|
||||
static float delay[32];
|
||||
|
||||
TEST_CASE("dsps_fird_f32_ansi functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = (i) % decim;
|
||||
}
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
int total = dsps_fird_f32_ansi(&fir1, x, y, len / decim);
|
||||
ESP_LOGI(TAG, "Total result = %i from %i", total, len);
|
||||
TEST_ASSERT_EQUAL(total, len / decim);
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %f\n", i, y[i]);
|
||||
}
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
TEST_ASSERT_EQUAL(y[i], 0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_f32_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fird_f32_ansi(&fir1, x, y, len / decim);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_ansi - %f per sample for for %i coefficients, %f per decim tap \n", cycles, fir_len, cycles / (float)fir_len * decim);
|
||||
float min_exec = 10;
|
||||
float max_exec = 300;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,371 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include <stdbool.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_wind.h"
|
||||
#include "dsps_view.h"
|
||||
#include "dsps_fft2r.h"
|
||||
|
||||
#define COEFFS 256
|
||||
#define N_IN_SAMPLES 4096
|
||||
#define DECIMATION 2
|
||||
#define Q15_MAX INT16_MAX
|
||||
#define LEAKAGE_BINS 10
|
||||
#define FIR_BUFF_LEN 16
|
||||
|
||||
#define MAX_FIR_LEN 64
|
||||
|
||||
static const char *TAG = "dsps_fird_s16_aexx";
|
||||
|
||||
const static int32_t len = N_IN_SAMPLES;
|
||||
const static int32_t N_FFT = (N_IN_SAMPLES / DECIMATION);
|
||||
const static int16_t decim = DECIMATION;
|
||||
const static int16_t fir_len = COEFFS;
|
||||
const static int32_t fir_buffer = (N_IN_SAMPLES + FIR_BUFF_LEN);
|
||||
|
||||
|
||||
// error messages for the init functions
|
||||
static void error_msg_handler(fir_s16_t *fir, esp_err_t status)
|
||||
{
|
||||
|
||||
if (status != ESP_OK) {
|
||||
dsps_fird_s16_aexx_free(fir);
|
||||
|
||||
switch (status) {
|
||||
case ESP_ERR_DSP_INVALID_LENGTH:
|
||||
TEST_ASSERT_MESSAGE(false, "Number of the coefficients must be higher than 1");
|
||||
break;
|
||||
case ESP_ERR_DSP_ARRAY_NOT_ALIGNED:
|
||||
TEST_ASSERT_MESSAGE(false, "Delay line or (and) coefficients arrays not aligned");
|
||||
break;
|
||||
case ESP_ERR_DSP_PARAM_OUTOFRANGE:
|
||||
TEST_ASSERT_MESSAGE(false, "Start position or (and) Decimation ratio or (and) Shift out of range");
|
||||
break;
|
||||
default:
|
||||
TEST_ASSERT_MESSAGE(false, "Unspecified error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_aexx functionality", "[dsps]")
|
||||
{
|
||||
|
||||
const int32_t max_len[2] = {2048, 2520}; // 2520 can be divided by 3, 6, 9, 12, 15, 18 and 21
|
||||
const int16_t max_dec[2] = {32, 21};
|
||||
const int16_t min_dec[2] = {2, 3};
|
||||
const int16_t shift_vals[17] = {-15, 0, 15};
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, max_len[1] * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, max_len[1] * sizeof(int16_t));
|
||||
int16_t *y_compare = (int16_t *)memalign(16, max_len[1] * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *coeffs_aexx = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *coeffs_ansi = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
|
||||
int16_t *delay = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *delay_compare = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
|
||||
int32_t combinations = 0;
|
||||
esp_err_t status1 = ESP_OK, status2 = ESP_OK;
|
||||
fir_s16_t fir1, fir2;
|
||||
|
||||
for (int i = 0 ; i < MAX_FIR_LEN ; i++) {
|
||||
coeffs[i] = i + 0x100;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < max_len[1] ; i++) {
|
||||
x[i] = 0x10;
|
||||
}
|
||||
|
||||
for (int variations = 0; variations < 2; variations++) {
|
||||
|
||||
ESP_LOGI(TAG, ": %"PRId32" input samples, coefficients range from 2 to %"PRId16", decimation range from %"PRId16" to %"PRId16", shift in range from -40 to 40 and start positions within the coeffs range",
|
||||
max_len[variations], (int16_t)MAX_FIR_LEN, min_dec[variations], max_dec[variations]);
|
||||
|
||||
// decimation increment is set as dec * 2 for input data length 2048 (2, 4, 8, 16, 32)
|
||||
// as dec + 3 for input data length 2520 (3, 6, 9, 12, 15, 18, 21)
|
||||
for (int16_t dec = min_dec[variations]; dec <= max_dec[variations]; ((variations) ? (dec += 3) : (dec *= 2)) ) {
|
||||
|
||||
const int32_t loop_len = max_len[variations] / dec;
|
||||
const int16_t start_position = 0;
|
||||
|
||||
for (int16_t fir_length = 2; fir_length <= MAX_FIR_LEN; fir_length += 16) {
|
||||
|
||||
for (int16_t shift_amount = 0; shift_amount < sizeof(shift_vals) / sizeof(uint16_t); shift_amount++) {
|
||||
|
||||
for (int k = 0 ; k < fir_length; k++) {
|
||||
coeffs_ansi[k] = coeffs[k];
|
||||
coeffs_aexx[k] = coeffs[k];
|
||||
}
|
||||
|
||||
status1 = dsps_fird_init_s16(&fir1, coeffs_aexx, delay, fir_length, dec, start_position, shift_vals[shift_amount]);
|
||||
error_msg_handler(&fir1, status1);
|
||||
status2 = dsps_fird_init_s16(&fir2, coeffs_ansi, delay_compare, fir_length, dec, start_position, shift_vals[shift_amount]);
|
||||
error_msg_handler(&fir2, status2);
|
||||
|
||||
#if(dsps_fird_s16_aes3_enabled)
|
||||
dsps_16_array_rev(fir1.coeffs, fir1.coeffs_len); // coefficients are being reverted for the purposes of the aes3 TIE implementation
|
||||
#endif
|
||||
|
||||
for (int16_t start_pos = 0; start_pos < dec; start_pos++) {
|
||||
|
||||
fir1.d_pos = start_pos;
|
||||
fir2.d_pos = start_pos;
|
||||
|
||||
for (int j = 0; j < fir1.coeffs_len; j++) {
|
||||
fir1.delay[j] = 0;
|
||||
fir2.delay[j] = 0;
|
||||
}
|
||||
|
||||
fir1.pos = 0;
|
||||
fir2.pos = 0;
|
||||
|
||||
const int32_t total1 = dsps_fird_s16(&fir1, x, y, loop_len);
|
||||
const int32_t total2 = dsps_fird_s16_ansi(&fir2, x, y_compare, loop_len);
|
||||
|
||||
TEST_ASSERT_EQUAL(total1, total2);
|
||||
for (int i = 0 ; i < total1 ; i++) {
|
||||
TEST_ASSERT_EQUAL(y[i], y_compare[i]);
|
||||
}
|
||||
|
||||
combinations++;
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
dsps_fird_s16_aexx_free(&fir2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ESP_LOGI(TAG, ": %"PRId32" total filter combinations\n", combinations);
|
||||
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
free(y_compare);
|
||||
free(coeffs_ansi);
|
||||
free(coeffs_aexx);
|
||||
free(delay_compare);
|
||||
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fird_s16_aexx benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
const int16_t local_dec = 2;
|
||||
const int32_t local_len = (len % 16) ? (4096) : (len); // length must be devisible by 16
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, local_len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, local_len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
|
||||
const int repeat_count = 100;
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
int32_t loop_len = 0;
|
||||
|
||||
fir_s16_t fir;
|
||||
esp_err_t status = ESP_OK;
|
||||
|
||||
status = dsps_fird_init_s16(&fir, coeffs, delay, MAX_FIR_LEN, local_dec, start_pos, shift);
|
||||
error_msg_handler(&fir, status);
|
||||
|
||||
#if(dsps_fird_s16_aes3_enabled)
|
||||
dsps_16_array_rev(fir.coeffs, fir.coeffs_len);
|
||||
#endif
|
||||
|
||||
// Test for decimations 2, 4, 8, 16
|
||||
for (int dec = local_dec; dec <= 16 ; dec *= 2) {
|
||||
|
||||
loop_len = (local_len / dec);
|
||||
fir.decim = dec;
|
||||
|
||||
const unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int j = 0 ; j < repeat_count ; j++) {
|
||||
dsps_fird_s16(&fir, x, y, loop_len);
|
||||
}
|
||||
const unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
const float total_b = end_b - start_b;
|
||||
const float cycles = total_b / (float)(repeat_count);
|
||||
const float cycles_per_sample = total_b / (float)(local_len * repeat_count);
|
||||
const float cycles_per_decim_tap = cycles_per_sample / (float)(fir.coeffs_len * fir.decim);
|
||||
|
||||
ESP_LOGI(TAG, ": %.2f total cycles, %.2f cycles per sample, %.2f per decim tap, for %"PRId32" input samples, %"PRId16" coefficients and decimation %"PRId16"\n",
|
||||
cycles, cycles_per_sample, cycles_per_decim_tap, local_len, (int16_t)MAX_FIR_LEN, fir.decim);
|
||||
|
||||
const float min_exec = (((local_len / fir.decim) * fir.coeffs_len) / 2);
|
||||
const float max_exec = min_exec * 20;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fird_s16_aexx noise_snr", "[dsps]")
|
||||
{
|
||||
|
||||
// In the SNR-noise test we are generating a sine wave signal, filtering the signal using a fixed point FIRD filter
|
||||
// and do the FFT of the filtered signal. Afterward, a noise and SNR calculated from the FFT spectrum
|
||||
|
||||
// FIR Coeffs
|
||||
int16_t *s_coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point coefficients
|
||||
int16_t *delay_line = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point delay line
|
||||
float *f_coeffs = (float *)memalign(16, fir_len * sizeof(float)); // floating point coefficients
|
||||
|
||||
// Coefficients windowing
|
||||
dsps_wind_hann_f32(f_coeffs, fir_len);
|
||||
const float fir_order = (float)fir_len - 1;
|
||||
const float ft = 0.25; // Transition frequency
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
f_coeffs[i] *= sinf((2 * M_PI * ft * (i - fir_order / 2))) / (M_PI * (i - fir_order / 2));
|
||||
}
|
||||
|
||||
// FIR coefficients conversion to q15
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
s_coeffs[i] = f_coeffs[i] * Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_coeffs);
|
||||
|
||||
// Signal generation
|
||||
const float amplitude = 0.9;
|
||||
const float frequency = 0.05;
|
||||
const float phase = 0;
|
||||
float *f_in_signal = (float *)memalign(16, fir_buffer * sizeof(float));
|
||||
dsps_tone_gen_f32(f_in_signal, fir_buffer, amplitude, frequency, phase);
|
||||
|
||||
// Input signal conversion to q15
|
||||
int16_t *fir_x = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y2 = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
for (int i = 0; i < fir_buffer; i++) {
|
||||
fir_x[i] = f_in_signal[i] * (int16_t)Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_in_signal);
|
||||
|
||||
// FIR
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
const int32_t loop_len = (int32_t)(fir_buffer / decim); // loop_len result must be without remainder
|
||||
fir_s16_t fir;
|
||||
esp_err_t status = dsps_fird_init_s16(&fir, s_coeffs, delay_line, fir_len, decim, start_pos, shift);
|
||||
fir_s16_t fir2;
|
||||
esp_err_t status2 = dsps_fird_init_s16(&fir2, s_coeffs, delay_line, fir_len, decim, start_pos, shift);
|
||||
error_msg_handler(&fir, status);
|
||||
error_msg_handler(&fir2, status2);
|
||||
|
||||
#if(dsps_fird_s16_aes3_enabled || dsps_fird_s16_arv4_enabled)
|
||||
dsps_16_array_rev(fir.coeffs, fir.coeffs_len);
|
||||
#endif
|
||||
|
||||
dsps_fird_s16(&fir, fir_x, fir_y, loop_len);
|
||||
dsps_fird_s16_ansi(&fir2, fir_x, fir_y2, loop_len);
|
||||
for (int i = 0 ; i < loop_len ; i++) {
|
||||
ESP_LOGD(TAG, "Data[%i] = %i vs %i, diff = %i", i, fir_y[i], fir_y2[i], fir_y[i] - fir_y2[i]);
|
||||
}
|
||||
|
||||
free(delay_line);
|
||||
free(s_coeffs);
|
||||
free(fir_x);
|
||||
|
||||
// FIR Output conversion to float
|
||||
const unsigned int ignored_fir_samples = (FIR_BUFF_LEN / 2) - 1;
|
||||
float *fir_output = (float *)memalign(16, len * sizeof(float));
|
||||
for (int i = 0; i < N_FFT; i++) {
|
||||
fir_output[i] = (float)(fir_y[ignored_fir_samples + i] / (float)Q15_MAX);
|
||||
}
|
||||
|
||||
free(fir_y);
|
||||
|
||||
// Signal windowing
|
||||
float *window = (float *)memalign(16, N_FFT * sizeof(float));
|
||||
dsps_wind_blackman_f32(window, N_FFT);
|
||||
|
||||
// Prepare FFT input, real and imaginary part
|
||||
const int32_t fft_data_len = (N_IN_SAMPLES / DECIMATION) * 2;
|
||||
float *fft_data = (float *)memalign(16, fft_data_len * sizeof(float));
|
||||
for (int i = 0 ; i < N_FFT ; i++) {
|
||||
fft_data[i * 2 + 0] = fir_output[i] * window[i];
|
||||
fft_data[i * 2 + 1] = 0;
|
||||
}
|
||||
free(fir_output);
|
||||
free(window);
|
||||
|
||||
// Initialize FFT
|
||||
esp_err_t ret = dsps_fft2r_init_fc32(NULL, N_FFT * 2);
|
||||
TEST_ESP_OK(ret);
|
||||
|
||||
// Do the FFT
|
||||
dsps_fft2r_fc32(fft_data, N_FFT);
|
||||
dsps_bit_rev_fc32(fft_data, N_FFT);
|
||||
dsps_cplx2reC_fc32(fft_data, N_FFT);
|
||||
|
||||
// Convert the FFT spectrum from amplitude to watts, find the max value and its position
|
||||
float max_val = -1000000;
|
||||
int max_pos = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
fft_data[i] = (fft_data[i * 2 + 0] * fft_data[i * 2 + 0] + fft_data[i * 2 + 1] * fft_data[i * 2 + 1]) / (N_FFT * 3);
|
||||
if (fft_data[i] > max_val) {
|
||||
max_val = fft_data[i];
|
||||
max_pos = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the power of the signal and noise of the spectrum and convert the spectrum to dB
|
||||
float signal_pow = 0, noise_pow = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
if ((i >= max_pos - LEAKAGE_BINS) && (i <= max_pos + LEAKAGE_BINS)) {
|
||||
signal_pow += fft_data[i];
|
||||
} else {
|
||||
noise_pow += fft_data[i];
|
||||
}
|
||||
|
||||
fft_data[i] = 10 * log10f(0.0000000000001 + fft_data[i]);
|
||||
}
|
||||
|
||||
// Convert the signal power and noise power from watts to dB and calculate SNR
|
||||
const float snr = 10 * log10f(signal_pow / noise_pow);
|
||||
noise_pow = 10 * log10f(noise_pow);
|
||||
signal_pow = 10 * log10f(signal_pow);
|
||||
|
||||
ESP_LOGI(TAG, "\nSignal Power: \t%f\nNoise Power: \t%f\nSNR: \t\t%f", signal_pow, noise_pow, snr);
|
||||
dsps_view(fft_data, N_FFT / 2, 64, 16, -140, 40, '|');
|
||||
free(fft_data);
|
||||
|
||||
const float min_exec_snr = 50;
|
||||
const float max_exec_snr = 120;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec_snr, max_exec_snr, snr);
|
||||
dsps_fird_s16_aexx_free(&fir);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_wind.h"
|
||||
#include "dsps_view.h"
|
||||
#include "dsps_fft2r.h"
|
||||
|
||||
#define COEFFS 64
|
||||
#define N_IN_SAMPLES 1024
|
||||
#define DECIMATION 2
|
||||
#define Q15_MAX INT16_MAX
|
||||
#define LEAKAGE_BINS 10
|
||||
#define FIR_BUFF_LEN 16
|
||||
|
||||
static const char *TAG = "dsps_fird_s16_ansi";
|
||||
|
||||
const static int32_t len = N_IN_SAMPLES;
|
||||
const static int32_t N_FFT = (N_IN_SAMPLES / DECIMATION);
|
||||
const static int16_t decim = DECIMATION;
|
||||
const static int16_t fir_len = COEFFS;
|
||||
const static int32_t fir_buffer = (N_IN_SAMPLES + FIR_BUFF_LEN);
|
||||
|
||||
|
||||
// error messages for the init functions
|
||||
static void error_msg_handler(fir_s16_t *fir, esp_err_t status)
|
||||
{
|
||||
|
||||
if (status != ESP_OK) {
|
||||
dsps_fird_s16_aexx_free(fir);
|
||||
|
||||
switch (status) {
|
||||
case ESP_ERR_DSP_INVALID_LENGTH:
|
||||
TEST_ASSERT_MESSAGE(false, "Number of the coefficients must be higher than 1");
|
||||
break;
|
||||
case ESP_ERR_DSP_ARRAY_NOT_ALIGNED:
|
||||
TEST_ASSERT_MESSAGE(false, "Delay line or (and) coefficients arrays not aligned");
|
||||
break;
|
||||
case ESP_ERR_DSP_PARAM_OUTOFRANGE:
|
||||
TEST_ASSERT_MESSAGE(false, "Start position or (and) Decimation ratio or (and) Shift out of range");
|
||||
break;
|
||||
default:
|
||||
TEST_ASSERT_MESSAGE(false, "Unspecified error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_ansi functionality", "[dsps]")
|
||||
{
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
const int16_t dec = decim;
|
||||
const int32_t output_len = (int32_t)(len / dec);
|
||||
|
||||
fir_s16_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 0x4000;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0x4000;
|
||||
}
|
||||
|
||||
esp_err_t status = dsps_fird_init_s16(&fir1, coeffs, delay, fir_len, dec, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
|
||||
const int32_t total = dsps_fird_s16_ansi(&fir1, x, y, output_len);
|
||||
|
||||
ESP_LOGI(TAG, "%"PRId32" input samples, decimation %"PRId16",total result = %"PRId32"\n", len, dec, total);
|
||||
TEST_ASSERT_EQUAL(total, len / decim);
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %d\n", i, y[i]);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
TEST_ASSERT_EQUAL(y[i], (0x2000));
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
|
||||
const int repeat_count = 4;
|
||||
const int16_t dec = 1;
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
int32_t output_len = 0;
|
||||
|
||||
fir_s16_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
esp_err_t status = dsps_fird_init_s16(&fir1, coeffs, delay, fir_len, dec, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
|
||||
// Decimations 1, 2, 4, 8
|
||||
for (int i = 0 ; i < 4 ; i++) {
|
||||
|
||||
output_len = (int32_t)(len / fir1.decim);
|
||||
const unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fird_s16_ansi(&fir1, x, y, output_len);
|
||||
}
|
||||
const unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
const float total_b = end_b - start_b;
|
||||
const float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "total cycles %f per sample for %"PRId16" coefficients, decimation %"PRId16", %f per decim tap \n",
|
||||
cycles, fir_len, fir1.decim, cycles / (float)fir_len * fir1.decim);
|
||||
float min_exec = 10;
|
||||
float max_exec = 1500;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
fir1.decim *= 2;
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_ansi noise_snr", "[dsps]")
|
||||
{
|
||||
|
||||
// In the SNR-noise test we are generating a sine wave signal, filtering the signal using a fixed point FIRD filter
|
||||
// and do the FFT of the filtered signal. Afterward, a noise and SNR calculated from the FFT spectrum
|
||||
|
||||
// FIR Coeffs
|
||||
int16_t *s_coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point coefficients
|
||||
int16_t *delay_line = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point delay line
|
||||
float *f_coeffs = (float *)memalign(16, fir_len * sizeof(float)); // floating point coefficients
|
||||
|
||||
// Coefficients windowing
|
||||
dsps_wind_hann_f32(f_coeffs, fir_len);
|
||||
const float fir_order = (float)fir_len - 1;
|
||||
const float ft = 0.25; // sine frequency
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
f_coeffs[i] *= sinf((2 * M_PI * ft * (i - fir_order / 2))) / (M_PI * (i - fir_order / 2));
|
||||
}
|
||||
|
||||
// FIR coefficients conversion to q15
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
s_coeffs[i] = f_coeffs[i] * Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_coeffs);
|
||||
|
||||
// Signal generation
|
||||
const float amplitude = 0.9;
|
||||
const float frequency = 0.05;
|
||||
const float phase = 0;
|
||||
float *f_in_signal = (float *)memalign(16, fir_buffer * sizeof(float));
|
||||
dsps_tone_gen_f32(f_in_signal, fir_buffer, amplitude, frequency, phase);
|
||||
|
||||
// Input signal conversion to q15
|
||||
int16_t *fir_x = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
for (int i = 0; i < fir_buffer; i++) {
|
||||
fir_x[i] = f_in_signal[i] * (int16_t)Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_in_signal);
|
||||
|
||||
// FIR
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
const int32_t output_len = (int32_t)(fir_buffer / decim);
|
||||
fir_s16_t fir1;
|
||||
esp_err_t status = dsps_fird_init_s16(&fir1, s_coeffs, delay_line, fir_len, decim, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
dsps_fird_s16_ansi(&fir1, fir_x, fir_y, output_len);
|
||||
|
||||
free(delay_line);
|
||||
free(s_coeffs);
|
||||
free(fir_x);
|
||||
|
||||
// FIR Output conversion to float
|
||||
const unsigned int ignored_fir_samples = (FIR_BUFF_LEN / 2) - 1;
|
||||
float *fir_output = (float *)memalign(16, len * sizeof(float));
|
||||
for (int i = 0; i < N_FFT; i++) {
|
||||
fir_output[i] = (float)(fir_y[ignored_fir_samples + i] / (float)Q15_MAX);
|
||||
}
|
||||
|
||||
free(fir_y);
|
||||
|
||||
// Signal windowing
|
||||
float *window = (float *)memalign(16, N_FFT * sizeof(float));
|
||||
dsps_wind_blackman_f32(window, N_FFT);
|
||||
|
||||
// Prepare FFT input, real and imaginary part
|
||||
const int32_t fft_data_len = (N_IN_SAMPLES / DECIMATION) * 2;
|
||||
float *fft_data = (float *)memalign(16, fft_data_len * sizeof(float));
|
||||
for (int i = 0 ; i < N_FFT ; i++) {
|
||||
fft_data[i * 2 + 0] = fir_output[i] * window[i];
|
||||
fft_data[i * 2 + 1] = 0;
|
||||
}
|
||||
free(fir_output);
|
||||
free(window);
|
||||
|
||||
// Initialize FFT
|
||||
esp_err_t ret = dsps_fft2r_init_fc32(NULL, N_FFT * 2);
|
||||
TEST_ESP_OK(ret);
|
||||
|
||||
// Do the FFT
|
||||
dsps_fft2r_fc32(fft_data, N_FFT);
|
||||
dsps_bit_rev_fc32(fft_data, N_FFT);
|
||||
dsps_cplx2reC_fc32(fft_data, N_FFT);
|
||||
|
||||
// Convert the FFT spectrum from amplitude to watts, find the max value and its position
|
||||
float max_val = -1000000;
|
||||
int max_pos = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
fft_data[i] = (fft_data[i * 2 + 0] * fft_data[i * 2 + 0] + fft_data[i * 2 + 1] * fft_data[i * 2 + 1]) / (N_FFT * 3);
|
||||
if (fft_data[i] > max_val) {
|
||||
max_val = fft_data[i];
|
||||
max_pos = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the power of the signal and noise of the spectrum and convert the spectrum to dB
|
||||
float signal_pow = 0, noise_pow = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
if ((i >= max_pos - LEAKAGE_BINS) && (i <= max_pos + LEAKAGE_BINS)) {
|
||||
signal_pow += fft_data[i];
|
||||
} else {
|
||||
noise_pow += fft_data[i];
|
||||
}
|
||||
|
||||
fft_data[i] = 10 * log10f(0.0000000000001 + fft_data[i]);
|
||||
}
|
||||
|
||||
// Convert the signal power and noise power to dB, calculate SNR
|
||||
const float snr = 10 * log10f(signal_pow / noise_pow);
|
||||
noise_pow = 10 * log10f(noise_pow);
|
||||
signal_pow = 10 * log10f(signal_pow);
|
||||
|
||||
ESP_LOGI(TAG, "\nSignal Power: \t%f\nNoise Power: \t%f\nSNR: \t\t%f", signal_pow, noise_pow, snr);
|
||||
dsps_view(fft_data, N_FFT / 2, 64, 16, -140, 40, '|');
|
||||
free(fft_data);
|
||||
|
||||
const float min_exec_snr = 50;
|
||||
const float max_exec_snr = 120;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec_snr, max_exec_snr, snr);
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
void test_fir();
|
||||
|
||||
int main(void)
|
||||
{
|
||||
printf("main starts!\n");
|
||||
// xt_iss_profile_enable();
|
||||
test_fir();
|
||||
// xt_iss_profile_disable();
|
||||
|
||||
printf("Test done\n");
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "dsp_common.h"
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
float x[1024];
|
||||
float y[1024];
|
||||
float y_compare[1024];
|
||||
|
||||
float coeffs[256];
|
||||
float delay[256];
|
||||
float delay_compare[256];
|
||||
|
||||
|
||||
void test_fir()
|
||||
{
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
|
||||
fir_f32_t fir1;
|
||||
fir_f32_t fir2;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
x[0] = 1;
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len / 4);
|
||||
dsps_fir_init_f32(&fir2, coeffs, delay_compare, fir_len);
|
||||
|
||||
xt_iss_profile_enable();
|
||||
dsps_fir_f32_aes3(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
xt_iss_profile_disable();
|
||||
|
||||
printf("Test Pass!\n");
|
||||
}
|
||||
Reference in New Issue
Block a user