add some code
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_add_platform.h"
|
||||
#if (dsps_add_s16_ae32_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_add_s16_ae32
|
||||
.type dsps_add_s16_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
|
||||
// output[i * step_out] = acc >> shift;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
dsps_add_s16_ae32:
// Element-wise add of two int16 arrays, each sum shifted right by `shift`:
//   output[i * step_out] = (input1[i * step1] + input2[i * step2]) >> shift
//
// Register usage:
//   a2  - input1 pointer
//   a3  - input2 pointer
//   a4  - output pointer
//   a5  - len (number of elements)
//   a6  - step_in1 (in elements; scaled to bytes below)
//   a7  - step_in2 (in elements; scaled to bytes below)
//   a10 - step_out, loaded from the stack (scaled to bytes below)
//   a9  - shift, loaded from the stack; reused for the result inside the loop
//   a8, a11 - scratch
// Returns 0 (ESP_OK) in a2.

    entry   a1, 16

    l32i.n  a10, a1, 16         // a10 = step_out
    l32i.n  a9, a1, 20          // a9  = shift
    ssr     a9                  // SAR = shift amount used by sra below

    slli    a6, a6, 1           // a6  = step_in1 in bytes (int16 elements)
    slli    a7, a7, 1           // a7  = step_in2 in bytes
    slli    a10, a10, 1         // a10 = step_out in bytes

    // Every load happens inside the loop, so nothing is read when len == 0
    // and nothing is read past the last element of either input.
    loopnez a5, .loop_end_add_s16_ae32
        l16si   a11, a2, 0      // a11 = *input1 (sign-extended)
        l16si   a8, a3, 0       // a8  = *input2 (sign-extended)
        add.n   a2, a2, a6      // input1 += step_in1
        add.n   a3, a3, a7      // input2 += step_in2
        add     a8, a11, a8     // a8 = 32-bit sum
        sra     a9, a8          // a9 = a8 >> SAR, arithmetic: the sum is signed
        s16i    a9, a4, 0       // *output = low 16 bits of the result
        add.n   a4, a4, a10     // output += step_out
.loop_end_add_s16_ae32:
    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_add_s16_ae32_enabled
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include "dsps_add_platform.h"
|
||||
#if (dsps_add_s16_aes3_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_add_s16_aes3
|
||||
.type dsps_add_s16_aes3,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
|
||||
// output[i * step_out] = acc >> shift;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
dsps_add_s16_aes3:
// Element-wise add of two int16 arrays, each sum shifted right by `shift`:
//   output[i * step_out] = (input1[i * step1] + input2[i * step2]) >> shift
//
// Register usage:
//   a2  - input1 pointer
//   a3  - input2 pointer
//   a4  - output pointer
//   a5  - len (number of elements)
//   a6  - step_in1 (in elements)
//   a7  - step_in2 (in elements)
//   a10 - step_out, loaded from the stack
//   a9  - shift, loaded from the stack
//   a8, a11, a15 - scratch
// Returns 0 (ESP_OK) in a2.
//
// The 128-bit vector path is taken only when all steps are 1, shift is 0,
// all three pointers are 16-byte aligned and len is a multiple of 8.
// Anything else goes through the scalar loop.
// NOTE(review): ee.vadds.s16 is documented as a saturating add, while the
// scalar path wraps on int16 overflow - confirm this difference is acceptable.

    entry   a1, 16

    l32i.n  a10, a1, 16         // a10 = step_out
    l32i.n  a9, a1, 20          // a9  = shift
    ssr     a9                  // SAR = shift amount (used by the scalar path)

    // Vector path needs contiguous data: every step must be exactly 1
    addi    a15, a6, -1
    bnez    a15, .add_s16_ae32_mode     // step_in1 != 1
    addi    a15, a7, -1
    bnez    a15, .add_s16_ae32_mode     // step_in2 != 1
    addi    a15, a10, -1
    bnez    a15, .add_s16_ae32_mode     // step_out != 1

    // The vector add applies no shift, so a non-zero shift must go scalar
    bnez    a9, .add_s16_ae32_mode

    // 128-bit loads/stores need 16-byte aligned addresses (inputs AND output)
    movi    a15, 0xF                    // modulo 16 mask
    bany    a2, a15, .add_s16_ae32_mode
    bany    a3, a15, .add_s16_ae32_mode
    bany    a4, a15, .add_s16_ae32_mode

    // One vector holds 8 int16 values: len must be a multiple of 8
    movi    a15, 0x7                    // modulo 8 mask
    bany    a5, a15, .add_s16_ae32_mode

    srli    a5, a5, 3                   // a5 = number of 8-element vectors
    beqz    a5, .add_s16_aes3_exit      // nothing to do, and nothing to read
    addi    a5, a5, -1                  // last vector is handled after the loop

    ee.vld.128.ip   q0, a2, 16          // preload first input1 vector
    loopnez a5, .loop_end_add_s16_aes3_main
        ee.vld.128.ip           q1, a3, 16
        ee.vadds.s16.ld.incp    q0, a2, q4, q0, q1  // q4 = q0 + q1; q0 = next input1
        ee.vst.128.ip           q4, a4, 16
.loop_end_add_s16_aes3_main:
    // Final vector: plain add, so no load is issued past the end of input1
    ee.vld.128.ip   q1, a3, 16
    ee.vadds.s16    q4, q0, q1
    ee.vst.128.ip   q4, a4, 16

.add_s16_aes3_exit:
    // Exit for Esp32s3 mode
    movi.n  a2, 0                       // return status ESP_OK
    retw.n

.add_s16_ae32_mode:
    slli    a6, a6, 1                   // a6  = step_in1 in bytes
    slli    a7, a7, 1                   // a7  = step_in2 in bytes
    slli    a10, a10, 1                 // a10 = step_out in bytes

    // Every load happens inside the loop: no read when len == 0 and no read
    // past the last element.
    loopnez a5, .loop_end_add_s16_aes3
        l16si   a11, a2, 0              // a11 = *input1 (sign-extended)
        l16si   a8, a3, 0               // a8  = *input2 (sign-extended)
        add.n   a2, a2, a6              // input1 += step_in1
        add.n   a3, a3, a7              // input2 += step_in2
        add     a8, a11, a8             // a8 = 32-bit sum
        sra     a9, a8                  // a9 = a8 >> SAR (arithmetic)
        s16i    a9, a4, 0               // store result to the output
        add.n   a4, a4, a10             // output += step_out
.loop_end_add_s16_aes3:
    // Exit for Esp32 mode
    movi.n  a2, 0                       // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_add_s16_aes3_enabled
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include "dsps_add.h"
|
||||
|
||||
esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
|
||||
output[i * step_out] = acc >> shift;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_add_platform.h"
|
||||
#if (dsps_add_s16_aes3_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_add_s8_aes3
|
||||
.type dsps_add_s8_aes3,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_add_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
|
||||
// output[i * step_out] = acc >> shift;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
dsps_add_s8_aes3:
// Element-wise add of two int8 arrays, each sum shifted right by `shift`:
//   output[i * step_out] = (input1[i * step1] + input2[i * step2]) >> shift
//
// Register usage:
//   a2  - input1 pointer
//   a3  - input2 pointer
//   a4  - output pointer
//   a5  - len (number of elements)
//   a6  - step_in1 (elements == bytes for int8, so no scaling is needed)
//   a7  - step_in2
//   a10 - step_out, loaded from the stack
//   a9  - shift, loaded from the stack
//   a8, a11, a15 - scratch
// Returns 0 (ESP_OK) in a2.
//
// The 128-bit vector path is taken only when all steps are 1, shift is 0,
// all three pointers are 16-byte aligned and len is a multiple of 16.
// Anything else goes through the scalar loop.
// NOTE(review): ee.vadds.s8 is documented as a saturating add, while the
// scalar path wraps on int8 overflow - confirm this difference is acceptable.

    entry   a1, 16

    l32i.n  a10, a1, 16         // a10 = step_out
    l32i.n  a9, a1, 20          // a9  = shift
    ssr     a9                  // SAR = shift amount (used by the scalar path)

    // Vector path needs contiguous data: every step must be exactly 1
    addi    a15, a6, -1
    bnez    a15, .add_s8_ae32_mode      // step_in1 != 1
    addi    a15, a7, -1
    bnez    a15, .add_s8_ae32_mode      // step_in2 != 1
    addi    a15, a10, -1
    bnez    a15, .add_s8_ae32_mode      // step_out != 1

    // The vector add applies no shift, so a non-zero shift must go scalar
    bnez    a9, .add_s8_ae32_mode

    // 128-bit loads/stores need 16-byte aligned addresses (inputs AND output)
    movi    a15, 0xF                    // modulo 16 mask
    bany    a2, a15, .add_s8_ae32_mode
    bany    a3, a15, .add_s8_ae32_mode
    bany    a4, a15, .add_s8_ae32_mode

    // One vector holds 16 int8 values: len must be a multiple of 16
    // (a15 still holds the modulo 16 mask)
    bany    a5, a15, .add_s8_ae32_mode

    srli    a5, a5, 4                   // a5 = number of 16-element vectors
    beqz    a5, .add_s8_aes3_exit       // nothing to do, and nothing to read
    addi    a5, a5, -1                  // last vector is handled after the loop

    ee.vld.128.ip   q0, a2, 16          // preload first input1 vector
    loopnez a5, .loop_end_add_s8_aes3_main
        ee.vld.128.ip           q1, a3, 16
        ee.vadds.s8.ld.incp     q0, a2, q4, q0, q1  // q4 = q0 + q1; q0 = next input1
        ee.vst.128.ip           q4, a4, 16
.loop_end_add_s8_aes3_main:
    // Final vector: plain add, so no load is issued past the end of input1
    ee.vld.128.ip   q1, a3, 16
    ee.vadds.s8     q4, q0, q1
    ee.vst.128.ip   q4, a4, 16

.add_s8_aes3_exit:
    // Exit for Esp32s3 mode
    movi.n  a2, 0                       // return status ESP_OK
    retw.n

.add_s8_ae32_mode:
    // Every load happens inside the loop: no read when len == 0 and no read
    // past the last element.
    loopnez a5, .loop_end_add_s8_aes3
        l8ui    a11, a2, 0              // a11 = *input1 (zero-extended byte)
        l8ui    a8, a3, 0               // a8  = *input2 (zero-extended byte)
        add.n   a2, a2, a6              // input1 += step_in1
        add.n   a3, a3, a7              // input2 += step_in2
        sext    a11, a11, 7             // l8ui does not sign-extend: restore int8 sign
        sext    a8, a8, 7
        add     a8, a11, a8             // a8 = 32-bit signed sum
        sra     a9, a8                  // a9 = a8 >> SAR (arithmetic)
        s8i     a9, a4, 0               // store result to the output
        add.n   a4, a4, a10             // output += step_out
.loop_end_add_s8_aes3:
    // Exit for Esp32 mode
    movi.n  a2, 0                       // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_add_s16_aes3_enabled
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_add.h"
|
||||
|
||||
esp_err_t dsps_add_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
|
||||
output[i * step_out] = acc >> shift;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_add_platform.h"
|
||||
#if (dsps_add_f32_ae32_enabled == 1)
|
||||
|
||||
// This is the element-wise float array addition for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_add_f32_ae32
|
||||
.type dsps_add_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_add_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// output[i * step_out] = input1[i * step1] + input2[i * step2];
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dsps_add_f32_ae32:
// Element-wise add of two float arrays:
//   output[i * step_out] = input1[i * step1] + input2[i * step2]
//
// Register usage:
//   a2 - input1 pointer
//   a3 - input2 pointer
//   a4 - output pointer
//   a5 - len (number of elements)
//   a6 - step1 (in elements; scaled to bytes below)
//   a7 - step2 (in elements; scaled to bytes below)
//   a8 - step_out, loaded from the stack (scaled to bytes below)
//   f0, f1, f2 - operands and result
// Returns 0 (ESP_OK) in a2.

    entry   a1, 16

    l32i.n  a8, a1, 16          // a8 = step_out
    slli    a6, a6, 2           // a6 = step1 in bytes (4-byte floats)
    slli    a7, a7, 2           // a7 = step2 in bytes
    slli    a8, a8, 2           // a8 = step_out in bytes

    // Both loads happen inside the loop, so nothing is read when len == 0
    // and input1 is never read past its last element.
    loopnez a5, loop_end_add_f32_ae32
        lsi     f0, a2, 0       // f0 = *input1
        lsi     f1, a3, 0       // f1 = *input2
        add.n   a2, a2, a6      // input1 += step1
        add.s   f2, f1, f0      // f2 = f1 + f0
        add.n   a3, a3, a7      // input2 += step2
        ssi     f2, a4, 0       // *output = f2
        add.n   a4, a4, a8      // output += step_out
loop_end_add_f32_ae32:

    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_add_f32_ae32_enabled
|
||||
@@ -0,0 +1,33 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_add.h"
|
||||
|
||||
esp_err_t dsps_add_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
output[i * step_out] = input1[i * step1] + input2[i * step2];
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_add_H_
#define _dsps_add_H_
#include "dsp_err.h"

#include "dsps_add_platform.h"


#ifdef __cplusplus
extern "C"
{
#endif


/**@{*/
/**
 * @brief   Element-wise addition of two arrays
 *
 * Adds one input array to another:
 * out[i*step_out] = input1[i*step1] + input2[i*step2]; i=[0..len)
 * The fixed-point variants (s16, s8) compute the sum in 32 bits and shift it
 * right by `shift` before storing.
 * The _ansi implementations use ANSI C and can be compiled and run on any platform.
 *
 * @param[in] input1: input array 1
 * @param[in] input2: input array 2
 * @param output: output array
 * @param len: amount of operations for arrays
 * @param step1: step over input array 1 (by default should be 1)
 * @param step2: step over input array 2 (by default should be 1)
 * @param step_out: step over output array (by default should be 1)
 * @param shift: right shift applied to each sum (fixed-point variants only)
 *
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dsps_add_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
esp_err_t dsps_add_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);

esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_add_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_add_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);

esp_err_t dsps_add_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_add_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);

/**@}*/

#ifdef __cplusplus
}
#endif

/* Map the generic names onto the best implementation enabled for this target. */
#if CONFIG_DSP_OPTIMIZED

/* float */
#if (dsps_add_f32_ae32_enabled == 1)
#define dsps_add_f32 dsps_add_f32_ae32
#else
#define dsps_add_f32 dsps_add_f32_ansi
#endif

/* int16 */
#if (dsps_add_s16_aes3_enabled == 1)
#define dsps_add_s16 dsps_add_s16_aes3
#elif (dsps_add_s16_ae32_enabled == 1)
#define dsps_add_s16 dsps_add_s16_ae32
#else
#define dsps_add_s16 dsps_add_s16_ansi
#endif

/* int8: the optimized version is keyed on the same switch as the s16 one */
#if (dsps_add_s16_aes3_enabled == 1)
#define dsps_add_s8 dsps_add_s8_aes3
#else
#define dsps_add_s8 dsps_add_s8_ansi
#endif

#else // CONFIG_DSP_OPTIMIZED

#define dsps_add_f32 dsps_add_f32_ansi
#define dsps_add_s16 dsps_add_s16_ansi
#define dsps_add_s8 dsps_add_s8_ansi

#endif // CONFIG_DSP_OPTIMIZED

#endif // _dsps_add_H_
|
||||
@@ -0,0 +1,32 @@
|
||||
#ifndef _dsps_add_platform_H_
#define _dsps_add_platform_H_

#include "sdkconfig.h"

/*
 * Selects which optimized dsps_add implementations are built.
 * Each dsps_add_*_enabled macro is defined to 1 when the matching assembly
 * routine can be used on the current target, and left undefined otherwise.
 */
#ifdef __XTENSA__
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>

/* The float routine uses FPU instructions and zero-overhead loops. */
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
#define dsps_add_f32_ae32_enabled  1
#endif

/*
 * Fixed-point routines: ESP32-S3 gets the aes3 version, any other core with
 * zero-overhead loops gets the ae32 version (no FPU required).
 */
#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
#define dsps_add_s16_aes3_enabled  1
#elif (XCHAL_HAVE_LOOPS == 1)
#define dsps_add_s16_ae32_enabled  1
#endif

#endif // __XTENSA__

#endif // _dsps_add_platform_H_
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_add.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_add";
|
||||
|
||||
// In-place x = x + x through the ANSI implementation must double every element.
TEST_CASE("dsps_add_f32_ansi functionality", "[dsps]")
{
    int n = 32;
    float x[n];
    float y[n];

    int k = 0;
    while (k < n) {
        x[k] = k;           // operand
        y[k] = 2 * k;       // expected sum
        k++;
    }

    dsps_add_f32_ansi(x, x, x, n, 1, 1, 1);

    for (k = 0 ; k < n ; k++) {
        if (x[k] != y[k]) {
            TEST_ASSERT_EQUAL(x[k], y[k]);
        }
    }
}
|
||||
|
||||
// Checks the selected dsps_add_f32 implementation, then times a single call.
TEST_CASE("dsps_add_f32 functionality", "[dsps]")
{
    int n = 32;
    float x[n];
    float y[n];

    int k = 0;
    while (k < n) {
        x[k] = k;           // operand
        y[k] = 2 * k;       // expected sum
        k++;
    }

    dsps_add_f32(x, x, x, n, 1, 1, 1);

    for (k = 0 ; k < n ; k++) {
        if (x[k] != y[k]) {
            TEST_ASSERT_EQUAL(x[k], y[k]);
        }
    }

    int repeat_count = 1;

    // Untimed call before the measurement (presumably a warm-up)
    dsps_add_f32(x, x, x, n, 1, 1, 1);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_add_f32(x, x, x, n, 1, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    const unsigned int elapsed = end_b - start_b;
    float cycles = (float)elapsed / (n * repeat_count);
    ESP_LOGI(TAG, "dsps_add_f32 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_add.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
#if (dsps_add_s16_ae32_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_add";
|
||||
// In-place x = x + x through the ae32 routine must match the C-computed reference.
TEST_CASE("dsps_add_s16_ae32 functionality", "[dsps]")
{
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int shift = 0;

    int k = 0;
    while (k < n) {
        x[k] = k << 4;
        // Reference: widen, add, shift - same formula as the ANSI version
        int32_t expected = ((int32_t)x[k] + (int32_t)x[k]) >> shift;
        y[k] = expected;
        k++;
    }

    dsps_add_s16_ae32(x, x, x, n, 1, 1, 1, shift);

    for (k = 0 ; k < n ; k++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", k, x[k], y[k]);
        if (x[k] != y[k]) {
            TEST_ASSERT_EQUAL(x[k], y[k]);
        }
    }
}
|
||||
|
||||
// Times one in-place call of dsps_add_s16_ae32 and logs the cost per sample.
TEST_CASE("dsps_add_s16_ae32 benchmark", "[dsps]")
{
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_add_s16_ae32(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The message reports cycles per sample, so divide the total by n
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_add_s16_ae32 - %f cycles per sample \n", cycles);
}
|
||||
|
||||
#endif // (dsps_add_s16_ae32_enabled == 1)
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_add.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#if (dsps_add_s16_aes3_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_add";
|
||||
// In-place x = x + x through the aes3 routine, on 16-byte aligned buffers,
// must match the C-computed reference.
TEST_CASE("dsps_add_s16_aes3 functionality", "[dsps]")
{
    int n = 64;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
    if ((x == NULL) || (y == NULL)) {
        // Fail cleanly instead of dereferencing NULL; free(NULL) is a no-op
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    int32_t temp;
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        temp = ((int32_t)x[i] + (int32_t)x[i]) >> shift;
        y[i] = temp;
    }

    dsps_add_s16_aes3(x, x, x, n, 1, 1, 1, shift);
    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        if (x[i] != y[i]) {
            TEST_ASSERT_EQUAL(x[i], y[i]);
        }
    }
    free(x);
    free(y);
}
|
||||
|
||||
// Times one in-place call of dsps_add_s16_aes3 on a 16-byte aligned buffer
// and logs the cost per sample.
TEST_CASE("dsps_add_s16_aes3 benchmark", "[dsps]")
{
    const int n = 2048;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_add_s16_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The message reports cycles per sample, so divide the total by n
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_add_s16_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
#endif // (dsps_add_s16_aes3_enabled == 1)
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_add.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_add";
|
||||
|
||||
// In-place x = x + x through the ANSI routine must match the C-computed reference.
TEST_CASE("dsps_add_s16_ansi functionality", "[dsps]")
{
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int shift = 0;

    int k = 0;
    while (k < n) {
        x[k] = k << 4;
        // Reference: widen, add, shift
        int32_t expected = ((int32_t)x[k] + (int32_t)x[k]) >> shift;
        y[k] = expected;
        k++;
    }

    dsps_add_s16_ansi(x, x, x, n, 1, 1, 1, 0);

    for (k = 0 ; k < n ; k++) {
        if (x[k] != y[k]) {
            TEST_ASSERT_EQUAL(x[k], y[k]);
        }
    }
}
|
||||
|
||||
// Times one in-place call of dsps_add_s16_ansi and logs the cost per sample.
TEST_CASE("dsps_add_s16_ansi benchmark", "[dsps]")
{
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_add_s16_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The message reports cycles per sample, so divide the total by n
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_add_s16_ansi - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_add.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#if (dsps_add_s16_aes3_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_add";
|
||||
// In-place x = x + x through the aes3 int8 routine, on 16-byte aligned
// buffers, must match the C-computed reference.
TEST_CASE("dsps_add_s8_aes3 functionality", "[dsps]")
{
    int n = 64;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
    if ((x == NULL) || (y == NULL)) {
        // Fail cleanly instead of dereferencing NULL; free(NULL) is a no-op
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    int32_t temp;
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i - n / 2;   // covers negative and positive values
        temp = ((int32_t)x[i] + (int32_t)x[i]) >> shift;
        y[i] = temp;
    }

    dsps_add_s8_aes3(x, x, x, n, 1, 1, 1, shift);
    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        if (x[i] != y[i]) {
            TEST_ASSERT_EQUAL(x[i], y[i]);
        }
    }
    free(x);
    free(y);
}
|
||||
|
||||
// Times one in-place call of dsps_add_s8_aes3 on a 16-byte aligned buffer
// and logs the cost per sample.
TEST_CASE("dsps_add_s8_aes3 benchmark", "[dsps]")
{
    const int n = 2048;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_add_s8_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The message reports cycles per sample, so divide the total by n
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_add_s8_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
#endif // (dsps_add_s16_aes3_enabled == 1)
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_add.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_add";
|
||||
|
||||
// In-place x = x + x through the ANSI int8 routine must match the C-computed reference.
TEST_CASE("dsps_add_s8_ansi functionality", "[dsps]")
{
    int n = 64;
    int8_t x[n];
    int8_t y[n];
    int shift = 0;

    int k = 0;
    while (k < n) {
        x[k] = k - n / 2;   // covers negative and positive values
        // Reference: widen, add, shift
        int32_t expected = ((int32_t)x[k] + (int32_t)x[k]) >> shift;
        y[k] = expected;
        k++;
    }

    dsps_add_s8_ansi(x, x, x, n, 1, 1, 1, 0);

    for (k = 0 ; k < n ; k++) {
        if (x[k] != y[k]) {
            TEST_ASSERT_EQUAL(x[k], y[k]);
        }
    }
}
|
||||
|
||||
// Times one in-place call of dsps_add_s8_ansi and logs the cost per sample.
TEST_CASE("dsps_add_s8_ansi benchmark", "[dsps]")
{
    const int n = 256;
    int8_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_add_s8_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The message reports cycles per sample, so divide the total by n
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_add_s8_ansi - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_addc_platform.h"
|
||||
#if (dsps_addc_f32_ae32_enabled == 1)
|
||||
|
||||
// This adds a constant to a float array, for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_addc_f32_ae32
|
||||
.type dsps_addc_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_addc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// output[i * step_out] = input[i * step_in] + C;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
dsps_addc_f32_ae32:
// Adds the constant C to every element of a float array:
//   output[i * step_out] = input[i * step_in] + C
//
// Register usage:
//   a2 - input pointer
//   a3 - output pointer
//   a4 - len (number of elements)
//   a5 - C (float bit pattern, moved into f0)
//   a6 - step_in  (in elements; scaled to bytes below)
//   a7 - step_out (in elements; scaled to bytes below)
// Returns 0 (ESP_OK) in a2.

    entry   a1, 16

    wfr     f0, a5              // f0 = C
    slli    a6, a6, 2           // a6 = step_in in bytes (4-byte floats)
    slli    a7, a7, 2           // a7 = step_out in bytes

    loopnez a4, loop_end_addc_f32_ae32
        lsi     f1, a2, 0       // f1 = *input
        add.n   a2, a2, a6      // input += step_in
        add.s   f2, f1, f0      // f2 = f1 + C
        ssi     f2, a3, 0       // *output = f2
        add.n   a3, a3, a7      // output += step_out
loop_end_addc_f32_ae32:

    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_addc_f32_ae32_enabled
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_addc.h"
|
||||
|
||||
esp_err_t dsps_addc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
|
||||
{
|
||||
if (NULL == input) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
output[i * step_out] = input[i * step_in] + C;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_addc_H_
|
||||
#define _dsps_addc_H_
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dsps_addc_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief add constant
|
||||
*
|
||||
* The function adds constant to the input array
|
||||
* output[i*step_out] = input[i*step_in] + C; i=[0..len)
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input: input array
|
||||
* @param output: output array
|
||||
* @param len: amount of operations for arrays
|
||||
* @param C: constant value
|
||||
* @param step_in: step over input array (by default should be 1)
|
||||
* @param step_out: step over output array (by default should be 1)
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_addc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out);
|
||||
esp_err_t dsps_addc_f32_ae32(const float *input, float *output, int len, float C, int step_in, int step_out);
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
#if (dsps_addc_f32_ae32_enabled == 1)
|
||||
#define dsps_addc_f32 dsps_addc_f32_ae32
|
||||
#else
|
||||
#define dsps_addc_f32 dsps_addc_f32_ansi
|
||||
#endif
|
||||
#else
|
||||
#define dsps_addc_f32 dsps_addc_f32_ansi
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif // _dsps_addc_H_
|
||||
@@ -0,0 +1,19 @@
|
||||
#ifndef _dsps_addc_platform_H_
|
||||
#define _dsps_addc_platform_H_
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#ifdef __XTENSA__
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#include <xtensa/config/core-matmap.h>
|
||||
|
||||
|
||||
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
|
||||
|
||||
#define dsps_addc_f32_ae32_enabled 1
|
||||
|
||||
#endif
|
||||
#endif // __XTENSA__
|
||||
|
||||
|
||||
#endif // _dsps_addc_platform_H_
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_addc.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_addc";
|
||||
|
||||
|
||||
// Checks the ANSI add-constant kernel in place: x[i] = i must become i + 10.
TEST_CASE("dsps_addc_f32_ansi functionality", "[dsps]")
{
    int n = 64;
    float x[n];
    float y[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i;
        y[i] = i + 10;
    }

    // The call must succeed, not just produce plausible data
    TEST_ASSERT_EQUAL(ESP_OK, dsps_addc_f32_ansi(x, x, n, 10, 1, 1));

    for (int i = 0 ; i < n ; i++) {
        // Float-aware comparison: TEST_ASSERT_EQUAL would compare the values
        // as integers and let a mismatch in the fractional part slip through.
        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
    }
}
|
||||
|
||||
// Checks the platform-selected add-constant kernel in place, then reports
// how many CPU cycles one call takes per sample.
TEST_CASE("dsps_addc_f32 functionality", "[dsps]")
{
    int n = 64;
    float x[n];
    float y[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i;
        y[i] = i + 10;
    }

    TEST_ASSERT_EQUAL(ESP_OK, dsps_addc_f32(x, x, n, 10, 1, 1));

    for (int i = 0 ; i < n ; i++) {
        // Float-aware comparison: TEST_ASSERT_EQUAL would compare the values
        // as integers and let a mismatch in the fractional part slip through.
        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
    }

    int repeat_count = 1;

    // Extra untimed call before the measurement
    dsps_addc_f32(x, x, n, 10, 1, 1);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_addc_f32(x, x, n, 10, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    float total_b = end_b - start_b;
    float cycles = total_b / (n * repeat_count);
    ESP_LOGI(TAG, "dsps_addc_f32 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,25 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_math_H_
|
||||
#define _dsps_math_H_
|
||||
|
||||
#include "dsps_add.h"
|
||||
#include "dsps_sub.h"
|
||||
#include "dsps_mul.h"
|
||||
#include "dsps_addc.h"
|
||||
#include "dsps_mulc.h"
|
||||
#include "dsps_sqrt.h"
|
||||
|
||||
#endif // _dsps_math_H_
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_mul_platform.h"
|
||||
#if (dsps_mul_s16_ae32_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_mul_s16_ae32
|
||||
.type dsps_mul_s16_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
|
||||
// output[i * step_out] = acc >> shift;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
// Element-wise 16-bit multiply with output shift:
//   output[i * step_out] = (input1[i * step1] * input2[i * step2]) >> shift
// Steps are given in elements. No argument validation is done here; the
// routine always returns 0 (ESP_OK) in a2.
//
// The loop is software-pipelined: the product for element i is computed one
// iteration ahead of its store. As a consequence one extra element is read
// from each input (index `len`); that value is computed but never stored.
dsps_mul_s16_ae32:
// input1   - a2
// input2   - a3
// output   - a4
// len      - a5
// step_in1 - a6
// step_in2 - a7
// step_out - stack (a10)
// shift    - stack (a9)

    entry   a1, 16

    l32i.n  a10, a1, 16             // Load step_out to the a10 register
    l32i.n  a9, a1, 20              // Load shift to the a9 register
    ssr     a9                      // sar = shift

    slli    a6, a6, 1               // a6  = step_in1 in bytes
    slli    a7, a7, 1               // a7  = step_in2 in bytes
    slli    a10, a10, 1             // a10 = step_out in bytes

    // Pipeline prologue: product of the first pair of samples.
    l16si   a11, a2, 0
    l16si   a8, a3, 0
    mull    a8, a11, a8             // a8 = input1[0] * input2[0]
    srl     a9, a8                  // a9 = a8 >> sar

    loopnez a5, .loop_end_mul_s16_ae32
        add.n   a2, a2, a6          // input1 += step_in1;
        add.n   a3, a3, a7          // input2 += step_in2;

        l16si   a11, a2, 0          // fetch the next pair ...
        l16si   a8, a3, 0
        s16i    a9, a4, 0           // ... then store the previous result
        mull    a8, a11, a8
        srl     a9, a8              // a9 = a8 >> sar

        add.n   a4, a4, a10         // output += step_out;
.loop_end_mul_s16_ae32:
    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_mul_s16_ae32_enabled
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_mul_platform.h"
|
||||
#if (dsps_mul_s16_aes3_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_mul_s16_aes3
|
||||
.type dsps_mul_s16_aes3,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
|
||||
// output[i * step_out] = acc >> shift;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
// Element-wise 16-bit multiply with output shift for ESP32-S3:
//   output[i * step_out] = (input1[i * step1] * input2[i * step2]) >> shift
//
// The 128-bit vector path (8 samples per iteration) is used only when
//   - all three steps are 1,
//   - input1, input2 and output are 16-byte aligned,
//   - len is a multiple of 8.
// Every other call is handled by the scalar loop at .mul_s16_ae32_mode.
// No argument validation is done here; the routine always returns 0 (ESP_OK).
dsps_mul_s16_aes3:
// input1   - a2
// input2   - a3
// output   - a4
// len      - a5
// step_in1 - a6
// step_in2 - a7
// step_out - stack (a10)
// shift    - stack (a9)

    entry   a1, 16

    l32i.n  a10, a1, 16             // Load step_out to the a10 register
    l32i.n  a9, a1, 20              // Load shift to the a9 register
    ssr     a9                      // sar = shift

    // The vector path handles contiguous data only: every step must be 1
    addi    a15, a6, -1
    bnez    a15, .mul_s16_ae32_mode // Branch if step_in1 != 1
    addi    a15, a7, -1
    bnez    a15, .mul_s16_ae32_mode // Branch if step_in2 != 1
    addi    a15, a10, -1
    bnez    a15, .mul_s16_ae32_mode // Branch if step_out != 1

    // The 128-bit loads and stores work on 16-byte aligned addresses, so all
    // three buffers (the output included) have to be aligned.
    movi    a15, 0xF                // modulo 16 mask
    bany    a2, a15, .mul_s16_ae32_mode // input1 not aligned
    bany    a3, a15, .mul_s16_ae32_mode // input2 not aligned
    bany    a4, a15, .mul_s16_ae32_mode // output not aligned

    // Length must be a multiple of 8 (eight int16 samples per vector)
    movi    a15, 0x7                // modulo 8 mask
    bany    a5, a15, .mul_s16_ae32_mode // jump if != 0

    // Vector path for S3
    wsr.sar a9                      // load sar register

    srli    a5, a5, 3               // a5 = number of 8-sample vectors
    ee.vld.128.ip q0, a2, 16        // preload the first vector of input1
    loopnez a5, .loop_end_mul_s16_aes3_main
        ee.vld.128.ip q1, a3, 16
        ee.vmul.s16.ld.incp q0, a2, q4, q0, q1 // q4 = q0 * q1, reload q0 from input1
        ee.vst.128.ip q4, a4, 16
.loop_end_mul_s16_aes3_main:

    // Exit for Esp32s3 mode
    movi.n  a2, 0                   // return status ESP_OK
    retw.n

.mul_s16_ae32_mode:
    slli    a6, a6, 1               // a6  = step_in1 in bytes
    slli    a7, a7, 1               // a7  = step_in2 in bytes
    slli    a10, a10, 1             // a10 = step_out in bytes

    // Pipeline prologue: product of the first pair of samples
    l16si   a11, a2, 0
    l16si   a8, a3, 0
    mull    a8, a11, a8
    srl     a9, a8                  // a9 = a8 >> sar

    loopnez a5, .loop_end_mul_s16_aes3
        add.n   a2, a2, a6          // input1 += step_in1;
        add.n   a3, a3, a7          // input2 += step_in2;

        l16si   a11, a2, 0          // fetch the next pair ...
        l16si   a8, a3, 0
        s16i    a9, a4, 0           // ... then store the previous result
        mull    a8, a11, a8
        srl     a9, a8              // a9 = a8 >> sar

        add.n   a4, a4, a10         // output += step_out;
.loop_end_mul_s16_aes3:
    // Exit for Esp32 mode
    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_mul_s16_aes3_enabled
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mul.h"
|
||||
|
||||
esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
int ttt = (int)input1[i * step1] * (int)input2[i * step2];
|
||||
output[i * step_out] = ttt >> shift;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_mul_platform.h"
|
||||
#if (dsps_mul_s16_aes3_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_mul_s8_aes3
|
||||
.type dsps_mul_s8_aes3,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
|
||||
// output[i * step_out] = acc >> shift;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
// Element-wise 8-bit multiply with output shift for ESP32-S3:
//   output[i * step_out] = (input1[i * step1] * input2[i * step2]) >> shift
//
// The 128-bit vector path (16 samples per iteration) is used only when
//   - all three steps are 1,
//   - input1, input2 and output are 16-byte aligned,
//   - len is a multiple of 16.
// Every other call is handled by the scalar loop at .mul_s8_ae32_mode.
// No argument validation is done here; the routine always returns 0 (ESP_OK).
dsps_mul_s8_aes3:
// input1   - a2
// input2   - a3
// output   - a4
// len      - a5
// step_in1 - a6
// step_in2 - a7
// step_out - stack (a10)
// shift    - stack (a9)

    entry   a1, 16

    l32i.n  a10, a1, 16             // Load step_out to the a10 register
    l32i.n  a9, a1, 20              // Load shift to the a9 register
    ssr     a9                      // sar = shift

    // The vector path handles contiguous data only: every step must be 1
    addi    a15, a6, -1
    bnez    a15, .mul_s8_ae32_mode  // Branch if step_in1 != 1
    addi    a15, a7, -1
    bnez    a15, .mul_s8_ae32_mode  // Branch if step_in2 != 1
    addi    a15, a10, -1
    bnez    a15, .mul_s8_ae32_mode  // Branch if step_out != 1

    // The 128-bit loads and stores work on 16-byte aligned addresses, so all
    // three buffers (the output included) have to be aligned.
    movi    a15, 0xF                // modulo 16 mask
    bany    a2, a15, .mul_s8_ae32_mode // input1 not aligned
    bany    a3, a15, .mul_s8_ae32_mode // input2 not aligned
    bany    a4, a15, .mul_s8_ae32_mode // output not aligned

    // Length must be a multiple of 16 (sixteen int8 samples per vector)
    movi    a15, 0xf                // modulo 16 mask
    bany    a5, a15, .mul_s8_ae32_mode // jump if != 0

    // Vector path for S3
    wsr.sar a9                      // load sar register

    srli    a5, a5, 4               // a5 = number of 16-sample vectors
    ee.vld.128.ip q0, a2, 16        // preload the first vector of input1
    loopnez a5, .loop_end_mul_s8_aes3_main
        ee.vld.128.ip q1, a3, 16
        ee.vmul.s8.ld.incp q0, a2, q4, q0, q1 // q4 = q0 * q1, reload q0 from input1
        ee.vst.128.ip q4, a4, 16
.loop_end_mul_s8_aes3_main:

    // Exit for Esp32s3 mode
    movi.n  a2, 0                   // return status ESP_OK
    retw.n

.mul_s8_ae32_mode:
    // Scalar path. Steps are already byte offsets for 8-bit data.
    // l8ui zero-extends, so each operand is sign-extended from bit 7 to get
    // the signed int8 value before multiplying.
    l8ui    a11, a2, 0
    l8ui    a8, a3, 0
    sext    a11, a11, 7
    sext    a8, a8, 7
    mull    a8, a11, a8
    srl     a9, a8                  // a9 = a8 >> sar

    loopnez a5, .loop_end_mul_s8_aes3
        add.n   a2, a2, a6          // input1 += step_in1;
        add.n   a3, a3, a7          // input2 += step_in2;

        l8ui    a11, a2, 0          // fetch the next pair ...
        l8ui    a8, a3, 0
        s8i     a9, a4, 0           // ... then store the previous result
        sext    a11, a11, 7
        sext    a8, a8, 7
        mull    a8, a11, a8
        srl     a9, a8              // a9 = a8 >> sar

        add.n   a4, a4, a10         // output += step_out;
.loop_end_mul_s8_aes3:
    // Exit for Esp32 mode
    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_mul_s8_aes3_enabled
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_mul.h"
|
||||
|
||||
esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
|
||||
output[i * step_out] = acc >> shift;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mul_platform.h"
|
||||
#if (dsps_mul_f32_ae32_enabled == 1)
|
||||
|
||||
// Element-wise multiplication of two float arrays; optimized for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_mul_f32_ae32
|
||||
.type dsps_mul_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// output[i * step_out] = input1[i * step1] * input2[i * step2];
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
// Computes output[i * step_out] = input1[i * step1] * input2[i * step2] for i in [0, len).
// Steps are given in elements and converted to byte offsets below.
// No argument validation is done here; the routine always returns 0 (ESP_OK) in a2.
// The input1 sample is fetched one iteration ahead, so one extra element of
// input1 (index `len`) is read; that value is never used.
dsps_mul_f32_ae32:
|
||||
// input1 - a2
|
||||
// input2 - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
// step1 - a6
|
||||
// step2 - a7
|
||||
// step_out - stack (a8)
|
||||
|
||||
entry a1, 16
|
||||
|
||||
l32i.n a8, a1, 16 // Load step_out to the a8 register
|
||||
slli a6, a6, 2 // a6 = step1 * 4 (byte stride of input1)
|
||||
slli a7, a7, 2 // a7 = step2 * 4 (byte stride of input2)
|
||||
slli a8, a8, 2 // a8 = step_out * 4 (byte stride of the output)
|
||||
|
||||
lsi f0, a2, 0 // preload the first input1 sample
|
||||
add.n a2, a2, a6 // input1_ptr+=step1;
|
||||
loopnez a5, loop_end_mul_f32_ae32 // hardware loop, skipped entirely when len == 0
|
||||
lsi f1, a3, 0 // f1 = *input2_ptr
|
||||
add.n a3, a3, a7 // input2_ptr+=step2;
|
||||
|
||||
mul.s f2, f1, f0 // f2 = f1*f0
|
||||
lsi f0, a2, 0 // fetch the input1 sample for the next iteration
|
||||
add.n a2, a2, a6 // input1_ptr+=step1;
|
||||
ssi f2, a4, 0 // *output_ptr = f2
|
||||
add.n a4, a4, a8 // output_ptr+=step_out;
|
||||
loop_end_mul_f32_ae32:
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_mul_f32_ae32_enabled
|
||||
@@ -0,0 +1,33 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mul.h"
|
||||
|
||||
esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
output[i * step_out] = input1[i * step1] * input2[i * step2];
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_mul_H_
|
||||
#define _dsps_mul_H_
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dsps_mul_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Multiply two arrays
|
||||
*
|
||||
* The function multiplies one input array by another, element by element, and stores the result in the output array
|
||||
* out[i*step_out] = input1[i*step1] * input2[i*step2]; i=[0..len)
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input1: input array 1
|
||||
* @param[in] input2: input array 2
|
||||
* @param output: output array
|
||||
* @param len: amount of operations for arrays
|
||||
* @param step1: step over input array 1 (by default should be 1)
|
||||
* @param step2: step over input array 2 (by default should be 1)
|
||||
* @param step_out: step over output array (by default should be 1)
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
|
||||
esp_err_t dsps_mul_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Multiply two arrays
|
||||
*
|
||||
* The function multiplies one input array by another, element by element, and stores the shifted result in the output array
|
||||
* out[i*step_out] = input1[i*step1] * input2[i*step2]; i=[0..len)
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input1: input array 1
|
||||
* @param[in] input2: input array 2
|
||||
* @param output: output array
|
||||
* @param len: amount of operations for arrays
|
||||
* @param step1: step over input array 1 (by default should be 1)
|
||||
* @param step2: step over input array 2 (by default should be 1)
|
||||
* @param step_out: step over output array (by default should be 1)
|
||||
* @param shift: output shift after multiplication (by default should be 15)
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
esp_err_t dsps_mul_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
esp_err_t dsps_mul_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
|
||||
esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
esp_err_t dsps_mul_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#if (dsps_mul_f32_ae32_enabled == 1)
|
||||
#define dsps_mul_f32 dsps_mul_f32_ae32
|
||||
#else
|
||||
#define dsps_mul_f32 dsps_mul_f32_ansi
|
||||
#endif
|
||||
|
||||
#if (dsps_mul_s16_aes3_enabled == 1)
|
||||
#define dsps_mul_s16 dsps_mul_s16_aes3
|
||||
#define dsps_mul_s8 dsps_mul_s8_aes3
|
||||
#elif (dsps_mul_s16_ae32_enabled == 1)
|
||||
#define dsps_mul_s16 dsps_mul_s16_ae32
|
||||
#define dsps_mul_s8 dsps_mul_s8_ansi
|
||||
#else
|
||||
#define dsps_mul_s16 dsps_mul_s16_ansi
|
||||
#define dsps_mul_s8 dsps_mul_s8_ansi
|
||||
#endif
|
||||
|
||||
#else // CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_mul_f32 dsps_mul_f32_ansi
|
||||
#define dsps_mul_s16 dsps_mul_s16_ansi
|
||||
#define dsps_mul_s8 dsps_mul_s8_ansi
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif // _dsps_mul_H_
|
||||
@@ -0,0 +1,30 @@
|
||||
#ifndef _dsps_mul_platform_H_
#define _dsps_mul_platform_H_

#include "sdkconfig.h"

// Selects which optimized multiply kernels are available for the target.
// A kernel is available when its <name>_enabled macro is defined as 1.

#ifdef __XTENSA__
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>

// The f32 kernel uses FPU instructions (lsi/ssi/mul.s) together with the
// zero-overhead loop, so it needs both options.
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
#define dsps_mul_f32_ae32_enabled 1
#endif

// The s16 kernel is integer-only; it needs only the zero-overhead loop.
#if (XCHAL_HAVE_LOOPS == 1)
#define dsps_mul_s16_ae32_enabled 1
#endif

// The vector s16/s8 kernels are built for ESP32-S3 only.
#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
#define dsps_mul_s16_aes3_enabled 1
#endif

#endif // __XTENSA__

#endif // _dsps_mul_platform_H_
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_mul.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_mul";
|
||||
|
||||
// Checks the ANSI float multiply in place: x[i] = i must become i * i.
TEST_CASE("dsps_mul_f32_ansi functionality", "[dsps]")
{
    int n = 32;
    float x[n];
    float y[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i;
        y[i] = i * i;
    }

    // The call must succeed, not just produce plausible data
    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_f32_ansi(x, x, x, n, 1, 1, 1));

    for (int i = 0 ; i < n ; i++) {
        // Float-aware comparison: TEST_ASSERT_EQUAL would compare the values
        // as integers and let a mismatch in the fractional part slip through.
        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
    }
}
|
||||
|
||||
// Checks the platform-selected float multiply in place, then reports how
// many CPU cycles one call takes per sample.
TEST_CASE("dsps_mul_f32 functionality", "[dsps]")
{
    int n = 32;
    float x[n];
    float y[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i;
        y[i] = i * i;
    }

    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_f32(x, x, x, n, 1, 1, 1));

    for (int i = 0 ; i < n ; i++) {
        // Float-aware comparison: TEST_ASSERT_EQUAL would compare the values
        // as integers and let a mismatch in the fractional part slip through.
        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
    }

    int repeat_count = 1;

    // Extra untimed call before the measurement
    dsps_mul_f32(x, x, x, n, 1, 1, 1);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_f32(x, x, x, n, 1, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    float total_b = end_b - start_b;
    float cycles = total_b / (n * repeat_count);
    ESP_LOGI(TAG, "dsps_mul_f32 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,62 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_mul.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_mul";
|
||||
|
||||
// Squares a ramp in place with the platform-selected s16 multiply and compares
// it against the same products computed in C (truncated to int16_t).
TEST_CASE("dsps_mul_s16 functionality", "[dsps]")
{
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int shift = 0;

    // Input ramp and the expected result for every element
    for (int k = 0; k < n; ++k) {
        x[k] = k << 4;
        int32_t squared = ((int32_t)x[k] * (int32_t)x[k]) >> shift;
        y[k] = squared;
    }

    dsps_mul_s16(x, x, x, n, 1, 1, 1, shift);

    for (int k = 0; k < n; ++k) {
        ESP_LOGD(TAG, "x[%i] = %i %i", k, x[k], y[k]);
        if (x[k] != y[k]) {
            TEST_ASSERT_EQUAL(x[k], y[k]);
        }
    }
}
|
||||
|
||||
// Measures one in-place call of the platform-selected s16 multiply and logs
// the cost per sample.
TEST_CASE("dsps_mul_s16 benchmark", "[dsps]")
{
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s16(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // Divide by the sample count: the log line reports cycles per sample,
    // not the total for the whole call.
    float total_b = end_b - start_b;
    float cycles = total_b / n;
    ESP_LOGI(TAG, "dsps_mul_s16 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,69 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_mul.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#if (dsps_mul_s16_aes3_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_mul";
|
||||
// Squares a 16-byte aligned ramp in place with the ESP32-S3 kernel and
// compares it against the same products computed in C (truncated to int16_t).
TEST_CASE("dsps_mul_s16_aes3 functionality", "[dsps]")
{
    int n = 64;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
    // Fail cleanly instead of dereferencing NULL when the heap is exhausted
    TEST_ASSERT_NOT_NULL(x);
    TEST_ASSERT_NOT_NULL(y);

    int32_t temp;
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;
    }

    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, shift);

    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        if (x[i] != y[i]) {
            TEST_ASSERT_EQUAL(x[i], y[i]);
        }
    }
    free(x);
    free(y);
}
|
||||
|
||||
// Benchmark: times one in-place dsps_mul_s16_aes3 call over a 16-byte
// aligned 2048-element buffer and logs the cost per processed sample.
TEST_CASE("dsps_mul_s16_aes3 benchmark", "[dsps]")
{
    const int n = 2048;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    // Fail the test cleanly instead of dereferencing NULL when allocation fails.
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line promises a per-sample figure, so normalise by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s16_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
#endif // (dsps_mul_s16_aes3_enabled == 1)
|
||||
@@ -0,0 +1,66 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_mul.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_mul";
|
||||
// Functional check: squares a 64-element ramp in place with
// dsps_mul_s16_ansi and compares every element with a plain C reference.
TEST_CASE("dsps_mul_s16_ansi functionality", "[dsps]")
{
    int n = 64;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
    // Fail the test cleanly instead of dereferencing NULL when allocation fails.
    TEST_ASSERT_NOT_NULL(x);
    TEST_ASSERT_NOT_NULL(y);

    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        int32_t temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;    // reference value, narrowed to int16_t like the output buffer
    }

    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, shift);
    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        // Unity expects (expected, actual): y is the reference, x the result.
        TEST_ASSERT_EQUAL(y[i], x[i]);
    }
    free(x);
    free(y);
}
|
||||
|
||||
// Benchmark: times one in-place dsps_mul_s16_ansi call over a
// 2048-element buffer and logs the cost per processed sample.
TEST_CASE("dsps_mul_s16_ansi benchmark", "[dsps]")
{
    const int n = 2048;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    // Fail the test cleanly instead of dereferencing NULL when allocation fails.
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line promises a per-sample figure, so normalise by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s16_ansi - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
@@ -0,0 +1,69 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_mul.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#if (dsps_mul_s16_aes3_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_mul";
|
||||
// Functional check: squares 64 signed 8-bit values (-32..31) in place with
// dsps_mul_s8_aes3 and compares every element with a plain C reference.
TEST_CASE("dsps_mul_s8_aes3 functionality", "[dsps]")
{
    int n = 64;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
    // Fail the test cleanly instead of dereferencing NULL when allocation fails.
    TEST_ASSERT_NOT_NULL(x);
    TEST_ASSERT_NOT_NULL(y);

    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i - n / 2;
        int32_t temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;    // reference value, narrowed to int8_t like the output buffer
    }

    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, shift);
    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        // Unity expects (expected, actual): y is the reference, x the result.
        TEST_ASSERT_EQUAL(y[i], x[i]);
    }
    free(x);
    free(y);
}
|
||||
|
||||
// Benchmark: times one in-place dsps_mul_s8_aes3 call over a 16-byte
// aligned 256-element buffer and logs the cost per processed sample.
TEST_CASE("dsps_mul_s8_aes3 benchmark", "[dsps]")
{
    const int n = 256;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    // Fail the test cleanly instead of dereferencing NULL when allocation fails.
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        // i << 4 exceeds the int8_t range for most i; the cast makes the
        // narrowing explicit. The data only feeds the timing run.
        x[i] = (int8_t)(i << 4);
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line promises a per-sample figure, so normalise by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s8_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
#endif // (dsps_mul_s16_aes3_enabled == 1)
|
||||
@@ -0,0 +1,61 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_mul.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_mul";
|
||||
|
||||
// Functional check: squares 64 signed 8-bit values (-32..31) in place with
// dsps_mul_s8_ansi and compares every element with a plain C reference.
TEST_CASE("dsps_mul_s8_ansi functionality", "[dsps]")
{
    int n = 64;
    int8_t x[n];
    int8_t y[n];
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i - n / 2;
        int32_t temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;    // reference value, narrowed to int8_t like the output buffer
    }

    // Pass the same shift the reference was computed with (still 0 here).
    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, shift);
    for (int i = 0 ; i < n ; i++) {
        // Unity expects (expected, actual): y is the reference, x the result.
        TEST_ASSERT_EQUAL(y[i], x[i]);
    }
}
|
||||
|
||||
// Benchmark: times one in-place dsps_mul_s8_ansi call over a 256-element
// int8_t buffer and logs the cost per processed sample.
TEST_CASE("dsps_mul_s8_ansi benchmark", "[dsps]")
{
    const int n = 256;
    int8_t x[n];
    for (int i = 0 ; i < n ; i++) {
        // i << 4 exceeds the int8_t range for most i; the cast makes the
        // narrowing explicit. The data only feeds the timing run.
        x[i] = (int8_t)(i << 4);
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line promises a per-sample figure, so normalise by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s8_ansi - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mulc_platform.h"
|
||||
#if (dsps_mulc_s16_ae32_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_mulc_s16_ae32
|
||||
.type dsps_mulc_s16_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// int32_t acc = (int32_t)input[i * step_in] * (int32_t)C;
|
||||
// output[i * step_out] = (int16_t)(acc>>15);
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
dsps_mulc_s16_ae32:
|
||||
// input - a2
|
||||
// output - a3
|
||||
// len - a4
|
||||
// C - a5
|
||||
// step_in - a6
|
||||
// step_out - a7
|
||||
|
||||
entry a1, 16
|
||||
|
||||
movi.n a8, 15 // output shift
|
||||
ssr a8
|
||||
|
||||
srli a4, a4, 1 // a4 = a4>>1
|
||||
slli a6, a6, 2 // a6 - step_in<<2, because we load two inputs per loop
|
||||
slli a7, a7, 1 // a7 - step_out<<1
|
||||
|
||||
addi a6, a6, -4;
|
||||
addi a2, a2, -4;
|
||||
|
||||
ldinc m0, a2
|
||||
|
||||
loopnez a4, loop_end_mulc_f32_ae32
|
||||
add.n a2, a2, a6 // input+=step_input;
|
||||
mul.DA.LL m0, a5
|
||||
rsr a8, acchi
|
||||
rsr a9, acclo
|
||||
src a8, a8, a9 // Here result in a8
|
||||
s16i a8, a3, 0 // store result to the output
|
||||
// rsr a9, acclo
|
||||
// s16i a9, a3, 0 // store result to the putput
|
||||
add.n a3, a3, a7 // output+=step_out;
|
||||
mul.DA.HL m0, a5
|
||||
|
||||
rsr a8, acchi
|
||||
rsr a9, acclo
|
||||
ldinc m0, a2 // load next data
|
||||
src a10, a8, a9 // Here result in a10
|
||||
s16i a10, a3, 0 // store result to the output
|
||||
// // rsr a9, acclo
|
||||
// // s16i a9, a3, 0 // store result to the putput
|
||||
add.n a3, a3, a7 // output+=step_out;
|
||||
loop_end_mulc_f32_ae32:
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_mulc_s16_ae32_enabled
|
||||
@@ -0,0 +1,31 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mulc.h"
|
||||
|
||||
esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out)
|
||||
{
|
||||
if (NULL == input) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
int32_t acc = (int32_t)input[i * step_in] * (int32_t)C;
|
||||
output[i * step_out] = (int16_t)(acc >> 15);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mulc_platform.h"
|
||||
#if (dsps_mulc_f32_ae32_enabled == 1)
|
||||
|
||||
// Multiplication of a float array by a constant for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_mulc_f32_ae32
|
||||
.type dsps_mulc_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_mulc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// output[i * step_out] = input[i * step_in] * C;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
dsps_mulc_f32_ae32:
|
||||
// input - a2
|
||||
// output - a3
|
||||
// len - a4
|
||||
// C - a5
|
||||
// step_in - a6
|
||||
// step_out - a7
|
||||
|
||||
entry a1, 16
|
||||
|
||||
slli a6, a6, 2 // a6 - step_in<<2
|
||||
slli a7, a7, 2 // a7 - step_out<<2
|
||||
wfr f0, a5 // a5 - load to the f0
|
||||
|
||||
loopnez a4, loop_end_mulc_f32_ae32
|
||||
lsi f1, a2, 0
|
||||
|
||||
mul.s f2, f1, f0 // f2 = f1 * f0
|
||||
add.n a2, a2, a6 // input1_ptr+=step_in;
|
||||
ssi f2, a3, 0
|
||||
add.n a3, a3, a7 // output+=step_out;
|
||||
loop_end_mulc_f32_ae32:
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_mulc_f32_ae32_enabled
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_mulc.h"
|
||||
|
||||
esp_err_t dsps_mulc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
|
||||
{
|
||||
if (NULL == input) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
output[i * step_out] = input[i * step_in] * C;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_mulc_H_
|
||||
#define _dsps_mulc_H_
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dsps_mulc_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief multiply constant
|
||||
*
|
||||
* The function multiplies the input array by the constant value
|
||||
* x[i*step_out] = y[i*step_in]*C; i=[0..len)
|
||||
* The implementation uses ANSI C and can be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input: input array
|
||||
* @param output: output array
|
||||
* @param len: amount of operations for arrays
|
||||
* @param C: constant value
|
||||
* @param step_in: step over input array (by default should be 1)
|
||||
* @param step_out: step over output array (by default should be 1)
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_mulc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out);
|
||||
esp_err_t dsps_mulc_f32_ae32(const float *input, float *output, int len, float C, int step_in, int step_out);
|
||||
|
||||
esp_err_t dsps_mulc_s16_ae32(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);
|
||||
esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
#if (dsps_mulc_f32_ae32_enabled == 1)
|
||||
#define dsps_mulc_f32 dsps_mulc_f32_ae32
|
||||
#else //
|
||||
#define dsps_mulc_f32 dsps_mulc_f32_ansi
|
||||
#endif
|
||||
#if (dsps_mulc_s16_ae32_enabled == 1)
|
||||
#define dsps_mulc_s16 dsps_mulc_s16_ae32
|
||||
#else
|
||||
#define dsps_mulc_s16 dsps_mulc_s16_ansi
|
||||
#endif // dsps_mulc_s16_ae32_enabled
|
||||
|
||||
#else
|
||||
#define dsps_mulc_f32 dsps_mulc_f32_ansi
|
||||
#define dsps_mulc_s16 dsps_mulc_s16_ansi
|
||||
#endif
|
||||
|
||||
|
||||
#endif // _dsps_mulc_H_
|
||||
@@ -0,0 +1,25 @@
|
||||
#ifndef _dsps_mulc_platform_H_
|
||||
#define _dsps_mulc_platform_H_
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#ifdef __XTENSA__
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#include <xtensa/config/core-matmap.h>
|
||||
|
||||
|
||||
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
|
||||
|
||||
#define dsps_mulc_f32_ae32_enabled 1
|
||||
|
||||
#endif
|
||||
|
||||
#if ((XCHAL_HAVE_LOOPS == 1) && (XCHAL_HAVE_MAC16 == 1))
|
||||
|
||||
#define dsps_mulc_s16_ae32_enabled 1
|
||||
|
||||
#endif //
|
||||
#endif // __XTENSA__
|
||||
|
||||
|
||||
#endif // _dsps_mulc_platform_H_
|
||||
@@ -0,0 +1,70 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_mulc.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_mulc";
|
||||
|
||||
// Functional check: multiplies the ramp 0..63 by 10 in place with
// dsps_mulc_f32_ansi and compares against i * 10.
TEST_CASE("dsps_mulc_f32_ansi functionality", "[dsps]")
{
    int n = 64;
    float x[n];
    float y[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i;
        y[i] = i * 10;
    }
    dsps_mulc_f32_ansi(x, x, n, 10, 1, 1);
    for (int i = 0 ; i < n ; i++) {
        // Use the float assertion: the integer TEST_ASSERT_EQUAL would
        // compare the values as integers and miss sub-integer mismatches.
        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
    }
}
|
||||
|
||||
// Functional check plus timing for dsps_mulc_f32: verifies that the ramp
// 0..63 is multiplied by 10 in place, then times one further call and logs
// the cost per processed sample.
TEST_CASE("dsps_mulc_f32 functionality", "[dsps]")
{
    int n = 64;
    float x[n];
    float y[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i;
        y[i] = i * 10;
    }
    dsps_mulc_f32(x, x, n, 10, 1, 1);
    for (int i = 0 ; i < n ; i++) {
        // Use the float assertion: the integer TEST_ASSERT_EQUAL would
        // compare the values as integers and miss sub-integer mismatches.
        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
    }

    int repeat_count = 1;

    // One untimed call before the measured one.
    dsps_mulc_f32(x, x, n, 10, 1, 1);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mulc_f32(x, x, n, 10, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    float total_b = end_b - start_b;
    float cycles = total_b / (n * repeat_count);
    ESP_LOGI(TAG, "dsps_mulc_f32 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,61 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_mulc.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_mulc";
|
||||
|
||||
// Functional check: multiplies a 64-element ramp by 0x2000 in place with
// dsps_mulc_s16 and compares against (x * C) >> 15 computed in plain C.
TEST_CASE("dsps_mulc_s16 functionality", "[dsps]")
{
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int16_t test_const = 0x2000;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        int32_t temp = (int32_t)x[i] * (int32_t)test_const;
        y[i] = temp >> 15;
    }

    dsps_mulc_s16(x, x, n, test_const, 1, 1);
    for (int i = 0 ; i < n ; i++) {
        // Unity expects (expected, actual): y is the reference, x the result.
        TEST_ASSERT_EQUAL(y[i], x[i]);
    }
}
|
||||
|
||||
// Benchmark: times one in-place dsps_mulc_s16 call over a 256-element
// int16_t buffer and logs the cost per processed sample.
TEST_CASE("dsps_mulc_s16 benchmark", "[dsps]")
{
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mulc_s16(x, x, n, 10, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line promises a per-sample figure, so normalise by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mulc_s16 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,61 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_mulc.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_mulc";
|
||||
|
||||
// Functional check: multiplies a 64-element ramp by 0x2000 in place with
// dsps_mulc_s16_ansi and compares against (x * C) >> 15 computed in plain C.
TEST_CASE("dsps_mulc_s16_ansi functionality", "[dsps]")
{
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int16_t test_const = 0x2000;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        int32_t temp = (int32_t)x[i] * (int32_t)test_const;
        y[i] = temp >> 15;
    }

    dsps_mulc_s16_ansi(x, x, n, test_const, 1, 1);
    for (int i = 0 ; i < n ; i++) {
        // Unity expects (expected, actual): y is the reference, x the result.
        TEST_ASSERT_EQUAL(y[i], x[i]);
    }
}
|
||||
|
||||
// Benchmark: times one in-place dsps_mulc_s16_ansi call over a 256-element
// int16_t buffer and logs the cost per processed sample.
// NOTE(review): the test-case name omits "_ansi" although the ANSI
// implementation is measured; kept unchanged so existing test selectors
// still match - confirm whether it should be renamed.
TEST_CASE("dsps_mulc_s16 benchmark", "[dsps]")
{
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mulc_s16_ansi(x, x, n, 10, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line promises a per-sample figure, so normalise by n, and
    // name the function that was actually timed.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mulc_s16_ansi - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_sqrt.h"
|
||||
#include <math.h>
|
||||
|
||||
|
||||
// Fast square-root estimate for a single float.
// The bit pattern of f is shifted right by one and the constant 0x1fbb4000
// is added; the resulting bits are returned reinterpreted as a float. No
// refinement step is applied, so the value is only an approximation.
//   f - input value
// Returns the approximate square root of f.
//
// Defined without `inline`: the header declares this as an ordinary external
// function, and a bare `inline` definition on its own would not provide one.
float dsps_sqrtf_f32_ansi(float f)
{
    // Reinterpret the bits through a union rather than casting &f to int *:
    // reading a float object through an int lvalue breaks strict aliasing.
    union {
        float f;
        int32_t i;
    } conv = { .f = f };

    conv.i = 0x1fbb4000 + (conv.i >> 1);
    return conv.f;
}
|
||||
|
||||
esp_err_t dsps_sqrt_f32_ansi(const float *input, float *output, int len)
|
||||
{
|
||||
if (NULL == input) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
output[i] = dsps_sqrtf_f32_ansi(input[i]);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
// Approximates 1/sqrt(data).
// An initial estimate is obtained by subtracting the right-shifted bit
// pattern of data from the constant 0x5f3759df; it is then refined once
// with estimate * (1.5 - 0.5 * data * estimate * estimate).
//   data - input value
// Returns the refined estimate.
float dsps_inverted_sqrtf_f32_ansi(float data)
{
    union {
        float f;
        uint32_t i;
    } bits;

    bits.f = data;
    bits.i = 0x5f3759df - (bits.i >> 1);

    const float estimate = bits.f;
    const float half_input = data * 0.5F;
    return estimate * (1.5F - (half_input * estimate * estimate));
}
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_sqrt_H_
|
||||
#define _dsps_sqrt_H_
|
||||
#include "dsp_err.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief square root approximation
|
||||
*
|
||||
* The function takes square root approximation
|
||||
* x[i] ~ sqrt(y[i]); i=[0..len)
|
||||
* The implementation uses ANSI C and can be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input: input array
|
||||
* @param output: output array
|
||||
* @param len: amount of operations for arrays
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_sqrt_f32_ansi(const float *input, float *output, int len);
|
||||
//esp_err_t dsps_sqrt_s32_ansi(const int32_t *input, int16_t *output, int len);
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief square root approximation
|
||||
*
|
||||
* The function takes square root approximation
|
||||
* x ~ sqrt(y);
|
||||
* The implementation uses ANSI C and can be compiled and run on any platform
|
||||
*
|
||||
* @param[in] data: input value
|
||||
*
|
||||
* @return
|
||||
* - square root value
|
||||
*/
|
||||
float dsps_sqrtf_f32_ansi(const float data);
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief inverted square root approximation
|
||||
*
|
||||
* The function takes inverted square root approximation
|
||||
* x ~ 1/sqrt(y);
|
||||
* The implementation uses ANSI C and can be compiled and run on any platform
|
||||
*
|
||||
* @param[in] data: input value
|
||||
*
|
||||
* @return
|
||||
* - inverted square root value
|
||||
*/
|
||||
float dsps_inverted_sqrtf_f32_ansi(float data );
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_sqrt_f32 dsps_sqrt_f32_ansi
|
||||
#define dsps_sqrtf_f32 dsps_sqrtf_f32_ansi
|
||||
#define dsps_inverted_sqrtf_f32 dsps_inverted_sqrtf_f32_ansi
|
||||
#else
|
||||
#define dsps_sqrt_f32 dsps_sqrt_f32_ansi
|
||||
#define dsps_sqrtf_f32 dsps_sqrtf_f32_ansi
|
||||
#define dsps_inverted_sqrtf_f32 dsps_inverted_sqrtf_f32_ansi
|
||||
#endif
|
||||
|
||||
#endif // _dsps_sqrt_H_
|
||||
@@ -0,0 +1,81 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_sqrt.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_sqrt";
|
||||
|
||||
// Accuracy check for dsps_sqrtf_f32: compares the approximation against
// sqrtf() on 100000 rand() values, tracks the largest and smallest relative
// error in dB, and asserts whenever the error is above -25 dB.
TEST_CASE("dsps_sqrtf_f32_ansi functionality", "[dsps]")
{
    float max_err = -1000;
    float max_value = 0;
    float min_err = 0;
    float min_value = (float)INT32_MAX;
    int test_points = 100000;
    // Signed index: avoids the signed/unsigned comparison against test_points.
    for (int i = 0; i < test_points; i++) {
        float test_value = rand();
        float x = sqrtf(test_value);
        float y = dsps_sqrtf_f32(test_value);
        // Relative error in dB; the small offset keeps log10f away from zero.
        float error = 20 * log10f(fabs((x - y) / x) + 0.000001);
        if (error > max_err) {
            max_err = error;
            max_value = test_value;
        }
        if (error < min_err) {
            min_err = error;
            min_value = test_value;
        }
        if (error > -25) {
            ESP_LOGE(TAG, "dsps_sqrtf_f32_ansi: error = %f dB, value = %f (0x%8.8x)\n", error, test_value, (int)test_value);
            // NOTE(review): TEST_ASSERT_EQUAL looks like it compares as
            // integers, so it may pass when only the fractions differ - confirm.
            TEST_ASSERT_EQUAL(x, y);
        }
    }
    ESP_LOGI(TAG, "dsps_sqrtf_f32_ansi: max error = %f dB, min error = %f dB, max_value = %f (0x%8.8x), min_value = %f (0x%8.8x)\n", max_err, min_err, max_value, (int)max_value, min_value, (int)min_value);
}
|
||||
|
||||
// Functional check plus timing for dsps_sqrt_f32_ansi: feeds the squares of
// 0, 10, 20, ... and asserts whenever the relative error against the
// original values is above -25 dB. Logs the cycle count of the call.
TEST_CASE("dsps_sqrt_f32_ansi functionality", "[dsps]")
{
    int n = 256;
    float *x = (float *)malloc(sizeof(float) * n);
    float *result = (float *)malloc(sizeof(float) * n);
    float *y = (float *)malloc(sizeof(float) * n);
    // Fail the test cleanly instead of dereferencing NULL when allocation fails.
    TEST_ASSERT_NOT_NULL(x);
    TEST_ASSERT_NOT_NULL(result);
    TEST_ASSERT_NOT_NULL(y);

    for (int i = 0 ; i < n ; i++) {
        y[i] = i * 10;
        x[i] = y[i] * y[i];
    }
    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sqrt_f32_ansi(x, result, n);
    float cycles = dsp_get_cpu_cycle_count() - start_b;

    for (int i = 0 ; i < n ; i++) {
        // Relative error in dB; the small offset keeps log10f away from zero.
        float error = 20 * log10f(fabs((result[i] - y[i]) / y[i]) + 0.000001);
        if (error > -25) {
            TEST_ASSERT_EQUAL(result[i], y[i]);
        }
    }
    ESP_LOGI(TAG, "dsps_sqrt_f32_ansi - %f cycles for %i samples \n", cycles, n);
    free(x);
    free(y);
    free(result);
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_sub_platform.h"
|
||||
#if (dsps_sub_s16_ae32_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_sub_s16_ae32
|
||||
.type dsps_sub_s16_ae32,@function
|
||||
// The function implements the following C code:
// esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
// {
//     for (int i = 0 ; i < len ; i++) {
//         int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
//         output[i * step_out] = acc >> shift;
//     }
//     return ESP_OK;
// }
dsps_sub_s16_ae32:
// input1   - a2
// input2   - a3
// output   - a4
// len      - a5
// step_in1 - a6
// step_in2 - a7
// step_out - stack (a10)
// shift    - stack (a9)

    entry   a1, 16

    l32i.n  a10, a1, 16         // Load step_out to the a10 register
    l32i.n  a9,  a1, 20         // Load shift to the a9 register
    ssr     a9                  // sar = a9

    // Convert element steps to byte steps (2 bytes per int16_t)
    slli    a6,  a6,  1         // a6  = step_in1 << 1
    slli    a7,  a7,  1         // a7  = step_in2 << 1
    slli    a10, a10, 1         // a10 = step_out << 1

    // Compute the first result ahead of the loop. This must be a subtraction,
    // exactly like the loop body, otherwise output[0] would hold a sum.
    l16si   a11, a2, 0
    l16si   a8,  a3, 0
    sub     a8,  a11, a8        // a8 = input1[0] - input2[0]
    srl     a9,  a8             // a9 = a8 >> sar

    // Software pipelined loop: every iteration stores the result computed in
    // the previous step and already loads and computes the next one. The last
    // iteration therefore loads one element past the last one that is used;
    // that value is never stored.
    loopnez a5, .loop_end_sub_s16_ae32
        add.n   a2, a2, a6      // input1 += step_in1;
        add.n   a3, a3, a7      // input2 += step_in2;

        l16si   a11, a2, 0
        l16si   a8,  a3, 0
        s16i    a9,  a4, 0      // store the previous result to the output
        sub     a8,  a11, a8    // a8 = input1[i] - input2[i]
        srl     a9,  a8         // a9 = a8 >> sar

        add.n   a4, a4, a10     // output += step_out;
.loop_end_sub_s16_ae32:
    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_sub_s16_ae32_enabled
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_sub_platform.h"
|
||||
#if (dsps_sub_s16_aes3_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_sub_s16_aes3
|
||||
.type dsps_sub_s16_aes3,@function
|
||||
// The function implements the following C code:
// esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
// {
//     for (int i = 0 ; i < len ; i++) {
//         int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
//         output[i * step_out] = acc >> shift;
//     }
//     return ESP_OK;
// }
//
// Two code paths are used:
//  - a vector path (8 samples per iteration) when all steps are 1, all three
//    buffers are 16-byte aligned and len is a multiple of 8;
//  - a scalar path, one sample per iteration, in every other case.
// NOTE(review): the vector instruction is a saturating subtract and `shift`
// does not appear to be applied on that path - confirm against the ESP32-S3
// instruction reference.
dsps_sub_s16_aes3:
// input1   - a2
// input2   - a3
// output   - a4
// len      - a5
// step_in1 - a6
// step_in2 - a7
// step_out - stack (a10)
// shift    - stack (a9)

    entry   a1, 16

    l32i.n  a10, a1, 16         // Load step_out to the a10 register
    l32i.n  a9,  a1, 20         // Load shift to the a9 register
    ssr     a9                  // sar = a9

    // The vector path requires every step to be exactly 1
    addi    a15, a6, -1
    bnez    a15, .sub_s16_ae32_mode     // Branch if step_in1 != 1
    addi    a15, a7, -1
    bnez    a15, .sub_s16_ae32_mode     // Branch if step_in2 != 1
    addi    a15, a10, -1
    bnez    a15, .sub_s16_ae32_mode     // Branch if step_out != 1

    // Check addresses: the 128-bit loads and stores need 16-byte alignment.
    // The output pointer is checked too, because the vector store would
    // otherwise not write to the address the caller passed in.
    movi    a15, 0xF                    // modulo 16 mask
    bany    a2, a15, .sub_s16_ae32_mode // jump if input1 is not aligned
    bany    a3, a15, .sub_s16_ae32_mode // jump if input2 is not aligned
    bany    a4, a15, .sub_s16_ae32_mode // jump if output is not aligned

    // Check length (must be a multiple of 8 samples)
    movi    a15, 0x7                    // modulo 8 mask
    bany    a5, a15, .sub_s16_ae32_mode // jump if != 0

    // Process main function for S3

    slli    a6,  a6,  1         // a6  = step_in1 << 1
    slli    a7,  a7,  1         // a7  = step_in2 << 1
    slli    a10, a10, 1         // a10 = step_out << 1
    wsr.sar a9                  // load sar register

    srli    a5, a5, 3           // number of 8-sample vectors
    ee.vld.128.ip   q0, a2, 16  // preload the first vector of input1
    loopnez a5, .loop_end_sub_s16_aes3_main
        ee.vld.128.ip           q1, a3, 16
        ee.vsubs.s16.ld.incp    q0, a2, q4, q0, q1  // q4 = q0 - q1, then load next q0
        ee.vst.128.ip           q4, a4, 16
.loop_end_sub_s16_aes3_main:

    // Exit for Esp32s3 mode
    movi.n  a2, 0               // return status ESP_OK
    retw.n

.sub_s16_ae32_mode:
    // Scalar path: convert element steps to byte steps
    slli    a6,  a6,  1         // a6  = step_in1 << 1
    slli    a7,  a7,  1         // a7  = step_in2 << 1
    slli    a10, a10, 1         // a10 = step_out << 1

    // First result is computed ahead of the loop
    l16si   a11, a2, 0
    l16si   a8,  a3, 0
    sub     a8,  a11, a8        // a8 = input1[0] - input2[0]
    srl     a9,  a8             // a9 = a8 >> sar

    // Each iteration stores the previous result and prepares the next one
    loopnez a5, .loop_end_sub_s16_aes3
        add.n   a2, a2, a6      // input1 += step_in1;
        add.n   a3, a3, a7      // input2 += step_in2;

        l16si   a11, a2, 0
        l16si   a8,  a3, 0
        s16i    a9,  a4, 0      // store the previous result to the output
        sub     a8,  a11, a8
        srl     a9,  a8         // a9 = a8 >> sar

        add.n   a4, a4, a10     // output += step_out;
.loop_end_sub_s16_aes3:
    // Exit for Esp32 mode
    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_sub_s16_aes3_enabled
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_sub.h"

/*
 * Element-wise subtraction of two int16_t arrays with a right shift.
 *
 * output[i * step_out] = (input1[i * step1] - input2[i * step2]) >> shift
 * for i in [0, len). The difference is formed in 32 bits before shifting.
 *
 * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when any of the three pointers is
 * NULL, ESP_OK otherwise.
 */
esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
{
    if (NULL == input1) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }
    if (NULL == input2) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }
    if (NULL == output) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    for (int i = 0 ; i < len ; i++) {
        int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
        output[i * step_out] = acc >> shift;
    }
    return ESP_OK;
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_sub_platform.h"
|
||||
#if (dsps_sub_s16_aes3_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_sub_s8_aes3
|
||||
.type dsps_sub_s8_aes3,@function
|
||||
// The function implements the following C code:
// esp_err_t dsps_sub_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
// {
//     for (int i = 0 ; i < len ; i++) {
//         int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
//         output[i * step_out] = acc >> shift;
//     }
//     return ESP_OK;
// }
//
// Two code paths are used:
//  - a vector path (16 samples per iteration) when all steps are 1, all three
//    buffers are 16-byte aligned and len is a multiple of 16;
//  - a scalar path, one sample per iteration, in every other case.
// NOTE(review): the vector instruction is a saturating subtract and `shift`
// does not appear to be applied on that path - confirm against the ESP32-S3
// instruction reference.
dsps_sub_s8_aes3:
// input1   - a2
// input2   - a3
// output   - a4
// len      - a5
// step_in1 - a6
// step_in2 - a7
// step_out - stack (a10)
// shift    - stack (a9)

    entry   a1, 16

    l32i.n  a10, a1, 16         // Load step_out to the a10 register
    l32i.n  a9,  a1, 20         // Load shift to the a9 register
    ssr     a9                  // sar = a9

    // The vector path requires every step to be exactly 1
    addi    a15, a6, -1
    bnez    a15, .sub_s8_ae32_mode      // Branch if step_in1 != 1
    addi    a15, a7, -1
    bnez    a15, .sub_s8_ae32_mode      // Branch if step_in2 != 1
    addi    a15, a10, -1
    bnez    a15, .sub_s8_ae32_mode      // Branch if step_out != 1

    // Check addresses: the 128-bit loads and stores need 16-byte alignment.
    // The output pointer is checked too, because the vector store would
    // otherwise not write to the address the caller passed in.
    movi    a15, 0xF                    // modulo 16 mask
    bany    a2, a15, .sub_s8_ae32_mode  // jump if input1 is not aligned
    bany    a3, a15, .sub_s8_ae32_mode  // jump if input2 is not aligned
    bany    a4, a15, .sub_s8_ae32_mode  // jump if output is not aligned

    // Check length (must be a multiple of 16 samples)
    movi    a15, 0xf                    // modulo 16 mask
    bany    a5, a15, .sub_s8_ae32_mode  // jump if != 0

    // Process main function for S3

    wsr.sar a9                  // load sar register

    srli    a5, a5, 4           // number of 16-sample vectors
    ee.vld.128.ip   q0, a2, 16  // preload the first vector of input1
    loopnez a5, .loop_end_sub_s8_aes3_main
        ee.vld.128.ip           q1, a3, 16
        ee.vsubs.s8.ld.incp     q0, a2, q4, q0, q1  // q4 = q0 - q1, then load next q0
        ee.vst.128.ip           q4, a4, 16
.loop_end_sub_s8_aes3_main:

    // Exit for Esp32s3 mode
    movi.n  a2, 0               // return status ESP_OK
    retw.n

.sub_s8_ae32_mode:
    // Scalar path. Steps are already byte steps for 8-bit data.
    // l8ui zero-extends, so both operands are sign-extended from bit 7:
    // without that, the bits shifted into the stored byte are wrong for
    // negative inputs whenever shift > 0.
    l8ui    a11, a2, 0
    l8ui    a8,  a3, 0
    sext    a11, a11, 7
    sext    a8,  a8,  7
    sub     a8,  a11, a8        // a8 = input1[0] - input2[0]
    srl     a9,  a8             // a9 = a8 >> sar

    // Each iteration stores the previous result and prepares the next one
    loopnez a5, .loop_end_sub_s8_aes3
        add.n   a2, a2, a6      // input1 += step_in1;
        add.n   a3, a3, a7      // input2 += step_in2;

        l8ui    a11, a2, 0
        l8ui    a8,  a3, 0
        s8i     a9,  a4, 0      // store the previous result to the output
        sext    a11, a11, 7
        sext    a8,  a8,  7
        sub     a8,  a11, a8
        srl     a9,  a8         // a9 = a8 >> sar

        add.n   a4, a4, a10     // output += step_out;
.loop_end_sub_s8_aes3:
    // Exit for Esp32 mode
    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_sub_s16_aes3_enabled
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_sub.h"
|
||||
|
||||
esp_err_t dsps_sub_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
|
||||
output[i * step_out] = acc >> shift;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_sub_platform.h"
|
||||
#if (dsps_sub_f32_ae32_enabled == 1)
|
||||
|
||||
// This is the element-wise float array subtraction for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_sub_f32_ae32
|
||||
.type dsps_sub_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dsps_sub_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
|
||||
// {
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// output[i * step_out] = input1[i * step1] - input2[i * step2];
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
// Element-wise float subtraction: output[i*step_out] = input1[i*step1] - input2[i*step2].
// Always returns 0 (ESP_OK) in a2.
dsps_sub_f32_ae32:
|
||||
// input1 - a2
|
||||
// input2 - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
// step1 - a6
|
||||
// step2 - a7
|
||||
// step_out - stack (a8)
|
||||
|
||||
entry a1, 16
|
||||
|
||||
l32i.n a8, a1, 16 // Load step_out to the a8 register
|
||||
// Convert element steps to byte steps (4 bytes per float)
slli a6, a6, 2 // a6 - step1<<2
|
||||
slli a7, a7, 2 // a7 - step2<<2
|
||||
slli a8, a8, 2 // a8 - step_out<<2
|
||||
|
||||
// Preload the first input1 element before the loop
lsi f0, a2, 0
|
||||
add.n a2, a2, a6 // input1_ptr+=step1;
|
||||
// Loop body runs len times; it is skipped when len is zero
loopnez a5, loop_end_sub_f32_ae32
|
||||
lsi f1, a3, 0
|
||||
add.n a3, a3, a7 // input2_ptr+=step2;
|
||||
|
||||
sub.s f2, f0, f1 // f2 = f0 - f1
|
||||
// Load the input1 element for the next iteration; on the last iteration
// this reads one element past the last one that is used
lsi f0, a2, 0
|
||||
add.n a2, a2, a6 // input1_ptr+=step1;
|
||||
ssi f2, a4, 0
|
||||
add.n a4, a4, a8 // output_ptr+=step_out;
|
||||
loop_end_sub_f32_ae32:
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_sub_f32_ae32_enabled
|
||||
@@ -0,0 +1,33 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_sub.h"
|
||||
|
||||
esp_err_t dsps_sub_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
output[i * step_out] = input1[i * step1] - input2[i * step2];
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dsps_sub_H_
|
||||
#define _dsps_sub_H_
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dsps_sub_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief sub arrays
|
||||
*
|
||||
* The function subtracts one array from another
|
||||
* out[i*step_out] = input1[i*step1] - input2[i*step2]; i=[0..len)
|
||||
* The implementation uses ANSI C and can be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input1: input array 1
|
||||
* @param[in] input2: input array 2
|
||||
* @param output: output array
|
||||
* @param len: amount of operations for arrays
|
||||
* @param step1: step over input array 1 (by default should be 1)
|
||||
* @param step2: step over input array 2 (by default should be 1)
|
||||
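* @param step_out: step over output array (by default should be 1)
* @param shift: (s16 and s8 variants only) number of bits the difference is shifted right before it is stored, as in the ANSI implementation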
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_sub_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
|
||||
esp_err_t dsps_sub_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
|
||||
|
||||
esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
esp_err_t dsps_sub_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
esp_err_t dsps_sub_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
|
||||
esp_err_t dsps_sub_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
esp_err_t dsps_sub_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#if (dsps_sub_f32_ae32_enabled == 1)
|
||||
#define dsps_sub_f32 dsps_sub_f32_ae32
|
||||
#else
|
||||
#define dsps_sub_f32 dsps_sub_f32_ansi
|
||||
#endif
|
||||
|
||||
#if (dsps_sub_s16_aes3_enabled == 1)
|
||||
#define dsps_sub_s16 dsps_sub_s16_aes3
|
||||
#define dsps_sub_s8 dsps_sub_s8_aes3
|
||||
#elif (dsps_sub_s16_ae32_enabled == 1)
|
||||
#define dsps_sub_s16 dsps_sub_s16_ae32
|
||||
#define dsps_sub_s8 dsps_sub_s8_ansi
|
||||
#else
|
||||
#define dsps_sub_s16 dsps_sub_s16_ansi
|
||||
#define dsps_sub_s8 dsps_sub_s8_ansi
|
||||
#endif
|
||||
|
||||
#else // CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_sub_f32 dsps_sub_f32_ansi
|
||||
#define dsps_sub_s16 dsps_sub_s16_ansi
|
||||
#define dsps_sub_s8 dsps_sub_s8_ansi
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif // _dsps_sub_H_
|
||||
@@ -0,0 +1,30 @@
|
||||
#ifndef _dsps_sub_platform_H_
#define _dsps_sub_platform_H_

#include "sdkconfig.h"

#ifdef __XTENSA__
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>

// The f32 kernel uses floating point instructions and a zero-overhead loop,
// so it is only enabled when the core provides both.
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
#define dsps_sub_f32_ae32_enabled  1
#endif

// The s16 kernel uses integer instructions only and needs just the
// zero-overhead loop option.
#if (XCHAL_HAVE_LOOPS == 1)
#define dsps_sub_s16_ae32_enabled  1
#endif

// ESP32-S3 specific vector kernels (s16 and s8 share this switch).
#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
#define dsps_sub_f32_ae32_enabled  1
#define dsps_sub_s16_aes3_enabled  1
#endif

#endif // __XTENSA__

#endif // _dsps_sub_platform_H_
|
||||
@@ -0,0 +1,73 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_sub.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_sub";
|
||||
|
||||
TEST_CASE("dsps_sub_f32_ansi functionality", "[dsps]")
{
    // 3*i - 2*i must give i; the result is written back over the first operand.
    int n = 32;
    float x[n];
    float y[n];
    float z[n];
    int idx;

    for (idx = 0; idx < n; idx++) {
        z[idx] = idx;
        y[idx] = idx * 2;
        x[idx] = idx * 3;
    }

    dsps_sub_f32_ansi(x, y, x, n, 1, 1, 1);

    for (idx = 0; idx < n; idx++) {
        if (x[idx] != z[idx]) {
            TEST_ASSERT_EQUAL(x[idx], z[idx]);
        }
    }
}
|
||||
|
||||
TEST_CASE("dsps_sub_f32 functionality", "[dsps]")
{
    // Functional check (3*i - 2*i == i, result stored in place) followed by a
    // cycle measurement of a single call.
    int n = 32;
    float x[n];
    float y[n];
    float z[n];
    int idx;

    for (idx = 0; idx < n; idx++) {
        z[idx] = idx;
        y[idx] = idx * 2;
        x[idx] = idx * 3;
    }

    dsps_sub_f32(x, y, x, n, 1, 1, 1);

    for (idx = 0; idx < n; idx++) {
        if (x[idx] != z[idx]) {
            TEST_ASSERT_EQUAL(x[idx], z[idx]);
        }
    }

    // One untimed call first, then the timed one.
    int repeat_count = 1;
    dsps_sub_f32(x, x, x, n, 1, 1, 1);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sub_f32(x, x, x, n, 1, 1, 1);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    float total_b = end_b - start_b;
    float cycles = total_b / (n * repeat_count);
    ESP_LOGI(TAG, "dsps_sub_f32 - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_sub.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
#if (dsps_sub_s16_ae32_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_sub";
|
||||
|
||||
TEST_CASE("dsps_sub_s16_ae32 functionality", "[dsps]")
{
    // Two different operands with a non-zero first element are used, so a
    // wrong result at any index (including index 0) changes the output.
    // Subtracting an array from itself would always expect zero and hide
    // such errors.
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int16_t expected[n];
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = (i + 1) << 4;
        y[i] = 3 * i + 1;
        expected[i] = ((int32_t)x[i] - (int32_t)y[i]) >> shift;
    }

    // The result is written back over the first operand.
    dsps_sub_s16_ae32(x, y, x, n, 1, 1, 1, shift);
    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], expected[i]);
        TEST_ASSERT_EQUAL(expected[i], x[i]);
    }
}
|
||||
|
||||
TEST_CASE("dsps_sub_s16_ae32 benchmark", "[dsps]")
{
    // Measure one call over n samples and report the cost per sample.
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sub_s16_ae32(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line says "cycles per sample", so normalise the total by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_sub_s16_ae32 - %f cycles per sample \n", cycles);
}
|
||||
#endif // dsps_sub_s16_ae32_enabled
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_sub.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#if (dsps_sub_s16_aes3_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_sub";
|
||||
TEST_CASE("dsps_sub_s16_aes3 functionality", "[dsps]")
{
    // Both operands are 16-byte aligned and n is a multiple of 8. Two
    // different operands with a non-zero first element are used so that a
    // wrong result at any index changes the output.
    int n = 64;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
    if ((x == NULL) || (y == NULL)) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("dsps_sub_s16_aes3: failed to allocate test buffers");
    }

    int16_t expected[n];
    int16_t actual[n];
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = (i + 1) << 4;
        y[i] = 3 * i + 1;
        expected[i] = ((int32_t)x[i] - (int32_t)y[i]) >> shift;
    }

    // The result is written back over the first operand.
    dsps_sub_s16_aes3(x, y, x, n, 1, 1, 1, shift);

    // Copy the result and release the heap buffers before asserting, so a
    // failing assertion does not leak them.
    memcpy(actual, x, n * sizeof(int16_t));
    free(x);
    free(y);

    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, actual[i], expected[i]);
        TEST_ASSERT_EQUAL(expected[i], actual[i]);
    }
}
|
||||
|
||||
TEST_CASE("dsps_sub_s16_aes3 benchmark", "[dsps]")
{
    // Measure one call over n aligned samples and report the cost per sample.
    const int n = 256;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sub_s16_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line says "cycles per sample", so normalise the total by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_sub_s16_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
#endif // (dsps_sub_s16_aes3_enabled == 1)
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_sub.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_sub";
|
||||
|
||||
TEST_CASE("dsps_sub_s16_ansi functionality", "[dsps]")
{
    // Subtract the array from itself in place and compare with a reference
    // computed in 32-bit arithmetic.
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int shift = 0;
    int idx;

    for (idx = 0; idx < n; idx++) {
        x[idx] = idx << 4;
        int32_t reference = ((int32_t)x[idx] - (int32_t)x[idx]) >> shift;
        y[idx] = reference;
    }

    dsps_sub_s16_ansi(x, x, x, n, 1, 1, 1, shift);

    for (idx = 0; idx < n; idx++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", idx, x[idx], y[idx]);
        if (x[idx] != y[idx]) {
            TEST_ASSERT_EQUAL(x[idx], y[idx]);
        }
    }
}
|
||||
|
||||
TEST_CASE("dsps_sub_s16_ansi benchmark", "[dsps]")
{
    // Measure one call over n samples and report the cost per sample.
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sub_s16_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line says "cycles per sample", so normalise the total by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_sub_s16_ansi - %f cycles per sample \n", cycles);
}
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include <malloc.h>
|
||||
|
||||
#include "dsps_sub.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#if (dsps_sub_s16_aes3_enabled == 1)
|
||||
|
||||
static const char *TAG = "dsps_sub";
|
||||
TEST_CASE("dsps_sub_s8_aes3 functionality", "[dsps]")
{
    // Both operands are 16-byte aligned and n is a multiple of 16. Two
    // different operands are used so that a wrong result at any index
    // changes the output; all differences stay well inside the int8_t range.
    int n = 64;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
    if ((x == NULL) || (y == NULL)) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("dsps_sub_s8_aes3: failed to allocate test buffers");
    }

    int8_t expected[n];
    int8_t actual[n];
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i - n / 2;
        y[i] = (i % 5) + 1;
        expected[i] = ((int32_t)x[i] - (int32_t)y[i]) >> shift;
    }

    // The result is written back over the first operand.
    dsps_sub_s8_aes3(x, y, x, n, 1, 1, 1, shift);

    // Copy the result and release the heap buffers before asserting, so a
    // failing assertion does not leak them.
    memcpy(actual, x, n * sizeof(int8_t));
    free(x);
    free(y);

    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, actual[i], expected[i]);
        TEST_ASSERT_EQUAL(expected[i], actual[i]);
    }
}
|
||||
|
||||
TEST_CASE("dsps_sub_s8_aes3 benchmark", "[dsps]")
{
    // Measure one call over n aligned samples and report the cost per sample.
    const int n = 256;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    TEST_ASSERT_NOT_NULL(x);
    for (int i = 0 ; i < n ; i++) {
        // Only the low byte is kept; the data values do not matter here.
        x[i] = (int8_t)(i << 4);
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sub_s8_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line says "cycles per sample", so normalise the total by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_sub_s8_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
|
||||
#endif // (dsps_sub_s16_aes3_enabled == 1)
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_sub.h"
|
||||
#include "esp_attr.h"
|
||||
|
||||
static const char *TAG = "dsps_sub";
|
||||
|
||||
TEST_CASE("dsps_sub_s8_ansi functionality", "[dsps]")
{
    // Subtract the array from itself in place and compare with a reference
    // computed in 32-bit arithmetic.
    int n = 64;
    int8_t x[n];
    int8_t y[n];
    int shift = 0;
    int idx;

    for (idx = 0; idx < n; idx++) {
        x[idx] = idx - n / 2;
        int32_t reference = ((int32_t)x[idx] - (int32_t)x[idx]) >> shift;
        y[idx] = reference;
    }

    dsps_sub_s8_ansi(x, x, x, n, 1, 1, 1, 0);

    for (idx = 0; idx < n; idx++) {
        if (x[idx] != y[idx]) {
            TEST_ASSERT_EQUAL(x[idx], y[idx]);
        }
    }
}
|
||||
|
||||
TEST_CASE("dsps_sub_s8_ansi benchmark", "[dsps]")
{
    // Measure one call over n samples and report the cost per sample.
    const int n = 256;
    int8_t x[n];
    for (int i = 0 ; i < n ; i++) {
        // Only the low byte is kept; the data values do not matter here.
        x[i] = (int8_t)(i << 4);
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_sub_s8_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // The log line says "cycles per sample", so normalise the total by n.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_sub_s8_ansi - %f cycles per sample \n", cycles);
}
|
||||
Reference in New Issue
Block a user