add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,74 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dsps_mul_platform.h"
#if (dsps_mul_s16_ae32_enabled == 1)
.text
.align 4
.global dsps_mul_s16_ae32
.type dsps_mul_s16_ae32,@function
// The function implements the following C code:
// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
// {
//     for (int i = 0 ; i < len ; i++) {
//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
//         output[i * step_out] = acc >> shift;
//     }
//     return ESP_OK;
// }
//
// No NULL-pointer checks are performed in this routine.
//
// Register usage:
//   a2      - input1 pointer on entry, return status on exit
//   a3      - input2 pointer
//   a4      - output pointer
//   a5      - len (loop trip count)
//   a6      - step1, converted to a byte stride
//   a7      - step2, converted to a byte stride
//   a10     - step_out (read from the stack), converted to a byte stride
//   a9      - shift (read from the stack); afterwards holds the shifted result
//   a8, a11 - scratch: loaded operands and the raw 32-bit product
dsps_mul_s16_ae32:
    entry       a1, 16

    l32i.n      a10, a1, 16             // a10 = step_out (stack argument)
    l32i.n      a9,  a1, 20             // a9  = shift    (stack argument)
    ssr         a9                      // SAR = shift amount used by sra below

    slli        a6,  a6,  1             // a6  = step1    * sizeof(int16_t)
    slli        a7,  a7,  1             // a7  = step2    * sizeof(int16_t)
    slli        a10, a10, 1             // a10 = step_out * sizeof(int16_t)

    // Software-pipeline prologue: compute the result for element 0 before
    // entering the loop. Element 0 is read even when len == 0 (nothing is
    // stored in that case).
    l16si       a11, a2, 0              // a11 = input1[0] (sign-extended)
    l16si       a8,  a3, 0              // a8  = input2[0] (sign-extended)
    mull        a8,  a11, a8            // a8  = input1[0] * input2[0]
    sra         a9,  a8                 // a9  = a8 >> SAR (arithmetic, like C's signed >>)

    // Each iteration stores the result computed in the previous step and
    // prepares the next one. The last iteration therefore loads (and discards)
    // one element beyond the last one that is multiplied into the output.
    loopnez     a5, .loop_end_mul_s16_ae32
        add.n       a2, a2, a6          // input1 += step1
        add.n       a3, a3, a7          // input2 += step2

        l16si       a11, a2, 0          // fetch the next pair of operands
        l16si       a8,  a3, 0
        s16i        a9,  a4, 0          // store the pending result (low 16 bits)
        mull        a8,  a11, a8        // next product
        sra         a9,  a8             // next result = product >> SAR

        add.n       a4, a4, a10         // output += step_out
.loop_end_mul_s16_ae32:

    movi.n      a2, 0                   // return status ESP_OK
    retw.n
#endif // dsps_mul_s16_ae32_enabled

View File

@@ -0,0 +1,104 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dsps_mul_platform.h"
#if (dsps_mul_s16_aes3_enabled == 1)
.text
.align 4
.global dsps_mul_s16_aes3
.type dsps_mul_s16_aes3,@function
// The function implements the following C code:
// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
// {
//     for (int i = 0 ; i < len ; i++) {
//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
//         output[i * step_out] = acc >> shift;
//     }
//     return ESP_OK;
// }
//
// Two code paths are provided:
//   * a 128-bit vector path, taken only when all three steps are 1, all three
//     buffers are 16-byte aligned and len is a multiple of 8;
//   * a scalar fallback for every other case.
// No NULL-pointer checks are performed in this routine.
//
// Register usage:
//   a2      - input1 pointer on entry, return status on exit
//   a3      - input2 pointer
//   a4      - output pointer
//   a5      - len
//   a6      - step1
//   a7      - step2
//   a10     - step_out (read from the stack)
//   a9      - shift (read from the stack); result register in the scalar path
//   a8, a11 - scalar-path scratch
//   a15     - scratch for the eligibility checks
dsps_mul_s16_aes3:
    entry       a1, 16

    l32i.n      a10, a1, 16             // a10 = step_out (stack argument)
    l32i.n      a9,  a1, 20             // a9  = shift    (stack argument)
    ssr         a9                      // SAR = shift amount

    // The vector path only handles contiguous data: every step must equal 1.
    addi        a15, a6, -1
    bnez        a15, .mul_s16_ae32_mode // step1 != 1
    addi        a15, a7, -1
    bnez        a15, .mul_s16_ae32_mode // step2 != 1
    addi        a15, a10, -1
    bnez        a15, .mul_s16_ae32_mode // step_out != 1

    // All three buffers must be 16-byte aligned for the 128-bit loads/stores.
    // The output pointer is checked as well, so that an unaligned destination
    // is handled by the scalar path instead of the 128-bit store.
    movi        a15, 0xF                // modulo 16 mask
    bany        a2, a15, .mul_s16_ae32_mode
    bany        a3, a15, .mul_s16_ae32_mode
    bany        a4, a15, .mul_s16_ae32_mode

    // One vector holds 8 int16 samples: len must be a multiple of 8.
    movi        a15, 0x7                // modulo 8 mask
    bany        a5, a15, .mul_s16_ae32_mode

    // ---- Vector path (ESP32-S3) ----
    wsr.sar     a9                      // load the shift amount into SAR
    srli        a5, a5, 3               // number of 8-sample vectors

    // Preload the first input1 vector; each loop step multiplies the current
    // vector and fetches the following one from input1, so one extra 16-byte
    // block of input1 is loaded (and not used) at the end.
    ee.vld.128.ip           q0, a2, 16
    loopnez     a5, .loop_end_mul_s16_aes3_main
        ee.vld.128.ip           q1, a3, 16              // q1 = next input2 vector
        ee.vmul.s16.ld.incp     q0, a2, q4, q0, q1      // q4 = q0 * q1 (shifted by SAR); q0 = next input1 vector
        ee.vst.128.ip           q4, a4, 16              // store 8 results
.loop_end_mul_s16_aes3_main:

    // Exit for Esp32s3 mode
    movi.n      a2, 0                   // return status ESP_OK
    retw.n

    // ---- Scalar fallback ----
.mul_s16_ae32_mode:
    slli        a6,  a6,  1             // a6  = step1    * sizeof(int16_t)
    slli        a7,  a7,  1             // a7  = step2    * sizeof(int16_t)
    slli        a10, a10, 1             // a10 = step_out * sizeof(int16_t)

    // Pipeline prologue: result for element 0 (read even when len == 0).
    l16si       a11, a2, 0
    l16si       a8,  a3, 0
    mull        a8,  a11, a8            // a8 = input1[0] * input2[0]
    sra         a9,  a8                 // a9 = a8 >> SAR (arithmetic, like C's signed >>)

    // Each iteration stores the pending result and computes the next one; the
    // final iteration loads one element past the last one that is used.
    loopnez     a5, .loop_end_mul_s16_aes3
        add.n       a2, a2, a6          // input1 += step1
        add.n       a3, a3, a7          // input2 += step2

        l16si       a11, a2, 0
        l16si       a8,  a3, 0
        s16i        a9,  a4, 0          // store the pending result (low 16 bits)
        mull        a8,  a11, a8
        sra         a9,  a8             // next result = product >> SAR

        add.n       a4, a4, a10         // output += step_out
.loop_end_mul_s16_aes3:

    // Exit for Esp32 mode
    movi.n      a2, 0                   // return status ESP_OK
    retw.n
#endif // dsps_mul_s16_aes3_enabled

View File

@@ -0,0 +1,34 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_mul.h"
/*
 * Element-wise multiplication of two int16_t arrays with a right shift of
 * each 32-bit product before it is written to the output.
 *
 * output[i * step_out] = (input1[i * step1] * input2[i * step2]) >> shift
 * for i in [0, len).
 *
 * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE if any of the three pointers is NULL,
 * ESP_OK otherwise. A non-positive len leaves the output untouched.
 */
esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
{
    // All three buffers are mandatory.
    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    int idx = 0;
    while (idx < len) {
        // Widen both operands so the product is formed in int precision.
        const int lhs = (int)input1[idx * step1];
        const int rhs = (int)input2[idx * step2];
        const int product = lhs * rhs;

        // The assignment narrows the shifted product back to int16_t.
        output[idx * step_out] = product >> shift;
        idx++;
    }
    return ESP_OK;
}

View File

@@ -0,0 +1,97 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dsps_mul_platform.h"
#if (dsps_mul_s16_aes3_enabled == 1)
.text
.align 4
.global dsps_mul_s8_aes3
.type dsps_mul_s8_aes3,@function
// The function implements the following C code:
// esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
// {
//     for (int i = 0 ; i < len ; i++) {
//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
//         output[i * step_out] = acc >> shift;
//     }
//     return ESP_OK;
// }
//
// Two code paths are provided:
//   * a 128-bit vector path, taken only when all three steps are 1, all three
//     buffers are 16-byte aligned and len is a multiple of 16;
//   * a scalar fallback for every other case.
// No NULL-pointer checks are performed in this routine.
//
// Register usage:
//   a2      - input1 pointer on entry, return status on exit
//   a3      - input2 pointer
//   a4      - output pointer
//   a5      - len
//   a6      - step1 (elements are bytes, so it is already a byte stride)
//   a7      - step2
//   a10     - step_out (read from the stack)
//   a9      - shift (read from the stack); result register in the scalar path
//   a8, a11 - scalar-path scratch
//   a15     - scratch for the eligibility checks
dsps_mul_s8_aes3:
    entry       a1, 16

    l32i.n      a10, a1, 16             // a10 = step_out (stack argument)
    l32i.n      a9,  a1, 20             // a9  = shift    (stack argument)
    ssr         a9                      // SAR = shift amount

    // The vector path only handles contiguous data: every step must equal 1.
    addi        a15, a6, -1
    bnez        a15, .mul_s8_ae32_mode  // step1 != 1
    addi        a15, a7, -1
    bnez        a15, .mul_s8_ae32_mode  // step2 != 1
    addi        a15, a10, -1
    bnez        a15, .mul_s8_ae32_mode  // step_out != 1

    // All three buffers must be 16-byte aligned for the 128-bit loads/stores.
    // The output pointer is checked as well, so that an unaligned destination
    // is handled by the scalar path instead of the 128-bit store.
    movi        a15, 0xF                // modulo 16 mask
    bany        a2, a15, .mul_s8_ae32_mode
    bany        a3, a15, .mul_s8_ae32_mode
    bany        a4, a15, .mul_s8_ae32_mode

    // One vector holds 16 int8 samples: len must be a multiple of 16.
    // (a15 still holds the modulo 16 mask.)
    bany        a5, a15, .mul_s8_ae32_mode

    // ---- Vector path (ESP32-S3) ----
    wsr.sar     a9                      // load the shift amount into SAR
    srli        a5, a5, 4               // number of 16-sample vectors

    // Preload the first input1 vector; each loop step multiplies the current
    // vector and fetches the following one from input1, so one extra 16-byte
    // block of input1 is loaded (and not used) at the end.
    ee.vld.128.ip           q0, a2, 16
    loopnez     a5, .loop_end_mul_s8_aes3_main
        ee.vld.128.ip           q1, a3, 16              // q1 = next input2 vector
        ee.vmul.s8.ld.incp      q0, a2, q4, q0, q1      // q4 = q0 * q1 (shifted by SAR); q0 = next input1 vector
        ee.vst.128.ip           q4, a4, 16              // store 16 results
.loop_end_mul_s8_aes3_main:

    // Exit for Esp32s3 mode
    movi.n      a2, 0                   // return status ESP_OK
    retw.n

    // ---- Scalar fallback ----
.mul_s8_ae32_mode:
    // Pipeline prologue: result for element 0 (read even when len == 0).
    // l8ui zero-extends, so each operand is sign-extended from bit 7 to get
    // the signed int8_t value before multiplying.
    l8ui        a11, a2, 0
    l8ui        a8,  a3, 0
    sext        a11, a11, 7
    sext        a8,  a8,  7
    mull        a8,  a11, a8            // a8 = input1[0] * input2[0]
    sra         a9,  a8                 // a9 = a8 >> SAR (arithmetic, like C's signed >>)

    // Each iteration stores the pending result and computes the next one; the
    // final iteration loads one element past the last one that is used.
    loopnez     a5, .loop_end_mul_s8_aes3
        add.n       a2, a2, a6          // input1 += step1
        add.n       a3, a3, a7          // input2 += step2

        l8ui        a11, a2, 0
        l8ui        a8,  a3, 0
        s8i         a9,  a4, 0          // store the pending result (low 8 bits)
        sext        a11, a11, 7         // int8_t -> int32_t
        sext        a8,  a8,  7
        mull        a8,  a11, a8
        sra         a9,  a8             // next result = product >> SAR

        add.n       a4, a4, a10         // output += step_out
.loop_end_mul_s8_aes3:

    // Exit for Esp32 mode
    movi.n      a2, 0                   // return status ESP_OK
    retw.n
#endif // dsps_mul_s8_aes3_enabled

View File

@@ -0,0 +1,26 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dsps_mul.h"
/*
 * Element-wise multiplication of two int8_t arrays with a right shift of
 * each 32-bit product before it is written to the output.
 *
 * output[i * step_out] = (input1[i * step1] * input2[i * step2]) >> shift
 * for i in [0, len).
 *
 * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE if any of the three pointers is NULL,
 * ESP_OK otherwise. A non-positive len leaves the output untouched.
 */
esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
{
    // All three buffers are mandatory.
    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    int idx = 0;
    while (idx < len) {
        // Widen both operands so the product is formed in 32-bit precision.
        const int32_t lhs = (int32_t)input1[idx * step1];
        const int32_t rhs = (int32_t)input2[idx * step2];
        const int32_t product = lhs * rhs;

        // The assignment narrows the shifted product back to int8_t.
        output[idx * step_out] = product >> shift;
        idx++;
    }
    return ESP_OK;
}

View File

@@ -0,0 +1,64 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_mul_platform.h"
#if (dsps_mul_f32_ae32_enabled == 1)
// Element-wise multiplication of two float arrays, assembly implementation.
.text
.align 4
.global dsps_mul_f32_ae32
.type dsps_mul_f32_ae32,@function
// The function implements the following C code:
// esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
// {
//     for (int i = 0 ; i < len ; i++) {
//         output[i * step_out] = input1[i * step1] * input2[i * step2];
//     }
//     return ESP_OK;
// }
//
// No NULL-pointer checks are performed in this routine.
dsps_mul_f32_ae32:
// input1 - a2 (also receives the return status on exit)
// input2 - a3
// output - a4
// len - a5
// step1 - a6
// step2 - a7
// step_out - stack (a8)
// f0 - current input1 element, f1 - current input2 element, f2 - product
entry a1, 16
l32i.n a8, a1, 16 // Load step_out to the a8 register
// Convert the element steps into byte strides (sizeof(float) == 4).
slli a6, a6, 2 // a6 - step1<<2
slli a7, a7, 2 // a7 - step2<<2
slli a8, a8, 2 // a8 - step_out<<2
// Preload input1[0] before the loop; this load happens even when len == 0.
lsi f0, a2, 0
add.n a2, a2, a6 // input1_ptr+=step1;
// Each iteration multiplies the already loaded input1 element by the current
// input2 element and fetches the next input1 element before the store. The
// last iteration therefore loads one input1 element that is never used.
loopnez a5, loop_end_mul_f32_ae32
lsi f1, a3, 0
add.n a3, a3, a7 // input2_ptr+=step2;
mul.s f2, f1, f0 // f2 = f1*f0
lsi f0, a2, 0 // fetch the next input1 element
add.n a2, a2, a6 // input1_ptr+=step1;
ssi f2, a4, 0 // store the product to the output
add.n a4, a4, a8 // output_ptr+=step_out;
loop_end_mul_f32_ae32:
movi.n a2, 0 // return status ESP_OK
retw.n
#endif // dsps_mul_f32_ae32_enabled

View File

@@ -0,0 +1,33 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_mul.h"
/*
 * Element-wise multiplication of two float arrays:
 *
 * output[i * step_out] = input1[i * step1] * input2[i * step2]
 * for i in [0, len).
 *
 * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE if any of the three pointers is NULL,
 * ESP_OK otherwise. A non-positive len leaves the output untouched.
 */
esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
{
    // All three buffers are mandatory.
    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    int idx = 0;
    while (idx < len) {
        // Both operands are read before the store, so output may alias an input.
        const float lhs = input1[idx * step1];
        const float rhs = input2[idx * step2];
        output[idx * step_out] = lhs * rhs;
        idx++;
    }
    return ESP_OK;
}

View File

@@ -0,0 +1,111 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _dsps_mul_H_
#define _dsps_mul_H_
#include "dsp_err.h"
#include "dsps_mul_platform.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**@{*/
/**
 * @brief   Multiply two float arrays element by element
 *
 * The function multiplies each element of one input array by the corresponding
 * element of the other and stores the result in the output array:
 * out[i*step_out] = input1[i*step1] * input2[i*step2]; i=[0..len)
 *
 * dsps_mul_f32_ansi is written in ANSI C and can be compiled and run on any platform.
 * dsps_mul_f32_ae32 is the assembly implementation with the same signature.
 *
 * @param[in] input1: input array 1
 * @param[in] input2: input array 2
 * @param output: output array
 * @param len: number of elements to process
 * @param step1: step over input array 1, in elements (by default should be 1)
 * @param step2: step over input array 2, in elements (by default should be 1)
 * @param step_out: step over output array, in elements (by default should be 1)
 *
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
esp_err_t dsps_mul_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
/**@}*/
/**@{*/
/**
 * @brief   Multiply two fixed-point arrays element by element
 *
 * The function multiplies each element of one input array by the corresponding
 * element of the other, shifts the product right by shift bits and stores the
 * result in the output array:
 * out[i*step_out] = (input1[i*step1] * input2[i*step2]) >> shift; i=[0..len)
 *
 * The _s16 variants operate on int16_t data, the _s8 variants on int8_t data.
 * The _ansi implementations are written in ANSI C and can be compiled and run
 * on any platform; the _ae32 and _aes3 variants are assembly implementations
 * with the same signatures.
 *
 * @param[in] input1: input array 1
 * @param[in] input2: input array 2
 * @param output: output array
 * @param len: number of elements to process
 * @param step1: step over input array 1, in elements (by default should be 1)
 * @param step2: step over input array 2, in elements (by default should be 1)
 * @param step_out: step over output array, in elements (by default should be 1)
 * @param shift: output shift after multiplication (by default should be 15)
 *
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_mul_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_mul_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
esp_err_t dsps_mul_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
/**@}*/
#ifdef __cplusplus
}
#endif
// Dispatch macros: dsps_mul_f32 / dsps_mul_s16 / dsps_mul_s8 resolve to the
// implementation selected by CONFIG_DSP_OPTIMIZED and the *_enabled flags.
#if CONFIG_DSP_OPTIMIZED
// f32: use the ae32 implementation when it is enabled, ANSI C otherwise.
#if (dsps_mul_f32_ae32_enabled == 1)
#define dsps_mul_f32 dsps_mul_f32_ae32
#else
#define dsps_mul_f32 dsps_mul_f32_ansi
#endif
// s16 / s8: prefer the aes3 implementations, then ae32 for s16 (there is no
// ae32 variant for s8, so it stays on ANSI C), then ANSI C for both.
#if (dsps_mul_s16_aes3_enabled == 1)
#define dsps_mul_s16 dsps_mul_s16_aes3
#define dsps_mul_s8 dsps_mul_s8_aes3
#elif (dsps_mul_s16_ae32_enabled == 1)
#define dsps_mul_s16 dsps_mul_s16_ae32
#define dsps_mul_s8 dsps_mul_s8_ansi
#else
#define dsps_mul_s16 dsps_mul_s16_ansi
#define dsps_mul_s8 dsps_mul_s8_ansi
#endif
#else // CONFIG_DSP_OPTIMIZED
// Optimizations disabled: always use the ANSI C implementations.
#define dsps_mul_f32 dsps_mul_f32_ansi
#define dsps_mul_s16 dsps_mul_s16_ansi
#define dsps_mul_s8 dsps_mul_s8_ansi
#endif // CONFIG_DSP_OPTIMIZED
#endif // _dsps_mul_H_

View File

@@ -0,0 +1,30 @@
#ifndef _dsps_mul_platform_H_
#define _dsps_mul_platform_H_

// Selects which optimized dsps_mul implementations are available on the
// current target. Each dsps_mul_*_enabled macro is defined to 1 when the
// corresponding implementation may be used, and left undefined otherwise.

#include "sdkconfig.h"

#ifdef __XTENSA__
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>

// The f32 kernel uses floating-point instructions and a zero-overhead loop,
// so it requires both core options.
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
#define dsps_mul_f32_ae32_enabled 1
#endif

// The s16 kernel uses only integer instructions and a zero-overhead loop.
#if (XCHAL_HAVE_LOOPS == 1)
#define dsps_mul_s16_ae32_enabled 1
#endif

// ESP32-S3: enable the aes3 kernels (this flag also guards the s8 kernel) and
// keep the f32 kernel enabled for this target.
#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
#ifndef dsps_mul_f32_ae32_enabled
#define dsps_mul_f32_ae32_enabled 1
#endif
#define dsps_mul_s16_aes3_enabled 1
#endif

#endif // __XTENSA__

#endif // _dsps_mul_platform_H_

View File

@@ -0,0 +1,71 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include "dsp_tests.h"
#include "dsps_mul.h"
#include "esp_attr.h"
static const char *TAG = "dsps_mul";
// Squares a small ramp in place with the ANSI C kernel and compares every
// element against the expected i * i value.
TEST_CASE("dsps_mul_f32_ansi functionality", "[dsps]")
{
    int n = 32;
    float x[n];
    float y[n];

    // x holds the ramp 0..n-1, y the squares that x * x must produce.
    int k = 0;
    while (k < n) {
        x[k] = k;
        y[k] = k * k;
        k++;
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_f32_ansi(x, x, x, n, 1, 1, 1);

    for (k = 0 ; k < n ; k++) {
        if (x[k] == y[k]) {
            continue;
        }
        TEST_ASSERT_EQUAL(x[k], y[k]);
    }
}
// Checks the dispatched dsps_mul_f32 implementation against i * i and then
// logs how many CPU cycles one call takes per element.
TEST_CASE("dsps_mul_f32 functionality", "[dsps]")
{
    int n = 32;
    float x[n];
    float y[n];

    // x holds the ramp 0..n-1, y the squares that x * x must produce.
    int k = 0;
    while (k < n) {
        x[k] = k;
        y[k] = k * k;
        k++;
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_f32(x, x, x, n, 1, 1, 1);

    for (k = 0 ; k < n ; k++) {
        if (x[k] == y[k]) {
            continue;
        }
        TEST_ASSERT_EQUAL(x[k], y[k]);
    }

    int repeat_count = 1;

    // One untimed call precedes the measured one (presumably a warm-up).
    dsps_mul_f32(x, x, x, n, 1, 1, 1);

    const unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_f32(x, x, x, n, 1, 1, 1);
    const unsigned int end_b = dsp_get_cpu_cycle_count();

    const float total_b = end_b - start_b;
    const float cycles = total_b / (n * repeat_count);
    ESP_LOGI(TAG, "dsps_mul_f32 - %f cycles per sample \n", cycles);
}

View File

@@ -0,0 +1,62 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include "dsp_tests.h"
#include "dsps_mul.h"
#include "esp_attr.h"
static const char *TAG = "dsps_mul";
// Squares a ramp in place with the dispatched dsps_mul_s16 implementation and
// compares every element against a reference computed in 32-bit arithmetic.
TEST_CASE("dsps_mul_s16 functionality", "[dsps]")
{
    int n = 64;
    int16_t x[n];
    int16_t y[n];
    int32_t temp;
    int shift = 0;

    // Build the input ramp and the expected (narrowed) squares.
    int k = 0;
    while (k < n) {
        x[k] = k << 4;
        temp = (int32_t)x[k] * (int32_t)x[k];
        temp = temp >> shift;
        y[k] = temp;
        k++;
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_s16(x, x, x, n, 1, 1, 1, shift);

    for (k = 0 ; k < n ; k++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", k, x[k], y[k]);
        if (x[k] == y[k]) {
            continue;
        }
        TEST_ASSERT_EQUAL(x[k], y[k]);
    }
}
// Measures one call of the dispatched dsps_mul_s16 implementation and logs
// the cost in CPU cycles per processed element.
TEST_CASE("dsps_mul_s16 benchmark", "[dsps]")
{
    const int n = 256;
    int16_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s16(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // Normalise by the element count so the value matches the
    // "cycles per sample" label in the log message.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s16 - %f cycles per sample \n", cycles);
}

View File

@@ -0,0 +1,69 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include <malloc.h>
#include "dsps_mul.h"
#include "esp_attr.h"
#include "dsp_tests.h"
#if (dsps_mul_s16_aes3_enabled == 1)
static const char *TAG = "dsps_mul";
// Squares a ramp in place with dsps_mul_s16_aes3 on 16-byte aligned buffers
// and compares every element against a reference computed in 32-bit arithmetic.
TEST_CASE("dsps_mul_s16_aes3 functionality", "[dsps]")
{
    int n = 64;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));

    // Fail cleanly (without leaking the other buffer) if allocation failed.
    if ((x == NULL) || (y == NULL)) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    int32_t temp;
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;    // expected value, narrowed to int16_t like the output
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, shift);

    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        if (x[i] != y[i]) {
            TEST_ASSERT_EQUAL(x[i], y[i]);
        }
    }
    free(x);
    free(y);
}
// Measures one call of dsps_mul_s16_aes3 on a 16-byte aligned buffer and logs
// the cost in CPU cycles per processed element.
TEST_CASE("dsps_mul_s16_aes3 benchmark", "[dsps]")
{
    const int n = 2048;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    TEST_ASSERT_NOT_NULL(x);

    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // Normalise by the element count so the value matches the
    // "cycles per sample" label in the log message.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s16_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
#endif // (dsps_mul_s16_aes3_enabled == 1)

View File

@@ -0,0 +1,66 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include <malloc.h>
#include "dsps_mul.h"
#include "esp_attr.h"
#include "dsp_tests.h"
static const char *TAG = "dsps_mul";
// Squares a ramp in place with the ANSI C dsps_mul_s16_ansi kernel and
// compares every element against a reference computed in 32-bit arithmetic.
TEST_CASE("dsps_mul_s16_ansi functionality", "[dsps]")
{
    int n = 64;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));

    // Fail cleanly (without leaking the other buffer) if allocation failed.
    if ((x == NULL) || (y == NULL)) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    int32_t temp;
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;    // expected value, narrowed to int16_t like the output
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, shift);

    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        if (x[i] != y[i]) {
            TEST_ASSERT_EQUAL(x[i], y[i]);
        }
    }
    free(x);
    free(y);
}
// Measures one call of dsps_mul_s16_ansi and logs the cost in CPU cycles per
// processed element.
TEST_CASE("dsps_mul_s16_ansi benchmark", "[dsps]")
{
    const int n = 2048;
    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
    TEST_ASSERT_NOT_NULL(x);

    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // Normalise by the element count so the value matches the
    // "cycles per sample" label in the log message.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s16_ansi - %f cycles per sample \n", cycles);
    free(x);
}

View File

@@ -0,0 +1,69 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include <malloc.h>
#include "dsps_mul.h"
#include "esp_attr.h"
#include "dsp_tests.h"
#if (dsps_mul_s16_aes3_enabled == 1)
static const char *TAG = "dsps_mul";
// Squares a signed ramp in place with dsps_mul_s8_aes3 on 16-byte aligned
// buffers and compares every element against a reference computed in 32-bit
// arithmetic.
TEST_CASE("dsps_mul_s8_aes3 functionality", "[dsps]")
{
    int n = 64;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));

    // Fail cleanly (without leaking the other buffer) if allocation failed.
    if ((x == NULL) || (y == NULL)) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    int32_t temp;
    int shift = 0;
    for (int i = 0 ; i < n ; i++) {
        x[i] = i - n / 2;   // ramp centred on zero: -n/2 .. n/2 - 1
        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
        y[i] = temp;        // expected value, narrowed to int8_t like the output
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, shift);

    for (int i = 0 ; i < n ; i++) {
        ESP_LOGD(TAG, "x[%i] = %i %i", i, x[i], y[i]);
        if (x[i] != y[i]) {
            TEST_ASSERT_EQUAL(x[i], y[i]);
        }
    }
    free(x);
    free(y);
}
// Measures one call of dsps_mul_s8_aes3 on a 16-byte aligned buffer and logs
// the cost in CPU cycles per processed element.
TEST_CASE("dsps_mul_s8_aes3 benchmark", "[dsps]")
{
    const int n = 256;
    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
    TEST_ASSERT_NOT_NULL(x);

    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // Normalise by the element count so the value matches the
    // "cycles per sample" label in the log message.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s8_aes3 - %f cycles per sample \n", cycles);
    free(x);
}
#endif // (dsps_mul_s16_aes3_enabled == 1)

View File

@@ -0,0 +1,61 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include "dsp_tests.h"
#include "dsps_mul.h"
#include "esp_attr.h"
static const char *TAG = "dsps_mul";
// Squares a signed ramp in place with the ANSI C dsps_mul_s8_ansi kernel and
// compares every element against a reference computed in 32-bit arithmetic.
TEST_CASE("dsps_mul_s8_ansi functionality", "[dsps]")
{
    int n = 64;
    int8_t x[n];
    int8_t y[n];
    int32_t temp;
    int shift = 0;

    // Build a ramp centred on zero and the expected (narrowed) squares.
    int k = 0;
    while (k < n) {
        x[k] = k - n / 2;
        temp = (int32_t)x[k] * (int32_t)x[k];
        temp = temp >> shift;
        y[k] = temp;
        k++;
    }

    // Both inputs and the output alias the same buffer.
    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, 0);

    for (k = 0 ; k < n ; k++) {
        if (x[k] == y[k]) {
            continue;
        }
        TEST_ASSERT_EQUAL(x[k], y[k]);
    }
}
// Measures one call of dsps_mul_s8_ansi and logs the cost in CPU cycles per
// processed element.
TEST_CASE("dsps_mul_s8_ansi benchmark", "[dsps]")
{
    const int n = 256;
    int8_t x[n];
    for (int i = 0 ; i < n ; i++) {
        x[i] = i << 4;
    }

    unsigned int start_b = dsp_get_cpu_cycle_count();
    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, 0);
    unsigned int end_b = dsp_get_cpu_cycle_count();

    // Normalise by the element count so the value matches the
    // "cycles per sample" label in the log message.
    float cycles = (float)(end_b - start_b) / n;
    ESP_LOGI(TAG, "dsps_mul_s8_ansi - %f cycles per sample \n", cycles);
}