add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dspm_add_platform.h"
#if (dspm_add_f32_ae32_enabled == 1)
// This is an add function for sub-matrices for ESP32 processor
//
// For every row, and for every column inside the row, it computes
//   *output = *input1 + *input2
// and then advances the three pointers by step1 / step2 / step_out elements.
// At the end of each row the pointers are additionally advanced by
// padd1 / padd2 / padd_out elements.
//
// NOTE: unlike dspm_add_f32_ansi, this routine performs no argument checks.
// The row counter is decremented before it is tested, so rows must be > 0.
    .text
    .align  4
    .global dspm_add_f32_ae32
    .type   dspm_add_f32_ae32,@function
// The function implements the following C code:
// esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);

dspm_add_f32_ae32:
// input1   - a2
// input2   - a3
// output   - a4
// rows     - a5
// cols     - a6
// padd1    - a7
// padd2    - a8  (loaded from the stack)
// padd_out - a9  (loaded from the stack)
// step1    - a10 (loaded from the stack)
// step2    - a11 (loaded from the stack)
// step_out - a12 (loaded from the stack)

    entry   a1, 16

    // The last five arguments live in consecutive 32-bit stack slots.
    // Each one must be read from its own slot: step2 and step_out were
    // previously read from offset 24 (the step1 slot), which silently
    // replaced both of them with step1.
    l32i.n  a8,  a1, 16             // padd2
    l32i.n  a9,  a1, 20             // padd_out
    l32i.n  a10, a1, 24             // step1
    l32i.n  a11, a1, 28             // step2
    l32i.n  a12, a1, 32             // step_out

    // Convert the element steps to byte steps (sizeof(float) == 4)
    slli    a10, a10, 2             // a10 - step1 << 2
    slli    a11, a11, 2             // a11 - step2 << 2
    slli    a12, a12, 2             // a12 - step_out << 2

.outer_loop_add_f32_ae32:

    // Inner loop over the columns of one row (skipped when cols == 0)
    loopnez a6, .loop_add_f32_ae32
        lsxp    f0, a2, a10         // load input1 to f0, increment input1 (input1_ptr+=step1)
        lsxp    f1, a3, a11         // load input2 to f1, increment input2 (input2_ptr+=step2)

        add.s   f2, f0, f1          // f2 = f0 + f1
        ssxp    f2, a4, a12         // save result f2 to output a4, increment output (output_ptr+=step_out)
.loop_add_f32_ae32:

    // Skip the padding that follows the row in each buffer
    addx4   a3, a8, a3              // input2_ptr += (padd2 << 2);
    addx4   a2, a7, a2              // input1_ptr += (padd1 << 2);
    addx4   a4, a9, a4              // output_ptr += (padd_out << 2);
    addi.n  a5, a5, -1              // rows - 1
    bnez    a5, .outer_loop_add_f32_ae32

    movi.n  a2, 0                   // return status ESP_OK
    retw.n
#endif // dspm_add_f32_ae32_enabled

View File

@@ -0,0 +1,64 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dspm_add.h"
/*
 * Portable C implementation of the sub-matrix addition.
 *
 * For each of the `rows` rows and each of the `cols` columns it writes
 *   output[col * step_out] = input1[col * step1] + input2[col * step2]
 * and then moves every row pointer forward by (cols + padding) elements.
 *
 * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when a buffer pointer is NULL, when
 * rows, cols or any step is not strictly positive, or when any padding is
 * negative. Returns ESP_OK otherwise.
 */
esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out)
{
    /* All three buffers are mandatory */
    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    /* The matrix must contain at least one element */
    if ((rows <= 0) || (cols <= 0)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    /* Paddings may be zero but never negative */
    if ((padd1 < 0) || (padd2 < 0) || (padd_out < 0)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    /* Steps must move forward */
    if ((step1 <= 0) || (step2 <= 0) || (step_out <= 0)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    /* Distance, in elements, between the starts of two consecutive rows */
    const int in1_row_stride = cols + padd1;
    const int in2_row_stride = cols + padd2;
    const int out_row_stride = cols + padd_out;

    const float *in1_row = input1;
    const float *in2_row = input2;
    float *out_row = output;

    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            out_row[c * step_out] = in1_row[c * step1] + in2_row[c * step2];
        }

        in1_row += in1_row_stride;
        in2_row += in2_row_stride;
        out_row += out_row_stride;
    }

    return ESP_OK;
}

View File

@@ -0,0 +1,65 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _dspm_add_H_
#define _dspm_add_H_
#include "dsp_err.h"
#include "dspm_add_platform.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**@{*/
/**
* @brief add two arrays with paddings (add two sub-matrices)
*
* The function adds two arrays defined as sub-matrices with paddings
* out[row * ptr_step_out + col * step_out] = in1[row * ptr_step_in1 + col * step1] + in2[row * ptr_step_in2 + col * step2];
*
* dspm_add_f32_ansi uses ANSI C and can be compiled and run on any platform.
* dspm_add_f32_ae32 is the assembly variant; it is only built when
* dspm_add_f32_ae32_enabled is defined to 1 by dspm_add_platform.h.
*
* @param[in] input1: input array 1
* @param[in] input2: input array 2
* @param[out] output: output array
* @param[in] rows: matrix rows
* @param[in] cols: matrix cols
* @param[in] padd1: input array 1 padding (elements skipped after each row)
* @param[in] padd2: input array 2 padding (elements skipped after each row)
* @param[in] padd_out: output array padding (elements skipped after each row)
* @param[in] step1: step over input array 1 (by default should be 1)
* @param[in] step2: step over input array 2 (by default should be 1)
* @param[in] step_out: step over output array (by default should be 1)
*
* @return
* - ESP_OK on success
* - One of the error codes from DSP library
*/
esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
esp_err_t dspm_add_f32_ae32(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
/**@}*/
#ifdef __cplusplus
}
#endif
// dspm_add_f32 is the name callers are expected to use. It maps to the ae32
// variant only when CONFIG_DSP_OPTIMIZED is set AND the platform header has
// enabled that variant; in every other configuration it maps to the ANSI C one.
#if CONFIG_DSP_OPTIMIZED
#if (dspm_add_f32_ae32_enabled == 1)
#define dspm_add_f32 dspm_add_f32_ae32
#else
#define dspm_add_f32 dspm_add_f32_ansi
#endif
#else // CONFIG_DSP_OPTIMIZED
#define dspm_add_f32 dspm_add_f32_ansi
#endif // CONFIG_DSP_OPTIMIZED
#endif // _dspm_add_H_

View File

@@ -0,0 +1,20 @@
// Platform feature gate for the dspm_add module.
// Defines dspm_add_f32_ae32_enabled (to 1) when the ae32 variant may be used;
// leaves it undefined otherwise, so `#if (dspm_add_f32_ae32_enabled == 1)`
// evaluates to false on every other target.
#ifndef _dspm_add_platform_H_
#define _dspm_add_platform_H_
#include "sdkconfig.h"
#ifdef __XTENSA__
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>
// The ae32 variant is enabled only when the core configuration reports both
// XCHAL_HAVE_FP and XCHAL_HAVE_LOOPS as 1.
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
#define dspm_add_f32_ae32_enabled 1
#endif
#endif // __XTENSA__
#endif // _dspm_add_platform_H_

View File

@@ -0,0 +1,146 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include "esp_dsp.h"
#include "dspm_add.h"
#include "esp_attr.h"
#include "test_mat_common.h"
#include "dsp_tests.h"
TEST_CASE("dspm_add_f32_ae32 functionality", "[dspm]")
{
    // ROI rectangle, re-sized for every tested geometry
    dspm::Mat::Rect roi_rect;
    char message[60];

    for (int var = 0; var < 7; var++) {
        // Each variant decides, per operand, whether the operand is a real matrix
        // holding its own copy of the ROI data (true) or a sub-matrix that only
        // points into the parent buffer (false).
        //   var : 0  1  2  3  4  5  6
        //   A   : -  C  -  C  -  -  C
        //   B   : -  -  C  C  -  C  -
        //   C   : -  -  -  -  C  C  C
        const bool copy_a = (var == 1) || (var == 3) || (var == 6);
        const bool copy_b = (var == 2) || (var == 3) || (var == 5);
        const bool copy_c = (var == 4) || (var == 5) || (var == 6);

        for (int start_row = 0; start_row < 2; start_row++) {
            for (int start_col = 0; start_col < 2; start_col++) {
                for (int row = 1; row < 6; row++) {
                    for (int col = 1; col < 6; col++) {
                        sprintf(message, "var = %d s_row = %d s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);

                        // Parent matrices: the targeted row x col area plus a border of
                        // start_row / start_col elements on both sides
                        //      1 1 1 1
                        //      1 x x 1
                        //      1 x x 1
                        //      1 1 1 1
                        const int full_rows = row + (2 * start_row);
                        const int full_cols = col + (2 * start_col);

                        dspm::Mat A(full_rows, full_cols);
                        dspm::Mat B(full_rows, full_cols);
                        dspm::Mat C_compare(full_rows, full_cols);
                        dspm::Mat C = dspm::Mat::ones(full_rows, full_cols);

                        // Operands handed to the function under test, dimensions set below
                        dspm::Mat A_sub;
                        dspm::Mat B_sub;
                        dspm::Mat C_sub;

                        // adjust ROI rectangle to the current geometry
                        roi_rect.resizeRect(start_col, start_row, col, row);

                        // A and B hold 1, 2, 3, ... so the expected sum is twice that
                        for (int i = 0; i < A.length; i++) {
                            const int value = i + 1;
                            A.data[i] = value;
                            B.data[i] = value;
                            C_compare.data[i] = value * 2;
                        }

                        if (copy_a) {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                        } else {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                        }

                        if (copy_b) {
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                        } else {
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                        }

                        if (copy_c) {
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } else {
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        }

                        dspm_add_f32(A_sub.data, B_sub.data, C_sub.data, row, col, A_sub.padding, B_sub.padding, C_sub.padding, 1, 1, 1);

                        // The result must match the expected values in every variant
                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);
                        test_assert_equal_mat_mat(C_sub_check, C_sub, message);

                        if (C_sub.sub_matrix) {
                            // C is a sub-matrix: compare against a fresh all-ones matrix to
                            // check that the area around the ROI was left untouched
                            dspm::Mat C_area_check = dspm::Mat::ones(full_rows, full_cols);
                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
                        }
                    }
                }
            }
        }
    }
}
// Lock held around the measured section of the benchmark below
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;

TEST_CASE("dspm_add_f32_ae32 benchmark", "[dspm]")
{
    const int M_off = 1;
    const int dim = 4;
    const int repeat_count = 1024;

    // dim x dim region of interest placed M_off elements into a larger matrix
    dspm::Mat mat(dim + M_off, dim + M_off);
    dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);

    portENTER_CRITICAL(&testnlock);

    // One call before the cycle counter is sampled, excluded from the measurement
    dspm_add_f32(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);

    const unsigned int start_b = dsp_get_cpu_cycle_count();
    for (int iter = 0; iter < repeat_count; iter++) {
        dspm_add_f32(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
    }
    const unsigned int end_b = dsp_get_cpu_cycle_count();

    portEXIT_CRITICAL(&testnlock);

    // Average cycle count of a single call
    const float elapsed = end_b - start_b;
    const float cycles = elapsed / (repeat_count);
    printf("Benchmark dspm_add_f32_ae32 - %f per sample %dx%d.\n", cycles, dim, dim);

    const float min_exec = 100;
    const float max_exec = 1400;
    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
}

View File

@@ -0,0 +1,146 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include "esp_dsp.h"
#include "dspm_add.h"
#include "esp_attr.h"
#include "test_mat_common.h"
#include "dsp_tests.h"
// Functional test of dspm_add_f32_ansi over every combination of
// matrix / sub-matrix operands, ROI offsets 0..1 and ROI sizes 1..5.
// Tagged [dspm] (matrix module), matching the dspm_add_f32_ae32 tests.
TEST_CASE("dspm_add_f32_ansi functionality", "[dspm]")
{
    // create ROI rectangle
    dspm::Mat::Rect roi_rect;
    char message[60];

    for (int var = 0; var < 7; var++) {
        for (int start_row = 0; start_row < 2; start_row++) {
            for (int start_col = 0; start_col < 2; start_col++) {
                for (int row = 1; row < 6; row++) {
                    for (int col = 1; col < 6; col++) {
                        // Bounded write: the text can never run past the fixed-size buffer
                        snprintf(message, sizeof(message), "var = %d s_row = %d s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);

                        // create A B C matrices with row col dimensions + padding
                        // padding is from both sides of the targeted sub-matrix
                        //      1 1 1 1
                        //      1 x x 1
                        //      1 x x 1
                        //      1 1 1 1
                        dspm::Mat A(row + (2 * start_row), col + (2 * start_col));
                        dspm::Mat B(row + (2 * start_row), col + (2 * start_col));
                        dspm::Mat C_compare(row + (2 * start_row), col + (2 * start_col));
                        dspm::Mat C = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));

                        // create A B C sub matrices with undefined dimensions
                        dspm::Mat A_sub;
                        dspm::Mat B_sub;
                        dspm::Mat C_sub;

                        // adjust ROI rectangles
                        roi_rect.resizeRect(start_col, start_row, col, row);

                        // A and B hold 1, 2, 3, ... so the expected sum is twice that
                        for (int i = 0; i < A.length; i++) {
                            A.data[i] = i + 1;
                            B.data[i] = i + 1;
                            C_compare.data[i] = (i + 1) * 2;
                        }

                        // Combinations of A B C matrices and sub-matrices are created for testing
                        // As an example: case 2
                        // Matrices A and C are sub-matrices - the data are defined as a pointer to an external buffer
                        // Matrix B is a matrix - the data are copied into the B matrix
                        switch (var) {
                        case 0: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 1: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 2: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 3: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 4: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        case 5: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        case 6: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        default:
                            break;
                        }

                        dspm_add_f32_ansi(A_sub.data, B_sub.data, C_sub.data, row, col, A_sub.padding, B_sub.padding, C_sub.padding, 1, 1, 1);
                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);

                        // C is a sub-matrix
                        if (C_sub.sub_matrix) {
                            // Create a copy of the original C matrix (filled with ones 1)
                            // to check if an area around the sub-matrix is unaffected after a matrix operation
                            dspm::Mat C_area_check = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
                            // C is a matrix
                        } else {
                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
                        }
                    }
                }
            }
        }
    }
}
// Lock held around the measured section of the benchmark below
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;

// Cycle-count benchmark of dspm_add_f32_ansi on a 4x4 sub-matrix.
// Tagged [dspm] (matrix module), matching the dspm_add_f32_ae32 tests.
TEST_CASE("dspm_add_f32_ansi benchmark", "[dspm]")
{
    const int dim = 4;
    const int M_off = 1;

    // dim x dim region of interest placed M_off elements into a larger matrix
    dspm::Mat mat(dim + M_off, dim + M_off);
    dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);

    portENTER_CRITICAL(&testnlock);
    // One call before the cycle counter is sampled, excluded from the measurement
    dspm_add_f32_ansi(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    int repeat_count = 1024;
    for (int i = 0 ; i < repeat_count ; i++) {
        dspm_add_f32_ansi(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
    }
    unsigned int end_b = dsp_get_cpu_cycle_count();
    portEXIT_CRITICAL(&testnlock);

    // Average cycle count of a single call
    float total_b = end_b - start_b;
    float cycles = total_b / (repeat_count);
    printf("Benchmark dspm_add_f32_ansi - %f per sample %dx%d.\n", cycles, dim, dim);

    float min_exec = 100;
    float max_exec = 1400;
    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
}