add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,58 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dspm_mulc_platform.h"
#if (dspm_mulc_f32_ae32_enabled == 1)
// Multiply a float sub-matrix by a constant; assembly implementation for the ESP32 processor.
// For every row the routine processes `cols` elements, moving the input pointer by
// step_in floats and the output pointer by step_out floats per element, and then
// skips padd_in / padd_out floats to reach the next row.
// NOTE(review): no argument is validated here (no NULL / range checks). `rows` is
// decremented before it is tested, so rows == 0 would wrap the counter instead of
// returning immediately - callers must pass rows >= 1.
// NOTE(review): the row advance here is (cols * step + padd) elements, whereas the ANSI
// code advances by (cols + padd); the two agree only for step == 1 - confirm intent.
.text
.align 4
.global dspm_mulc_f32_ae32
.type dspm_mulc_f32_ae32,@function
// The function implements the following C code:
// esp_err_t dspm_mulc_f32_ansi(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out);
dspm_mulc_f32_ae32:
// Register usage (a2..a7 arrive in registers, a8..a10 are loaded from the stack below):
// input - a2
// output - a3
// C - a4
// rows - a5
// cols - a6
// padd_in - a7
// padd_out - a8
// step_in - a9
// step_out - a10
entry a1, 16
// The last three arguments are read from the stack at offsets 16, 20 and 24 from a1
l32i.n a8, a1, 16 // padd_out
l32i.n a9, a1, 20 // step_in
l32i.n a10, a1, 24 // step_out
// Convert the element steps into byte steps (one float = 4 bytes)
slli a9, a9, 2 // a9 - step_in << 2
slli a10, a10, 2 // a10 - step_out << 2
wfr f0, a4 // move the constant C from a4 into f0
// Outer loop: one pass per row, a5 counts the remaining rows
.outer_loop_mulc_f32_ae32:
// Inner hardware loop: the three instructions up to .loop_mulc_f32_ae32 run a6 (cols) times
loopnez a6, .loop_mulc_f32_ae32
lsxp f1, a2, a9 // load input to f1, increment input (input_ptr+=step_in)
mul.s f2, f0, f1 // f2 = f0 * f1
ssxp f2, a3, a10 // save result f2 to output a3, increment output (output_ptr+=step_out)
.loop_mulc_f32_ae32:
// End of row: skip the padding on both pointers
addx4 a2, a7, a2 // input_ptr += (padd_in << 2);
addx4 a3, a8, a3 // output_ptr += (padd_out << 2);
addi.n a5, a5, -1 // rows - 1
bnez a5, .outer_loop_mulc_f32_ae32 // repeat until all rows are processed
movi.n a2, 0 // return status ESP_OK
retw.n
#endif // dspm_mulc_f32_ae32_enabled

View File

@@ -0,0 +1,51 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dspm_mulc.h"
/*
 * Multiply a sub-matrix by the constant C (portable ANSI C implementation).
 *
 * For each of the `rows` rows, `cols` elements are read from the input with a
 * stride of `step_in` floats and written to the output with a stride of
 * `step_out` floats. Consecutive rows start (cols + padd_in) floats apart in
 * the input and (cols + padd_out) floats apart in the output.
 *
 * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when a pointer is NULL, when rows, cols,
 * step_in or step_out is not strictly positive, or when a padding is negative;
 * returns ESP_OK otherwise.
 */
esp_err_t dspm_mulc_f32_ansi(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out)
{
    /* Both buffers are mandatory. */
    if ((input == NULL) || (output == NULL)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }
    /* Dimensions must be strictly positive. */
    if ((rows <= 0) || (cols <= 0)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }
    /* Paddings may be zero but never negative. */
    if ((padd_in < 0) || (padd_out < 0)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }
    /* Element steps must be strictly positive. */
    if ((step_in <= 0) || (step_out <= 0)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    /* Distance, in floats, between the starts of two consecutive rows. */
    const int in_row_stride = cols + padd_in;
    const int out_row_stride = cols + padd_out;

    const float *src_row = input;
    float *dst_row = output;

    int remaining_rows = rows;
    while (remaining_rows-- > 0) {
        for (int idx = 0; idx < cols; idx++) {
            dst_row[idx * step_out] = src_row[idx * step_in] * C;
        }
        src_row += in_row_stride;
        dst_row += out_row_stride;
    }

    return ESP_OK;
}

View File

@@ -0,0 +1,61 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _dspm_mulc_H_
#define _dspm_mulc_H_
#include "dsp_err.h"
#include "dspm_mulc_platform.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**@{*/
/**
* @brief multiply a constant and an array with padding
*
* The function multiplies an array, addressed as a sub-matrix with padding, by a constant:
* out[row * ptr_step_out + col * step_out] = input[row * ptr_step_in + col * step_in] * C;
* In the ANSI implementation ptr_step_in = cols + padd_in and ptr_step_out = cols + padd_out.
* dspm_mulc_f32_ansi uses ANSI C and could be compiled and run on any platform.
* dspm_mulc_f32_ae32 takes the same arguments and is the assembly implementation for ESP32;
* it is only built when dspm_mulc_f32_ae32_enabled == 1.
*
* @param[in] input: input array
* @param[out] output: output array
* @param[in] C: constant value
* @param[in] rows: input matrix rows (must be > 0)
* @param[in] cols: input matrix cols (must be > 0)
* @param[in] padd_in: input array padding (must be >= 0)
* @param[in] padd_out: output array padding (must be >= 0)
* @param[in] step_in: step over input array (by default should be 1, must be > 0)
* @param[in] step_out: step over output array (by default should be 1, must be > 0)
*
* @return
* - ESP_OK on success
* - One of the error codes from DSP library (the ANSI implementation returns
*   ESP_ERR_DSP_PARAM_OUTOFRANGE for a NULL pointer or an out-of-range argument)
*/
esp_err_t dspm_mulc_f32_ansi(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out);
esp_err_t dspm_mulc_f32_ae32(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out);
/**@}*/
#ifdef __cplusplus
}
#endif
// dspm_mulc_f32 is the generic entry point: it maps to the ESP32 assembly
// implementation only when optimizations are requested (CONFIG_DSP_OPTIMIZED)
// and the platform header enabled it; every other configuration uses ANSI C.
#if CONFIG_DSP_OPTIMIZED && (dspm_mulc_f32_ae32_enabled == 1)
#define dspm_mulc_f32 dspm_mulc_f32_ae32
#else // not optimized, or ae32 implementation not available
#define dspm_mulc_f32 dspm_mulc_f32_ansi
#endif // CONFIG_DSP_OPTIMIZED && dspm_mulc_f32_ae32_enabled
#endif // _dspm_mulc_H_

View File

@@ -0,0 +1,20 @@
#ifndef _dspm_mulc_platform_H_
#define _dspm_mulc_platform_H_
#include "sdkconfig.h"
// Decide whether the assembly implementation dspm_mulc_f32_ae32 can be used.
// It is enabled only on Xtensa targets whose core configuration reports both
// XCHAL_HAVE_FP and XCHAL_HAVE_LOOPS, because the assembly routine relies on
// floating-point instructions (wfr, lsxp, mul.s, ssxp) and on the loopnez loop instruction.
// On any other target dspm_mulc_f32_ae32_enabled stays undefined.
#ifdef __XTENSA__
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
#define dspm_mulc_f32_ae32_enabled 1
#endif
#endif // __XTENSA__
#endif // _dspm_mulc_platform_H_

View File

@@ -0,0 +1,122 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include "esp_dsp.h"
#include "dspm_mulc.h"
#include "esp_attr.h"
#include "test_mat_common.h"
#include "dsp_tests.h"
// Functional test of the ANSI C implementation dspm_mulc_f32_ansi.
//
// For every combination of sub-matrix size (1..5 x 1..5), border width (0 or 1 on each
// side) and matrix/sub-matrix storage variant, a sub-matrix of A is multiplied by a
// constant and compared with a reference computed element by element. When the output
// is a sub-matrix view, the area around it is also checked to be untouched.
//
// The implementation is called by its explicit name (not through the dspm_mulc_f32
// dispatch macro) so that the test always exercises the ANSI code it is named after,
// and the returned status is asserted instead of being ignored.
TEST_CASE("dspm_mulc_f32_ansi functionality", "[dspm]")
{
    // ROI rectangle, re-adjusted for every tested geometry
    dspm::Mat::Rect roi_rect;
    char message[60];
    // constant multiplier
    const float B = 2;

    for (int var = 0; var < 3; var++) {
        for (int start_row = 0; start_row < 2; start_row++) {
            for (int start_col = 0; start_col < 2; start_col++) {
                for (int row = 1; row < 6; row++) {
                    for (int col = 1; col < 6; col++) {
                        // bounded formatting: the message can never overrun the buffer
                        snprintf(message, sizeof(message), "var = %d s_row = %d s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);

                        // create A C matrices with row col dimensions + padding
                        // padding is from both sides of the targeted sub-matrix
                        // 1 1 1 1
                        // 1 x x 1
                        // 1 x x 1
                        // 1 1 1 1
                        const int full_rows = row + (2 * start_row);
                        const int full_cols = col + (2 * start_col);
                        dspm::Mat A(full_rows, full_cols);
                        dspm::Mat C_compare(full_rows, full_cols);
                        dspm::Mat C = dspm::Mat::ones(full_rows, full_cols);

                        // create A C sub matrices with undefined dimensions
                        dspm::Mat A_sub;
                        dspm::Mat C_sub;

                        // adjust ROI rectangles
                        roi_rect.resizeRect(start_col, start_row, col, row);

                        // fill the input and the expected (reference) result
                        for (int i = 0; i < A.length; i++) {
                            A.data[i] = i + 1;
                            C_compare.data[i] = (i + 1) * B;
                        }

                        // Combinations of A C matrices and sub-matrices are created for testing
                        // As an example: case 2
                        // Matrix A is a sub-matrix - the data are defined as a pointer to an external buffer
                        // Matrix C is a matrix - the data are copied into the C matrix
                        switch (var) {
                        case 0: {
                            A_sub.CopyHead(A.getROI(roi_rect));         // A sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));         // C sub-matrix - NO DATA CPY
                        } break;
                        case 1: {
                            A_sub = A.Get(roi_rect);                    // A matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));         // C sub-matrix - NO DATA CPY
                        } break;
                        case 2: {
                            A_sub.CopyHead(A.getROI(roi_rect));         // A sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                    // C matrix - DATA CPY
                        } break;
                        default:
                            // not reachable: var is limited to 0..2 by the outer loop
                            break;
                        }

                        // run the function under test and make sure it reports success
                        const esp_err_t status = dspm_mulc_f32_ansi(A_sub.data, C_sub.data, B, row, col, A_sub.padding, C_sub.padding, 1, 1);
                        TEST_ASSERT_EQUAL_MESSAGE(ESP_OK, status, message);

                        // the result must match the reference in both storage variants
                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);
                        test_assert_equal_mat_mat(C_sub_check, C_sub, message);

                        // C is a sub-matrix
                        if (C_sub.sub_matrix) {
                            // Create a copy of the original C matrix (filled with ones 1)
                            // to check if an area around the sub-matrix is unaffected after a matrix operation
                            dspm::Mat C_area_check = dspm::Mat::ones(full_rows, full_cols);
                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
                        }
                    }
                }
            }
        }
    }
}
// Lock taken around the timed section of the benchmark below
// (presumably so the cycle measurement is not disturbed - confirm against the port layer).
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;

// Benchmark of the ANSI C implementation dspm_mulc_f32_ansi on a dim x dim sub-matrix.
//
// The implementation is called by its explicit name so that the reported figure really
// belongs to the ANSI code named in the test title and in the printed message, instead of
// whatever the dspm_mulc_f32 dispatch macro selects for the current build.
TEST_CASE("dspm_mulc_f32_ansi benchmark", "[dspm]")
{
    const int dim = 4;
    const int M_off = 1;
    const float B = 1;
    const int repeat_count = 1024;

    // dim x dim view starting at (M_off, M_off) inside a (dim + M_off) square matrix;
    // it is used as both input and output
    dspm::Mat mat(dim + M_off, dim + M_off);
    dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);

    portENTER_CRITICAL(&testnlock);
    // one call outside the timed window (presumably a warm-up)
    dspm_mulc_f32_ansi(mat_sub.data, mat_sub.data, B, dim, dim, mat_sub.padding, mat_sub.padding, 1, 1);

    const unsigned int start_b = dsp_get_cpu_cycle_count();
    for (int i = 0; i < repeat_count; i++) {
        dspm_mulc_f32_ansi(mat_sub.data, mat_sub.data, B, dim, dim, mat_sub.padding, mat_sub.padding, 1, 1);
    }
    const unsigned int end_b = dsp_get_cpu_cycle_count();
    portEXIT_CRITICAL(&testnlock);

    // average number of cycles per call
    const float total_b = end_b - start_b;
    const float cycles = total_b / (repeat_count);
    printf("Benchmark dspm_mulc_f32_ansi - %f per sample %dx%d.\n", cycles, dim, dim);

    // accepted range for the average cycle count of one call
    const float min_exec = 100;
    const float max_exec = 1400;
    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
}