add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
--- a/managed_components/espressif__esp-dsp/modules/matrix/mulc/float/dspm_mulc_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mulc/float/dspm_mulc_f32_ae32.S
@@ -0,0 +1,58 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dspm_mulc_platform.h"
+#if (dspm_mulc_f32_ae32_enabled == 1)
+
+// This is a mul function for sub-matrices for ESP32 processor
+    .text
+    .align  4
+    .global dspm_mulc_f32_ae32
+    .type   dspm_mulc_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dspm_mulc_f32_ansi(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out);
+
+dspm_mulc_f32_ae32: 
+// input            - a2
+// output           - a3
+// C                - a4  (moved into f0 below)
+// rows             - a5
+// cols             - a6
+// padd_in          - a7
+// padd_out         - a8  (loaded from a1 + 16 below)
+// step_in          - a9  (loaded from a1 + 20 below)
+// step_out         - a10 (loaded from a1 + 24 below)
+// NOTE: unlike dspm_mulc_f32_ansi, no argument is validated here; the function always returns 0 (ESP_OK).
+    entry   a1, 16
+    // The last three arguments are read relative to a1 after the entry instruction.
+    l32i.n  a8,  a1, 16         // padd_out
+    l32i.n  a9,  a1, 20         // step_in
+    l32i.n  a10, a1, 24         // step_out
+
+    slli    a9,  a9,  2         // a9   - step_in << 2  (element step * 4)
+    slli    a10, a10, 2         // a10  - step_out << 2 (element step * 4)
+
+    wfr     f0,  a4             // f0   - the constant C, taken from a4
+
+    .outer_loop_mulc_f32_ae32:
+        // One pass of the inner loop handles one row: a6 (cols) iterations.
+        loopnez a6, .loop_mulc_f32_ae32
+            lsxp     f1,  a2,  a9       // load input to f1, increment input (input_ptr+=step_in)
+
+            mul.s    f2,  f0,  f1       // f2 = f0 * f1
+            ssxp     f2,  a3,  a10      // save result f2 to output a3, increment output (output_ptr+=step_out)
+        .loop_mulc_f32_ae32:
+        // NOTE(review): row advance here is cols*step + padd, the ANSI version uses cols + padd; equal only for step == 1 - confirm.
+        addx4    a2,  a7,  a2           // input_ptr += (padd_in << 2);
+        addx4    a3,  a8,  a3           // output_ptr += (padd_out << 2);
+        addi.n   a5,  a5,  -1           // rows - 1
+        // The row counter is tested after the body, so rows is expected to be >= 1 on entry.
+    bnez a5, .outer_loop_mulc_f32_ae32
+
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dspm_mulc_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/matrix/mulc/float/dspm_mulc_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mulc/float/dspm_mulc_f32_ansi.c
@@ -0,0 +1,51 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dspm_mulc.h"
+
+/**
+ * Portable reference version: multiplies a sub-matrix by the constant C.
+ * output[row * (cols + padd_out) + col * step_out] =
+ *     input[row * (cols + padd_in) + col * step_in] * C
+ *
+ * @param[in]  input     first element of the input sub-matrix (read only)
+ * @param[out] output    first element of the output sub-matrix
+ * @param[in]  C         constant multiplier
+ * @param[in]  rows      number of rows, must be > 0
+ * @param[in]  cols      number of columns, must be > 0
+ * @param[in]  padd_in   added to cols to get the input row stride, must be >= 0
+ * @param[in]  padd_out  added to cols to get the output row stride, must be >= 0
+ * @param[in]  step_in   index step between input elements of a row, must be > 0
+ * @param[in]  step_out  index step between output elements of a row, must be > 0
+ *
+ * @return ESP_OK on success, ESP_ERR_DSP_PARAM_OUTOFRANGE for any invalid argument
+ */
+esp_err_t dspm_mulc_f32_ansi(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out)
+{
+    if ((NULL == input) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    if ((rows <= 0) || (cols <= 0)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    if ((padd_in < 0) || (padd_out < 0) || (step_in <= 0) || (step_out <= 0)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Distance, in elements, between the starts of two consecutive rows.
+    const int ptr_input_step = cols + padd_in;
+    const int ptr_output_step = cols + padd_out;
+    const float *ptr_input = input; // stays const: the input is only read
+
+    for (int row = 0; row < rows; row++) {
+        for (int col = 0; col < cols; col++) {
+            output[col * step_out] = ptr_input[col * step_in] * C;
+        }
+        ptr_input += ptr_input_step;
+        output += ptr_output_step;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mulc/include/dspm_mulc.h
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mulc/include/dspm_mulc.h
@@ -0,0 +1,61 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#ifndef _dspm_mulc_H_
+#define _dspm_mulc_H_
+#include "dsp_err.h"
+
+#include "dspm_mulc_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**@{*/
+/**
+ * @brief   multiply a constant and an array with padding
+ *
+ * The function multiplies a constant and an array defined as a sub-matrix with padding
+ * out[row * ptr_step_out + col * step_out] = input[row * ptr_step_in + col * step_in] * C;
+ * where ptr_step_in = cols + padd_in and ptr_step_out = cols + padd_out in the ANSI C version.
+ * The ANSI C implementation could be compiled and run on any platform
+ *
+ * @param[in]  input: input array (first element of the sub-matrix)
+ * @param[out] output: output array (first element of the sub-matrix)
+ * @param[in]  C: constant value
+ * @param[in]  rows: input matrix rows (the ANSI C version requires > 0)
+ * @param[in]  cols: input matrix cols (the ANSI C version requires > 0)
+ * @param[in]  padd_in: input array padding (the ANSI C version requires >= 0)
+ * @param[in]  padd_out: output array padding (the ANSI C version requires >= 0)
+ * @param[in]  step_in: step over input array (by default should be 1)
+ * @param[in]  step_out: step over output array (by default should be 1)
+ * @return
+ *      - ESP_OK on success
+ *      - ESP_ERR_DSP_PARAM_OUTOFRANGE when an argument fails the checks of the ANSI C version
+ */
+esp_err_t dspm_mulc_f32_ansi(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out);
+esp_err_t dspm_mulc_f32_ae32(const float *input, float *output, float C, int rows, int cols, int padd_in, int padd_out, int step_in, int step_out);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+// dspm_mulc_f32 is the generic name for the sub-matrix multiply-by-constant.
+// It resolves to the ae32 assembly version only when CONFIG_DSP_OPTIMIZED is
+// non-zero and dspm_mulc_platform.h has set dspm_mulc_f32_ae32_enabled to 1.
+// In every other case (including either macro being undefined) it resolves
+// to the ANSI C version.
+#if CONFIG_DSP_OPTIMIZED && (dspm_mulc_f32_ae32_enabled == 1)
+#define dspm_mulc_f32 dspm_mulc_f32_ae32
+#else
+#define dspm_mulc_f32 dspm_mulc_f32_ansi
+#endif // CONFIG_DSP_OPTIMIZED && dspm_mulc_f32_ae32_enabled
+
+
+#endif // _dspm_mulc_H_
--- a/managed_components/espressif__esp-dsp/modules/matrix/mulc/include/dspm_mulc_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mulc/include/dspm_mulc_platform.h
@@ -0,0 +1,20 @@
+#ifndef _dspm_mulc_platform_H_
+#define _dspm_mulc_platform_H_
+
+#include "sdkconfig.h"
+
+#if defined(__XTENSA__)
+// The XCHAL_HAVE_* macros tested below presumably come from these headers - confirm.
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+// The assembly version is enabled only when both XCHAL_HAVE_FP and
+// XCHAL_HAVE_LOOPS are 1; otherwise the macro stays undefined.
+#if (XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1)
+#define dspm_mulc_f32_ae32_enabled  1
+#endif // XCHAL_HAVE_FP && XCHAL_HAVE_LOOPS
+
+#endif // __XTENSA__
+
+
+#endif // _dspm_mulc_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/matrix/mulc/test/test_dspm_mulc_f32_ansi.cpp
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mulc/test/test_dspm_mulc_f32_ansi.cpp
@@ -0,0 +1,122 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include "esp_dsp.h"
+#include "dspm_mulc.h"
+#include "esp_attr.h"
+#include "test_mat_common.h"
+#include "dsp_tests.h"
+
+TEST_CASE("dspm_mulc_f32_ansi functionality", "[dspm]")
+{
+    // ROI rectangle, resized for every tested sub-matrix position and size
+    dspm::Mat::Rect roi_rect;
+    // Text passed to the matrix assertions to identify the tested combination
+    char message[60];
+    for (int var = 0; var < 3; var++) {
+        for (int start_row = 0; start_row < 2; start_row++) {
+            for (int start_col = 0; start_col < 2; start_col++) {
+                for (int row = 1; row < 6; row++) {
+                    for (int col = 1; col < 6; col++) {
+                        snprintf(message, sizeof(message), "var = %d  s_row = %d  s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);
+                        // create A C matrices with row col dimensions + padding
+                        // padding is from both sides of the targeted sub-matrix
+                        // 1 1 1 1
+                        // 1 x x 1
+                        // 1 x x 1
+                        // 1 1 1 1
+                        dspm::Mat A(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat C_compare(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat C = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
+
+                        // create A C sub matrices with undefined dimensions
+                        dspm::Mat A_sub;
+                        dspm::Mat C_sub;
+                        float B = 2;
+
+                        // adjust ROI rectangles
+                        roi_rect.resizeRect(start_col, start_row, col, row);
+
+                        for (int i = 0; i < A.length; i++) {
+                            A.data[i] = i + 1;
+                            C_compare.data[i] = (i + 1) * B;
+                        }
+
+                        // Combinations of A C matrices and sub-matrices are created for testing
+                        // As an example: case 2
+                        // Matrix A is a sub-matrix - the data are defined as a pointer to an external buffer
+                        // Matrix C is a matrix - the data are copied into the C matrix
+                        switch (var) {
+                        case 0: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 1: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 2: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        default:
+                            break;
+                        }
+                        // dspm_mulc_f32 is the dispatch macro from dspm_mulc.h; the call has to report ESP_OK
+                        TEST_ASSERT_EQUAL(ESP_OK, dspm_mulc_f32(A_sub.data, C_sub.data, B, row, col, A_sub.padding, C_sub.padding, 1, 1));
+                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);
+
+                        // C is a sub-matrix
+                        if (C_sub.sub_matrix) {
+                            // Create a copy of the original C matrix (filled with ones 1)
+                            // to check if an area around the sub-matrix is unaffected after a matrix operation
+                            dspm::Mat C_area_check = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
+                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
+                            // C is a matrix
+                        } else {
+                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Lock for the critical section that wraps the timed part of the benchmark.
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+TEST_CASE("dspm_mulc_f32_ansi benchmark", "[dspm]")
+{
+    const int dim = 4;
+    const int M_off = 1;
+    const float B = 1;
+
+    dspm::Mat mat(dim + M_off, dim + M_off);
+    dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);
+
+    portENTER_CRITICAL(&testnlock);
+    // One untimed call before the measurement starts.
+    dspm_mulc_f32(mat_sub.data, mat_sub.data, B, dim, dim, mat_sub.padding, mat_sub.padding, 1, 1);
+    unsigned int cycles_begin = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int iter = 0; iter < repeat_count; iter++) {
+        dspm_mulc_f32(mat_sub.data, mat_sub.data, B, dim, dim, mat_sub.padding, mat_sub.padding, 1, 1);
+    }
+    unsigned int cycles_end = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    float elapsed = cycles_end - cycles_begin;
+    float cycles = elapsed / (repeat_count);    // average cycle count of one call
+    printf("Benchmark dspm_mulc_f32_ansi - %f per sample %dx%d.\n", cycles, dim, dim);
+    float min_exec = 100;
+    float max_exec = 1400;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}