add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
@@ -0,0 +1,63 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dspm_add_platform.h"
+#if (dspm_add_f32_ae32_enabled == 1)
+
+// This is an add function for sub-matrices for ESP32 processor
+    .text
+    .align  4
+    .global dspm_add_f32_ae32
+    .type   dspm_add_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
+//
+// Calling convention: Xtensa windowed ABI (entry/retw). The first six
+// arguments arrive in a2..a7; the remaining five are passed on the caller's
+// stack as consecutive 32-bit words and are loaded into a8..a12 below.
+//
+// Preconditions (no argument validation is done here):
+//   - rows must be > 0: the row counter is decremented before it is tested,
+//     so rows == 0 would wrap around instead of exiting immediately.
+//   - cols == 0 is tolerated: loopnez skips the inner loop body.
+//
+// NOTE(review): after each row the pointers have already advanced by
+// cols * step elements (post-increment in the inner loop) and are then moved
+// by padd elements. The ANSI reference advances a row by (cols + padd)
+// elements, so both versions only agree when the steps are 1 - confirm which
+// behaviour is intended for step != 1.
+
+dspm_add_f32_ae32:
+// input1           - a2
+// input2           - a3
+// output           - a4
+// rows             - a5
+// cols             - a6
+// padd1            - a7
+// padd2            - a8   (stack argument 0)
+// padd_out         - a9   (stack argument 1)
+// step1            - a10  (stack argument 2)
+// step2            - a11  (stack argument 3)
+// step_out         - a12  (stack argument 4)
+
+    entry   a1, 16
+
+    // The frame is 16 bytes, so the caller's stack arguments start at a1 + 16
+    // and follow each other in 4-byte slots.
+    l32i.n  a8,  a1, 16         // padd2
+    l32i.n  a9,  a1, 20         // padd_out
+    l32i.n  a10, a1, 24         // step1
+    l32i.n  a11, a1, 28         // step2
+    l32i.n  a12, a1, 32         // step_out
+
+    // Convert element steps to byte steps (sizeof(float) == 4)
+    slli    a10, a10, 2         // a10  - step1 << 2
+    slli    a11, a11, 2         // a11  - step2 << 2
+    slli    a12, a12, 2         // a12  - step_out << 2
+
+    .outer_loop_add_f32_ae32:
+
+        // Zero-overhead hardware loop over the columns of the current row
+        loopnez a6, .loop_add_f32_ae32
+            lsxp     f0,  a2,  a10      // load input1 to f0, increment input1 (input1_ptr+=step1)
+            lsxp     f1,  a3,  a11      // load input2 to f1, increment input2 (input2_ptr+=step2)
+
+            add.s    f2,  f0,  f1       // f2 = f0 + f1
+            ssxp     f2,  a4,  a12      // save result f2 to output a4, increment output (output_ptr+=step_out)
+        .loop_add_f32_ae32:
+
+        // Skip the padding at the end of the row (padding is given in elements)
+        addx4    a3,  a8,  a3           // input2_ptr += (padd2 << 2);
+        addx4    a2,  a7,  a2           // input1_ptr += (padd1 << 2);
+        addx4    a4,  a9,  a4           // output_ptr += (padd_out << 2);
+        addi.n   a5,  a5,  -1           // rows - 1
+
+    bnez a5, .outer_loop_add_f32_ae32
+
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dspm_add_f32_ae32_enabled
@@ -0,0 +1,64 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#include "dspm_add.h"
+
+/*
+ * Element-wise addition of two float sub-matrices (portable C reference).
+ *
+ * For every row r in [0, rows) and column c in [0, cols):
+ *   output[r * (cols + padd_out) + c * step_out] =
+ *       input1[r * (cols + padd1) + c * step1] + input2[r * (cols + padd2) + c * step2]
+ *
+ * Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when any pointer is NULL, rows or cols
+ * is not positive, any padding is negative, or any step is not positive.
+ * Returns ESP_OK otherwise.
+ */
+esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out)
+{
+    // All three buffers are mandatory
+    if ((NULL == input1) || (NULL == input2) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // The sub-matrix must have at least one row and one column
+    if ((rows <= 0) || (cols <= 0)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Paddings may be zero, but never negative
+    if ((padd1 < 0) || (padd2 < 0) || (padd_out < 0)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Steps must move forward
+    if ((step1 <= 0) || (step2 <= 0) || (step_out <= 0)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Row cursors; each one advances by (cols + padding) elements per row
+    const float *row_in1 = input1;
+    const float *row_in2 = input2;
+    float *row_out = output;
+
+    for (int rows_left = rows; rows_left > 0; rows_left--) {
+        for (int c = 0; c < cols; c++) {
+            row_out[c * step_out] = row_in1[c * step1] + row_in2[c * step2];
+        }
+        row_in1 += cols + padd1;
+        row_in2 += cols + padd2;
+        row_out += cols + padd_out;
+    }
+    return ESP_OK;
+}
@@ -0,0 +1,65 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#ifndef _dspm_add_H_
+#define _dspm_add_H_
+#include "dsp_err.h"
+
+#include "dspm_add_platform.h"
+
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   add two arrays with paddings (add two sub-matrices)
+ *
+ * The function adds two arrays defined as sub-matrices with paddings
+ * out[row * ptr_step_out + col * step_out] = in1[row * ptr_step_in1 + col * step1] + in2[row * ptr_step_in2 + col * step2];
+ * In the ANSI C implementation the row strides are ptr_step_in1 = cols + padd1,
+ * ptr_step_in2 = cols + padd2 and ptr_step_out = cols + padd_out (all in elements).
+ * The _ansi implementation uses ANSI C and can be compiled and run on any platform.
+ * The _ae32 implementation is an assembly version that is only built when
+ * dspm_add_f32_ae32_enabled is 1.
+ *
+ * @param[in]  input1: input array 1
+ * @param[in]  input2: input array 2
+ * @param[out] output: output array
+ * @param[in]  rows: matrix rows
+ * @param[in]  cols: matrix cols
+ * @param[in]  padd1: input array 1 padding (in elements, added after each row)
+ * @param[in]  padd2: input array 2 padding (in elements, added after each row)
+ * @param[in]  padd_out: output array padding (in elements, added after each row)
+ * @param[in]  step1: step over input array 1 (by default should be 1)
+ * @param[in]  step2: step over input array 2 (by default should be 1)
+ * @param[in]  step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ *        (the ANSI implementation returns ESP_ERR_DSP_PARAM_OUTOFRANGE for a NULL
+ *        pointer, rows/cols <= 0, a negative padding or a step <= 0)
+ */
+esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
+esp_err_t dspm_add_f32_ae32(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+// Select the implementation behind the generic name dspm_add_f32:
+// the ae32 assembly version is used only when optimizations are enabled
+// (CONFIG_DSP_OPTIMIZED) and the platform header enabled it; in every other
+// configuration the name maps to the ANSI C version.
+#if CONFIG_DSP_OPTIMIZED
+
+#if (dspm_add_f32_ae32_enabled == 1)
+#define dspm_add_f32 dspm_add_f32_ae32
+#else
+#define dspm_add_f32 dspm_add_f32_ansi
+#endif
+
+#else // CONFIG_DSP_OPTIMIZED
+#define dspm_add_f32 dspm_add_f32_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dspm_add_H_
@@ -0,0 +1,20 @@
+// Platform capability switches for the dspm_add module.
+// Defines dspm_add_f32_ae32_enabled as 1 when the assembly implementation can
+// be used; otherwise the macro is left undefined (and therefore compares as 0
+// in the preprocessor conditions that test it).
+#ifndef _dspm_add_platform_H_
+#define _dspm_add_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+
+// The ae32 version is enabled only when the core configuration reports both
+// XCHAL_HAVE_FP and XCHAL_HAVE_LOOPS as 1.
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+
+#define dspm_add_f32_ae32_enabled  1
+
+#endif
+
+#endif // __XTENSA__
+
+
+#endif // _dspm_add_platform_H_
@@ -0,0 +1,146 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include "esp_dsp.h"
+#include "dspm_add.h"
+#include "esp_attr.h"
+#include "test_mat_common.h"
+#include "dsp_tests.h"
+
+// Functional test of dspm_add_f32 (the implementation selected by dspm_add.h).
+// Sub-matrices of 1..5 rows/cols, with and without a one-element border, are
+// added in seven different matrix / sub-matrix combinations and compared
+// against a pre-computed reference. All steps are 1 in this test.
+TEST_CASE("dspm_add_f32_ae32 functionality", "[dspm]")
+{
+    // create ROI rectangle
+    dspm::Mat::Rect roi_rect;
+
+    char message[60];
+    for (int var = 0; var < 7; var++) {
+        for (int start_row = 0; start_row < 2; start_row++) {
+            for (int start_col = 0; start_col < 2; start_col++) {
+                for (int row = 1; row < 6; row++) {
+                    for (int col = 1; col < 6; col++) {
+                        // bounded formatting: never write past the end of message[]
+                        snprintf(message, sizeof(message), "var = %d  s_row = %d  s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);
+                        // create A B C matrices with row col dimensions + padding
+                        // padding is from both sides of the targeted sub-matrix
+                        // 1 1 1 1
+                        // 1 x x 1
+                        // 1 x x 1
+                        // 1 1 1 1
+                        dspm::Mat A(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat B(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat C_compare(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat C = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
+
+                        // create A B C sub matrices with undefined dimensions
+                        dspm::Mat A_sub;
+                        dspm::Mat B_sub;
+                        dspm::Mat C_sub;
+
+                        // adjust ROI rectangles
+                        roi_rect.resizeRect(start_col, start_row, col, row);
+
+                        // A == B == 1, 2, 3, ... so the expected sum is 2 * (i + 1)
+                        for (int i = 0; i < A.length; i++) {
+                            A.data[i] = i + 1;
+                            B.data[i] = i + 1;
+                            C_compare.data[i] = (i + 1) * 2;
+                        }
+
+                        // Combinations of A B C matrices and sub-matrices are created for testing
+                        // As an example: case 1
+                        // Matrix A is a matrix - the data are copied into the A matrix
+                        // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
+                        switch (var) {
+                        case 0: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 1: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 2: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub = B.Get(roi_rect);               // B matrix     - DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 3: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            B_sub = B.Get(roi_rect);               // B matrix     - DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 4: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        case 5: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub = B.Get(roi_rect);               // B matrix     - DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        case 6: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        default:
+                            break;
+                        }
+
+                        dspm_add_f32(A_sub.data, B_sub.data, C_sub.data, row, col, A_sub.padding, B_sub.padding, C_sub.padding, 1, 1, 1);
+                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);
+
+                        // C is a sub-matrix
+                        if (C_sub.sub_matrix) {
+                            // Create a copy of the original C matrix (filled with ones 1)
+                            // to check if an area around the sub-matrix is unaffected after a matrix operation
+                            dspm::Mat C_area_check = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
+                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
+                            // C is a matrix
+                        } else {
+                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Cycle-count benchmark of dspm_add_f32 on a 4x4 sub-matrix taken from a
+// 5x5 matrix. The same sub-matrix is used as both inputs and as the output.
+// The average number of cycles per call must fall inside [min_exec, max_exec].
+TEST_CASE("dspm_add_f32_ae32 benchmark", "[dspm]")
+{
+    const int dim = 4;
+    const int M_off = 1;
+    const int repeat_count = 1024;
+
+    dspm::Mat mat(dim + M_off, dim + M_off);
+    dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);
+
+    // Measure inside a critical section
+    portENTER_CRITICAL(&testnlock);
+
+    // One untimed call before the measured loop
+    dspm_add_f32(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
+
+    const unsigned int start_b = dsp_get_cpu_cycle_count();
+    int remaining = repeat_count;
+    while (remaining-- > 0) {
+        dspm_add_f32(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
+    }
+    const unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    portEXIT_CRITICAL(&testnlock);
+
+    // Average cycles per call
+    float cycles = (float)(end_b - start_b) / repeat_count;
+    printf("Benchmark dspm_add_f32_ae32 - %f per sample %dx%d.\n", cycles, dim, dim);
+
+    float min_exec = 100;
+    float max_exec = 1400;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}
@@ -0,0 +1,146 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include "esp_dsp.h"
+#include "dspm_add.h"
+#include "esp_attr.h"
+#include "test_mat_common.h"
+#include "dsp_tests.h"
+
+// Functional test of dspm_add_f32_ansi.
+// Sub-matrices of 1..5 rows/cols, with and without a one-element border, are
+// added in seven different matrix / sub-matrix combinations and compared
+// against a pre-computed reference. All steps are 1 in this test.
+// NOTE(review): the ae32 tests of this module are tagged "[dspm]" while this
+// one is tagged "[dsps]" - confirm the tag is intended.
+TEST_CASE("dspm_add_f32_ansi functionality", "[dsps]")
+{
+    // create ROI rectangle
+    dspm::Mat::Rect roi_rect;
+
+    char message[60];
+    for (int var = 0; var < 7; var++) {
+        for (int start_row = 0; start_row < 2; start_row++) {
+            for (int start_col = 0; start_col < 2; start_col++) {
+                for (int row = 1; row < 6; row++) {
+                    for (int col = 1; col < 6; col++) {
+                        // bounded formatting: never write past the end of message[]
+                        snprintf(message, sizeof(message), "var = %d  s_row = %d  s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);
+                        // create A B C matrices with row col dimensions + padding
+                        // padding is from both sides of the targeted sub-matrix
+                        // 1 1 1 1
+                        // 1 x x 1
+                        // 1 x x 1
+                        // 1 1 1 1
+                        dspm::Mat A(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat B(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat C_compare(row + (2 * start_row), col + (2 * start_col));
+                        dspm::Mat C = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
+
+                        // create A B C sub matrices with undefined dimensions
+                        dspm::Mat A_sub;
+                        dspm::Mat B_sub;
+                        dspm::Mat C_sub;
+
+                        // adjust ROI rectangles
+                        roi_rect.resizeRect(start_col, start_row, col, row);
+
+                        // A == B == 1, 2, 3, ... so the expected sum is 2 * (i + 1)
+                        for (int i = 0; i < A.length; i++) {
+                            A.data[i] = i + 1;
+                            B.data[i] = i + 1;
+                            C_compare.data[i] = (i + 1) * 2;
+                        }
+
+                        // Combinations of A B C matrices and sub-matrices are created for testing
+                        // As an example: case 1
+                        // Matrix A is a matrix - the data are copied into the A matrix
+                        // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
+                        switch (var) {
+                        case 0: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 1: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 2: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub = B.Get(roi_rect);               // B matrix     - DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 3: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            B_sub = B.Get(roi_rect);               // B matrix     - DATA CPY
+                            C_sub.CopyHead(C.getROI(roi_rect));    // C sub-matrix - NO DATA CPY
+                        } break;
+                        case 4: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        case 5: {
+                            A_sub.CopyHead(A.getROI(roi_rect));    // A sub-matrix - NO DATA CPY
+                            B_sub = B.Get(roi_rect);               // B matrix     - DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        case 6: {
+                            A_sub = A.Get(roi_rect);               // A matrix     - DATA CPY
+                            B_sub.CopyHead(B.getROI(roi_rect));    // B sub-matrix - NO DATA CPY
+                            C_sub = C.Get(roi_rect);               // C matrix     - DATA CPY
+                        } break;
+                        default:
+                            break;
+                        }
+
+                        dspm_add_f32_ansi(A_sub.data, B_sub.data, C_sub.data, row, col, A_sub.padding, B_sub.padding, C_sub.padding, 1, 1, 1);
+                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);
+
+                        // C is a sub-matrix
+                        if (C_sub.sub_matrix) {
+                            // Create a copy of the original C matrix (filled with ones 1)
+                            // to check if an area around the sub-matrix is unaffected after a matrix operation
+                            dspm::Mat C_area_check = dspm::Mat::ones(row + (2 * start_row), col + (2 * start_col));
+                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
+                            // C is a matrix
+                        } else {
+                            test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Cycle-count benchmark of dspm_add_f32_ansi on a 4x4 sub-matrix taken from a
+// 5x5 matrix. The same sub-matrix is used as both inputs and as the output.
+// The average number of cycles per call must fall inside [min_exec, max_exec].
+TEST_CASE("dspm_add_f32_ansi benchmark", "[dsps]")
+{
+    const int dim = 4;
+    const int M_off = 1;
+    const int repeat_count = 1024;
+
+    dspm::Mat mat(dim + M_off, dim + M_off);
+    dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);
+
+    // Measure inside a critical section
+    portENTER_CRITICAL(&testnlock);
+
+    // One untimed call before the measured loop
+    dspm_add_f32_ansi(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
+
+    const unsigned int start_b = dsp_get_cpu_cycle_count();
+    int remaining = repeat_count;
+    while (remaining-- > 0) {
+        dspm_add_f32_ansi(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
+    }
+    const unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    portEXIT_CRITICAL(&testnlock);
+
+    // Average cycles per call
+    float cycles = (float)(end_b - start_b) / repeat_count;
+    printf("Benchmark dspm_add_f32_ansi - %f per sample %dx%d.\n", cycles, dim, dim);
+
+    float min_exec = 100;
+    float max_exec = 1400;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}