add some code
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dspm_add_platform.h"
|
||||
#if (dspm_add_f32_ae32_enabled == 1)
|
||||
|
||||
// This is an add function for sub-matrices for ESP32 processor
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_add_f32_ae32
|
||||
.type dspm_add_f32_ae32,@function
|
||||
// The function implements the following C code:
// esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
//
// Element-wise addition of two float sub-matrices:
//   for each row: for each col: *output = *input1 + *input2, every pointer
//   advancing by its own step (in floats) per column and additionally by its
//   own padding (in floats) at the end of each row.
//
// No argument validation is done here: rows must be >= 1, because the row
// counter is decremented before it is tested against zero.
// NOTE(review): after a row the pointers have moved cols * step + padd floats,
// while the ANSI reference moves cols + padd floats per row. The two agree
// when all steps are 1 - confirm the intended behaviour for other steps.

dspm_add_f32_ae32:
// Arguments passed in registers:
// input1   - a2
// input2   - a3
// output   - a4
// rows     - a5
// cols     - a6
// padd1    - a7
// Arguments loaded from the stack below:
// padd2    - a8
// padd_out - a9
// step1    - a10
// step2    - a11
// step_out - a12

    entry   a1, 16

    // The remaining five arguments live in consecutive 4-byte stack slots
    // starting at a1 + 16, so every one of them needs its own offset.
    l32i.n  a8,  a1, 16         // padd2
    l32i.n  a9,  a1, 20         // padd_out
    l32i.n  a10, a1, 24         // step1
    l32i.n  a11, a1, 28         // step2
    l32i.n  a12, a1, 32         // step_out

    // Convert the steps from float elements to bytes
    slli    a10, a10, 2         // a10 - step1 << 2
    slli    a11, a11, 2         // a11 - step2 << 2
    slli    a12, a12, 2         // a12 - step_out << 2

.outer_loop_add_f32_ae32:

    // Process one row: cols iterations
    loopnez a6, .loop_add_f32_ae32
        lsxp    f0, a2, a10     // load input1 to f0, increment input1 (input1_ptr+=step1)
        lsxp    f1, a3, a11     // load input2 to f1, increment input2 (input2_ptr+=step2)

        add.s   f2, f0, f1      // f2 = f0 + f1
        ssxp    f2, a4, a12     // save result f2 to output a4, increment output (output_ptr+=step_out)
    .loop_add_f32_ae32:

    // Skip the padding at the end of the row for all three pointers
    addx4   a3, a8, a3          // input2_ptr += (padd2 << 2);
    addx4   a2, a7, a2          // input1_ptr += (padd1 << 2);
    addx4   a4, a9, a4          // output_ptr += (padd_out << 2);
    addi.n  a5, a5, -1          // rows - 1

    bnez    a5, .outer_loop_add_f32_ae32

    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dspm_add_f32_ae32_enabled
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include "dspm_add.h"
|
||||
|
||||
esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out)
|
||||
{
|
||||
if (NULL == input1) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == input2) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == output) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (rows <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (cols <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (padd1 < 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (padd2 < 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (padd_out < 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (step1 <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (step2 <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (step_out <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
const int ptr_input1_step = cols + padd1;
|
||||
const int ptr_input2_step = cols + padd2;
|
||||
const int ptr_output_step = cols + padd_out;
|
||||
float *ptr_input1 = (float *)input1;
|
||||
float *ptr_input2 = (float *)input2;
|
||||
|
||||
for (int row = 0; row < rows; row++) {
|
||||
for (int col = 0; col < cols; col++) {
|
||||
output[col * step_out] = ptr_input1[col * step1] + ptr_input2[col * step2];
|
||||
}
|
||||
ptr_input1 += ptr_input1_step;
|
||||
ptr_input2 += ptr_input2_step;
|
||||
output += ptr_output_step;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _dspm_add_H_
|
||||
#define _dspm_add_H_
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dspm_add_platform.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief add two arrays with paddings (add two sub-matrices)
|
||||
*
|
||||
* The function adds two arrays defined as sub-matrices with paddings
|
||||
* out[row * ptr_step_out + col * step_out] = in1[row * ptr_step_in1 + col * step1] + in2[row * ptr_step_in2 + col * step2];
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param[in] input1: input array 1
|
||||
* @param[in] input2: input array 2
|
||||
* @param[out] output: output array
|
||||
* @param[in] rows: matrix rows
|
||||
* @param[in] cols: matrix cols
|
||||
* @param[in] padd1: input array 1 padding
|
||||
* @param[in] padd2: input array 2 padding
|
||||
* @param[in] padd_out: output array padding
|
||||
* @param[in] step1: step over input array 1 (by default should be 1)
|
||||
* @param[in] step2: step over input array 2 (by default should be 1)
|
||||
* @param[in] step_out: step over output array (by default should be 1)
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_add_f32_ansi(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
|
||||
esp_err_t dspm_add_f32_ae32(const float *input1, const float *input2, float *output, int rows, int cols, int padd1, int padd2, int padd_out, int step1, int step2, int step_out);
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
// dspm_add_f32 resolves to the ae32 routine only when optimized builds are
// requested (CONFIG_DSP_OPTIMIZED) and the platform header has enabled it;
// in every other configuration it resolves to the ANSI C routine.
#if CONFIG_DSP_OPTIMIZED && (dspm_add_f32_ae32_enabled == 1)
#define dspm_add_f32 dspm_add_f32_ae32
#else
#define dspm_add_f32 dspm_add_f32_ansi
#endif // CONFIG_DSP_OPTIMIZED && dspm_add_f32_ae32_enabled
|
||||
|
||||
#endif // _dspm_add_H_
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef _dspm_add_platform_H_
|
||||
#define _dspm_add_platform_H_
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#if defined(__XTENSA__)
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>

// The ae32 implementation is enabled only when the core configuration
// reports both XCHAL_HAVE_FP and XCHAL_HAVE_LOOPS as 1.
#if (XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1)
#define dspm_add_f32_ae32_enabled 1
#endif

#endif // __XTENSA__
|
||||
|
||||
|
||||
#endif // _dspm_add_platform_H_
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dspm_add.h"
|
||||
#include "esp_attr.h"
|
||||
#include "test_mat_common.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
// Checks dspm_add_f32 against a precomputed reference for sub-matrix sizes
// 1..5 x 1..5, with and without a one-element border, and for seven
// combinations of copied matrices and ROI views used as A, B and C.
TEST_CASE("dspm_add_f32_ae32 functionality", "[dspm]")
{
    // create ROI rectangle
    dspm::Mat::Rect roi_rect;

    char message[60];
    for (int var = 0; var < 7; var++) {
        for (int start_row = 0; start_row < 2; start_row++) {
            for (int start_col = 0; start_col < 2; start_col++) {
                for (int row = 1; row < 6; row++) {
                    for (int col = 1; col < 6; col++) {
                        // Bounded formatting: the text can never overrun message[]
                        snprintf(message, sizeof(message), "var = %d s_row = %d s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);

                        // create A B C matrices with row col dimensions + padding
                        // padding is from both sides of the targeted sub-matrix
                        // 1 1 1 1
                        // 1 x x 1
                        // 1 x x 1
                        // 1 1 1 1
                        const int total_rows = row + (2 * start_row);
                        const int total_cols = col + (2 * start_col);
                        dspm::Mat A(total_rows, total_cols);
                        dspm::Mat B(total_rows, total_cols);
                        dspm::Mat C_compare(total_rows, total_cols);
                        dspm::Mat C = dspm::Mat::ones(total_rows, total_cols);

                        // create A B C sub matrices with undefined dimensions
                        dspm::Mat A_sub;
                        dspm::Mat B_sub;
                        dspm::Mat C_sub;

                        // adjust ROI rectangles
                        roi_rect.resizeRect(start_col, start_row, col, row);

                        // A and B hold 1, 2, 3, ... so the expected sum is twice that
                        for (int i = 0; i < A.length; i++) {
                            A.data[i] = i + 1;
                            B.data[i] = i + 1;
                            C_compare.data[i] = (i + 1) * 2;
                        }

                        // Combinations of A B C matrices and sub-matrices are created for testing
                        // As an example: case 1
                        // Matrix A is a matrix - the data are copied into the A matrix
                        // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
                        switch (var) {
                        case 0: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 1: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 2: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 3: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 4: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        case 5: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        case 6: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        default:
                            break;
                        }

                        dspm_add_f32(A_sub.data, B_sub.data, C_sub.data, row, col, A_sub.padding, B_sub.padding, C_sub.padding, 1, 1, 1);
                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);

                        // The computed area must match the reference in every variant
                        test_assert_equal_mat_mat(C_sub_check, C_sub, message);

                        // C is a sub-matrix
                        if (C_sub.sub_matrix) {
                            // Create a copy of the original C matrix (filled with ones 1)
                            // to check if an area around the sub-matrix is unaffected after a matrix operation
                            dspm::Mat C_area_check = dspm::Mat::ones(total_rows, total_cols);
                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
                        }
                    }
                }
            }
        }
    }
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_add_f32_ae32 benchmark", "[dspm]")
|
||||
{
|
||||
const int dim = 4;
|
||||
const int M_off = 1;
|
||||
|
||||
dspm::Mat mat(dim + M_off, dim + M_off);
|
||||
dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
dspm_add_f32(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_add_f32(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
printf("Benchmark dspm_add_f32_ae32 - %f per sample %dx%d.\n", cycles, dim, dim);
|
||||
float min_exec = 100;
|
||||
float max_exec = 1400;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dspm_add.h"
|
||||
#include "esp_attr.h"
|
||||
#include "test_mat_common.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
// Checks dspm_add_f32_ansi against a precomputed reference for sub-matrix
// sizes 1..5 x 1..5, with and without a one-element border, and for seven
// combinations of copied matrices and ROI views used as A, B and C.
// Tagged "[dspm]" like the other dspm_add tests so one tag selects them all.
TEST_CASE("dspm_add_f32_ansi functionality", "[dspm]")
{
    // create ROI rectangle
    dspm::Mat::Rect roi_rect;

    char message[60];
    for (int var = 0; var < 7; var++) {
        for (int start_row = 0; start_row < 2; start_row++) {
            for (int start_col = 0; start_col < 2; start_col++) {
                for (int row = 1; row < 6; row++) {
                    for (int col = 1; col < 6; col++) {
                        // Bounded formatting: the text can never overrun message[]
                        snprintf(message, sizeof(message), "var = %d s_row = %d s_col = %d, row = %d, col = %d", var, start_row, start_col, row, col);

                        // create A B C matrices with row col dimensions + padding
                        // padding is from both sides of the targeted sub-matrix
                        // 1 1 1 1
                        // 1 x x 1
                        // 1 x x 1
                        // 1 1 1 1
                        const int total_rows = row + (2 * start_row);
                        const int total_cols = col + (2 * start_col);
                        dspm::Mat A(total_rows, total_cols);
                        dspm::Mat B(total_rows, total_cols);
                        dspm::Mat C_compare(total_rows, total_cols);
                        dspm::Mat C = dspm::Mat::ones(total_rows, total_cols);

                        // create A B C sub matrices with undefined dimensions
                        dspm::Mat A_sub;
                        dspm::Mat B_sub;
                        dspm::Mat C_sub;

                        // adjust ROI rectangles
                        roi_rect.resizeRect(start_col, start_row, col, row);

                        // A and B hold 1, 2, 3, ... so the expected sum is twice that
                        for (int i = 0; i < A.length; i++) {
                            A.data[i] = i + 1;
                            B.data[i] = i + 1;
                            C_compare.data[i] = (i + 1) * 2;
                        }

                        // Combinations of A B C matrices and sub-matrices are created for testing
                        // As an example: case 1
                        // Matrix A is a matrix - the data are copied into the A matrix
                        // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
                        switch (var) {
                        case 0: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 1: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 2: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 3: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub.CopyHead(C.getROI(roi_rect));     // C sub-matrix - NO DATA CPY
                        } break;
                        case 4: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        case 5: {
                            A_sub.CopyHead(A.getROI(roi_rect));     // A sub-matrix - NO DATA CPY
                            B_sub = B.Get(roi_rect);                // B matrix - DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        case 6: {
                            A_sub = A.Get(roi_rect);                // A matrix - DATA CPY
                            B_sub.CopyHead(B.getROI(roi_rect));     // B sub-matrix - NO DATA CPY
                            C_sub = C.Get(roi_rect);                // C matrix - DATA CPY
                        } break;
                        default:
                            break;
                        }

                        dspm_add_f32_ansi(A_sub.data, B_sub.data, C_sub.data, row, col, A_sub.padding, B_sub.padding, C_sub.padding, 1, 1, 1);
                        dspm::Mat C_sub_check = C_compare.Get(roi_rect);

                        // The computed area must match the reference in every variant
                        test_assert_equal_mat_mat(C_sub_check, C_sub, message);

                        // C is a sub-matrix
                        if (C_sub.sub_matrix) {
                            // Create a copy of the original C matrix (filled with ones 1)
                            // to check if an area around the sub-matrix is unaffected after a matrix operation
                            dspm::Mat C_area_check = dspm::Mat::ones(total_rows, total_cols);
                            test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
                        }
                    }
                }
            }
        }
    }
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_add_f32_ansi benchmark", "[dsps]")
|
||||
{
|
||||
const int dim = 4;
|
||||
const int M_off = 1;
|
||||
|
||||
dspm::Mat mat(dim + M_off, dim + M_off);
|
||||
dspm::Mat mat_sub = mat.getROI(M_off, M_off, dim, dim);
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
dspm_add_f32_ansi(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_add_f32_ansi(mat_sub.data, mat_sub.data, mat_sub.data, dim, dim, mat_sub.padding, mat_sub.padding, mat_sub.padding, 1, 1, 1);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
printf("Benchmark dspm_add_f32_ansi - %f per sample %dx%d.\n", cycles, dim, dim);
|
||||
float min_exec = 100;
|
||||
float max_exec = 1400;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
Reference in New Issue
Block a user