add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/include/test_mat_common.h
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/include/test_mat_common.h
@@ -0,0 +1,84 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _test_mat_common_H_
+#define _test_mat_common_H_
+
+#include "dspm_mult.h"
+#include "dsp_err.h"
+#include "dspm_mult_platform.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @brief data type for testing operations with sub-matrices
+ *
+ * One test configuration: a start row/column for each of the matrices A, B and C, plus the sizes m, n and k.
+ * NOTE(review): no user of this type is visible in this file - the per-field notes below are assumptions to confirm.
+ */
+typedef struct m_test_data_s {
+    int var;            // presumably selects the test variant - TODO confirm against the users of this type
+    int A_start_row;    // presumably the start row of the sub-matrix inside matrix A
+    int A_start_col;    // presumably the start column of the sub-matrix inside matrix A
+    int B_start_row;    // presumably the start row of the sub-matrix inside matrix B
+    int B_start_col;    // presumably the start column of the sub-matrix inside matrix B
+    int C_start_row;    // presumably the start row of the sub-matrix inside matrix C
+    int C_start_col;    // presumably the start column of the sub-matrix inside matrix C
+    int m;              // presumably rows of A and C - TODO confirm
+    int n;              // presumably columns of A / rows of B - TODO confirm
+    int k;              // presumably columns of B and C - TODO confirm
+} m_test_data_t;
+
+/**
+ * @brief check whether 2 matrices are equal
+ *
+ * Compares the matrices element by element with a float-equality test assertion.
+ * Only the rows and columns of m_expected are iterated; the dimensions of m_actual are not checked.
+ *
+ * @param[in] m_expected: reference matrix
+ * @param[in] m_actual: matrix to be evaluated
+ * @param[in] message: message for test app, in case the test fails
+ *
+ */
+void test_assert_equal_mat_mat(dspm::Mat &m_expected, dspm::Mat &m_actual, const char *message);
+
+/**
+ * @brief check whether a matrix is set to a constant
+ *
+ * Asserts, element by element over all rows and columns of the matrix,
+ * that every element is float-equal to the given constant
+ *
+ * @param[in] m_actual: matrix to be evaluated
+ * @param[in] num: reference constant
+ * @param[in] message: message for test app, if a test fails
+ *
+ */
+void test_assert_equal_mat_const(dspm::Mat &m_actual, float num, const char *message);
+
+/**
+ * @brief check if an area around a sub-matrix is unaffected
+ *
+ * Compares the elements before the sub-matrix, the padding between its rows and the elements after it with m_origin; the sub-matrix elements themselves are skipped
+ *
+ * @param[in] m_origin: original matrix
+ * @param[in] m_modified: sub-matrix, which is created from m_origin
+ * @param[in] start_row: sub-matrix start row
+ * @param[in] start_col: sub-matrix start col
+ * @param[in] message: message for test app, in case the test fails
+ *
+ */
+void test_assert_check_area_mat_mat(dspm::Mat &m_origin, dspm::Mat &m_modified, int start_row, int start_col, const char *message);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _test_mat_common_H_
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mat_common.cpp
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mat_common.cpp
@@ -0,0 +1,74 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "esp_log.h"
+
+#include "esp_attr.h"
+#include "dsp_tests.h"
+#include "test_mat_common.h"
+
+void test_assert_equal_mat_mat(dspm::Mat &m_expected, dspm::Mat &m_actual, const char *message)
+{   // element-wise float comparison; the reference matrix defines the iterated rows and columns
+    for (int r = 0; r < m_expected.rows; ++r) {
+        for (int c = 0; c < m_expected.cols; ++c) {
+            TEST_ASSERT_EQUAL_FLOAT_MESSAGE(m_expected(r, c), m_actual(r, c), message);
+        }
+    }
+}
+
+void test_assert_equal_mat_const(dspm::Mat &m_actual, float num, const char *message)
+{   // every element of the evaluated matrix has to be float-equal to the constant
+    for (int r = 0; r < m_actual.rows; ++r) {
+        for (int c = 0; c < m_actual.cols; ++c) {
+            TEST_ASSERT_EQUAL_FLOAT_MESSAGE(num, m_actual(r, c), message);
+        }
+    }
+}
+
+void test_assert_check_area_mat_mat(dspm::Mat &m_origin, dspm::Mat &m_modified, int start_row, int start_col, const char *message)
+{
+    // number of elements of the original matrix located before the first sub-matrix element
+    const int ptr_shift = (start_row * m_origin.cols) + start_col;
+    // number of elements of the original matrix located after the last sub-matrix element
+    const int end_of_matrix_space = m_origin.length - m_modified.length - ptr_shift - ((m_modified.rows - 1) * m_modified.padding);
+
+    // both cursors advance in lockstep; the sub-matrix cursor is first set back by ptr_shift elements
+    const float *origin_it = m_origin.data;
+    const float *modified_it = m_modified.data - ptr_shift;
+
+    // original matrix area before the sub-matrix
+    for (int remaining = ptr_shift; remaining > 0; remaining--) {
+        TEST_ASSERT_EQUAL_FLOAT_MESSAGE(*origin_it, *modified_it, message);
+        origin_it++;
+        modified_it++;
+    }
+
+    // in and between the sub-matrix area
+    for (int row = 0; row < m_modified.rows; row++) {
+        // The actual sub-matrix (accessed area) is stepped over without being compared
+        origin_it += m_modified.cols;
+        modified_it += m_modified.cols;
+
+        // padding area, there is no padding to check after the last row
+        if (row == (m_modified.rows - 1)) {
+            break;
+        }
+        for (int padd_col = 0; padd_col < m_modified.padding; padd_col++) {
+            TEST_ASSERT_EQUAL_FLOAT_MESSAGE(*origin_it, *modified_it, message);
+            origin_it++;
+            modified_it++;
+        }
+    }
+
+    // original matrix area after the sub-matrix
+    for (int remaining = end_of_matrix_space; remaining > 0; remaining--) {
+        TEST_ASSERT_EQUAL_FLOAT_MESSAGE(*origin_it, *modified_it, message);
+        origin_it++;
+        modified_it++;
+    }
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mat_f32.cpp
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mat_f32.cpp
@@ -0,0 +1,270 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+#include "mat.h"
+
+static const char *TAG = "dspm_Mat";   // log tag passed to the ESP_LOGI / ESP_LOGE calls of the test cases in this file
+
+TEST_CASE("Mat class ", "[dspm]")
+{
+    // Smoke test: construct a square matrix, then print its dimensions and its content
+    const int size = 3;
+    dspm::Mat mat(size, size);
+    std::cout << "Test matrix: rows: " << mat.rows << ", columns: " << mat.cols << std::endl;
+    std::cout << mat;
+}
+
+TEST_CASE("Mat class check solve ", "[dspm]")
+{
+    // Solves A * x = b with solve() and with roots(), both results must agree within 0.000001
+    const int dim = 3;
+    float data_a[9] = {3, 2, 1, 2, 3, 1, 2, 1, 3};
+    float data_b[3] = {5, -1, 4};
+    dspm::Mat A(data_a, dim, dim);
+    dspm::Mat b(data_b, dim, 1);
+    dspm::Mat x1 = dspm::Mat::solve(A, b);
+    std::cout << "Solve result matrix: rows: " << x1.rows << ", columns: " << x1.cols << std::endl;
+    std::cout << (x1 * 12).t();
+    dspm::Mat x2 = dspm::Mat::roots(A, b);
+    std::cout << "Roots result matrix: rows: " << x2.rows << ", columns: " << x2.cols << std::endl;
+    std::cout << (x2 * 12).t();
+    dspm::Mat diff_b = x1 - x2;
+    std::cout << "Difference between solve() abd roots(): " << diff_b.t();
+    for (int row = 0 ; row < diff_b.rows; row++) {          // loop indices no longer shadow the dimensions
+        for (int col = 0 ; col < diff_b.cols ; col++) {
+            if (fabs(diff_b(row, col)) > 0.000001) {
+                TEST_ASSERT_MESSAGE (false, "Calculation is incorrect! Error more then expected!");
+            }
+        }
+    }
+}
+
+TEST_CASE("Mat class basic operations", "[dspm]")
+{
+    const int M = 4;
+    const int N = 4;
+
+    dspm::Mat A(M, N);
+    dspm::Mat x(N, 1);
+    for (int row = 0 ; row < M ; row++) {
+        for (int col = 0 ; col < N ; col++) {
+            A(row, col) = N * (row + 1) + (col + 1);
+        }
+        x(row, 0) = row + 2;
+    }
+
+    A(0, 0) = 10;
+    A(0, 1) = 11;
+
+    // b is derived from the known x, then A * x = b is solved back with solve() and with roots()
+    dspm::Mat b = A * x;
+    dspm::Mat x1_ = dspm::Mat::solve(A, b);
+    dspm::Mat x2_ = dspm::Mat::roots(A, b);
+
+    ESP_LOGI(TAG, "Matrix A:");
+    std::cout << A;
+    ESP_LOGI(TAG, "Matrix x.t():");
+    std::cout << x.t();
+    ESP_LOGI(TAG, "Matrix b.t():");
+    std::cout << b.t();
+    ESP_LOGI(TAG, "Solve result:");
+    std::cout << x1_.t();
+    ESP_LOGI(TAG, "Roots result:");
+    std::cout << x2_.t();
+    dspm::Mat check_b = A * x1_;
+    ESP_LOGI(TAG, "Result b.t():");
+    std::cout << check_b.t();
+    dspm::Mat diff_b = check_b - b;
+    ESP_LOGI(TAG, "Difference:");
+    std::cout << diff_b.t();
+
+    for (int row = 0 ; row < diff_b.rows; row++) {
+        for (int col = 0 ; col < diff_b.cols ; col++) {
+            const float error = fabs(diff_b(row, col));     // residual of A * x1_ against b
+            if (error > 0.0001) {
+                ESP_LOGE(TAG, "Solve calculation error: %f", error);
+                TEST_ASSERT_MESSAGE (false, "Calculation is incorrect! Error more then expected!");
+            }
+        }
+    }
+}
+
+TEST_CASE("Mat class operators", "[dspm]")
+{
+    // Checks the +, -, *, *=, == and constant operators, then block(), normalize(), inverse() and pinv()
+    int M = 4;
+    int N = 4;
+
+    dspm::Mat test1(M, N);
+    dspm::Mat test2(M, N);
+    dspm::Mat result(M, N);
+    for (int m = 0 ; m < M ; m++) {
+        for (int n = 0 ; n < N ; n++) {
+            test1(m, n) = (m * N + n) * 2;
+            test2(m, n) = m * N + n;
+            result(m, n) = 0;
+        }
+    }
+
+    // Check + operator (result = test1 + test2;), every element must be 3 * index
+    result = test1 + test2;
+    for (int m = 0 ; m < M ; m++) {
+        for (int n = 0 ; n < N ; n++) {
+            if ((result(m, n) != (test1(m, n) + test2(m, n))) ||
+                    (result(m, n) != 3 * (m * N + n)) ||
+                    (result.data[m * N + n] != 3 * (m * N + n))) {
+                TEST_ASSERT_MESSAGE (false, "Error in + operator!");
+            }
+        }
+    }
+    // Check - operator (result = test1 - test2;), every element must be equal to its index
+    result = test1 - test2;
+    for (int m = 0 ; m < M ; m++) {
+        for (int n = 0 ; n < N ; n++) {
+            if ((result(m, n) != (test1(m, n) - test2(m, n))) ||
+                    (result(m, n) != (m * N + n)) ||
+                    (result.data[m * N + n] != (m * N + n))) {
+                TEST_ASSERT_MESSAGE (false, "Error in - operator!");
+            }
+        }
+    }
+    // Check * operator (result = A*B;)
+    // result = I*test2
+    // result == test2
+    test1 = test1.eye(test1.rows);
+    result = test1 * test2;
+    dspm::Mat result2 = test1;
+    result2 *= test2;
+
+    for (int m = 0 ; m < M ; m++) {
+        for (int n = 0 ; n < N ; n++) {
+            // if (result(m,n) < 0.000000001)
+            // {
+            //     result(m,n) = 0;
+            // }
+            if ((result(m, n) != test2(m, n)) ||
+                    (result(m, n) != (m * N + n)) ||
+                    (result.data[m * N + n] != (m * N + n))) {
+                std::cout << "Error: " << result(m, n) << "!=" << test2(m, n) << " , "
+                          << result(m, n) << "!=" << (m * N + n) << " , "
+                          << result.data[m * N + n] << "!=" << (m * N + n) << std::endl;
+                TEST_ASSERT_MESSAGE (false, "Error in * operator!");
+            }
+        }
+    }
+    if (!(result == result2)) {
+        std::cout << "result matrix: " << std::endl << result << std::endl;
+        std::cout << "result2 matrix: " << std::endl << result2 << std::endl;
+        TEST_ASSERT_MESSAGE (false, "Error in *= or in == operator!");
+    }
+    // Check * and + operator (result = A*const1 + const2;)
+
+    test1 = test2;
+    float const1 = 2;
+    float const2 = 10;
+    result = test1 * const1 + const2;
+    result = (result - const2) / const1;
+    for (int m = 0 ; m < M ; m++) {
+        for (int n = 0 ; n < N ; n++) {
+            if ((result(m, n) != test2(m, n)) ||
+                    (result(m, n) != (m * N + n)) ||
+                    (result.data[m * N + n] != (m * N + n))
+               ) {
+                TEST_ASSERT_MESSAGE (false, "Error in + * const operator!");
+            }
+        }
+    }
+    // Test block(...):
+    int count = 0;
+    for (int m = 0 ; m < M ; m++) {
+        for (int n = 0 ; n < N ; n++) {
+            result(m, n) = count++;
+        }
+    }
+    std::cout << "Original matrix: " <<  std::endl;
+    std::cout << result << std::endl;
+    std::cout << "block: " << std::endl;
+    std::cout << result.block(1, 1, M - 1, N - 1) << std::endl;
+    // Test normalize()
+    result = dspm::Mat(2, 2);
+    for (int m = 0 ; m < result.rows ; m++) {
+        for (int n = 0 ; n < result.cols ; n++) {
+            result(m, n) = 1;
+        }
+    }
+    std::cout << "Befor normalize: " << std::endl;
+    std::cout << result << std::endl;
+    result.normalize();
+    std::cout << "normalize: " << std::endl;
+    std::cout << result << std::endl;
+
+    for (int m = 0 ; m < result.rows ; m++) {
+        for (int n = 0 ; n < result.cols ; n++) {
+            if (std::abs(result(m, n) - 0.5) > dspm::Mat::abs_tol) {
+                ESP_LOGE(TAG, "Error bigger then expected: %f", std::abs(result(m, n) - 0.5));
+                TEST_ASSERT_MESSAGE (false, "Error in normalize() operation! ");
+            }
+        }
+    }
+    // Test inverse()
+    float m_data[] = {2, 5, 7,
+                      6, 3, 4,
+                      5, -2, -3
+                     };
+    float m_result[] = {  1.0000,   -1.0000,    1.0000,
+                          -38.0000,   41.0000,  -34.0000,
+                          27.0000,  -29.0000,   24.0000
+                       };
+    result = dspm::Mat(m_data, 3, 3);
+    result = result.inverse();
+    std::cout << "inverse: " << std::endl;
+    std::cout << result << std::endl;
+    for (int i = 0 ; i < 3 * 3 ; i++) {
+        if (std::abs(result.data[i] - m_result[i]) > 1e-4) {
+            printf("Error at[%i] = %f, expected= %f, calculated = %f \n", i, std::abs(result.data[i] - m_result[i]), m_result[i], result.data[i]);
+            TEST_ASSERT_MESSAGE (false, "Error in inverse() operation!\n");
+        }
+    }
+
+    result = dspm::Mat(m_data, 3, 3);
+    result = result.pinv();
+    std::cout << "pinv: " << std::endl;
+    std::cout << result << std::endl;
+    for (int i = 0 ; i < 3 * 3 ; i++) {
+        if (std::abs(result.data[i] - m_result[i]) > 1e-2) {
+            printf("Error at[%i] = %f, expected= %f, calculated = %f \n", i, std::abs(result.data[i] - m_result[i]), m_result[i], result.data[i]);
+            TEST_ASSERT_MESSAGE (false, "Error in pinv() operation!\n");
+        }
+    }
+}
+
+TEST_CASE("mat.cpp functionality", "[dsps]")
+{   // The determinant of an identity matrix has to be 1 for every tested size (3x3 up to 9x9)
+    const int max_size = 10;
+    for (int i = 3 ; i < max_size ; i++) {
+        dspm::Mat A = dspm::Mat::eye(i);
+        float det = A.det(i);
+        printf("Det[%i] = %f\n", i, det);
+        TEST_ASSERT_EQUAL_FLOAT(1, det);    // float comparison in (expected, actual) order, the integer assert would truncate det
+    }
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mat_sub_f32.cpp
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mat_sub_f32.cpp
@@ -0,0 +1,917 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include <malloc.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+#include "mat.h"
+#include "test_mat_common.h"
+
+static const char *TAG = "[dspm]";      // passed as the second argument of the TEST_CASE below
+
+#define MAT_ROW 6       // test_matrix rows
+#define MAT_COL 6       // test_matrix cols
+#define ROI_ROW 4       // sub_matrix rows
+#define ROI_COL 4       // sub_matrix cols
+#define START_ROI 1     // start row/col dimension to create sub matrix from test matrix
+
+dspm::Mat::Rect roi_rect(START_ROI, START_ROI, ROI_ROW, ROI_COL);   // ROI shared by the operator tests; NOTE(review): a Rect in this file is also built as (x, y, width, height), so ROI_ROW would sit in the width slot - harmless while ROI_ROW == ROI_COL, confirm
+
+// matrix subset: the shallow copies returned by getROI() must match the deep copies returned by Get()
+TEST_CASE("Mat class matrix subset", TAG)
+{
+    float data[25] = {0, 1, 2, 3, 4,
+                      5, 6, 7, 8, 9,
+                      0, 1, 2, 3, 4,
+                      5, 6, 7, 8, 9,
+                      0, 1, 2, 3, 4
+                     };
+
+    // Test matrix dimensions
+    const int m = 5;
+    const int n = 5;
+
+    dspm::Mat mat(data, m, n);
+    std::cout << "Test matrix: rows: " << mat.rows << ", columns: " << mat.cols << std::endl;
+    std::cout << mat << std::endl;
+
+    // Sub matrix method 1 - sub-matrix dimensions
+    int start_row = 1;
+    int start_col = 1;
+    int roi_rows = 4;
+    int roi_cols = 3;
+
+    // Create matrix subset as a shallow copy of mat matrix (no matrix data are copied)
+    dspm::Mat mat_subset1 = mat.getROI(start_row, start_col, roi_rows, roi_cols);
+
+    // Create matrix subset as a deep copy of mat matrix (matrix data are copied)
+    dspm::Mat mat_subset1_check = mat.Get(start_row, roi_rows, start_col, roi_cols);
+
+    std::cout << "Matrix subset, method 1: rows: " << mat_subset1.rows << ", columns: " << mat_subset1.cols << std::endl;
+    std::cout << mat_subset1 << std::endl;
+
+    // Compare the deep and the shallow copies
+    test_assert_equal_mat_mat(mat_subset1_check, mat_subset1, "matrix subset 1");
+
+    // Sub matrix method 2 - sub-matrix dimensions as a matrix rectangle
+    int x = 1;
+    int y = 1;
+    int width = 4;
+    int height = 3;
+
+    // Create matrix ROI as a rectangle area (named so that it does not shadow the file-scope roi_rect)
+    dspm::Mat::Rect subset_rect(x, y, width, height);
+    dspm::Mat mat_subset2 = mat.getROI(subset_rect);
+    std::cout << "Matrix subset method 2: rows: " << mat_subset2.rows << ", columns: " << mat_subset2.cols << std::endl;
+    std::cout << mat_subset2 << std::endl;
+    dspm::Mat mat_subset2_check = mat.Get(subset_rect);
+
+    test_assert_equal_mat_mat(mat_subset2_check, mat_subset2, "matrix subset 2");
+
+    // Sub matrix method 3 - sub-matrix dimensions with specified stride
+    start_row = 0;
+    start_col = 1;
+    roi_rows = 3;
+    roi_cols = 3;
+    int stride = 10;
+
+    dspm::Mat mat_subset3 = mat.getROI(start_row, start_col, roi_rows, roi_cols, stride);
+    std::cout << "Matrix subset method 3: rows: " << mat_subset3.rows << ", columns: " << mat_subset3.cols << std::endl;
+    std::cout << mat_subset3 << std::endl;
+    dspm::Mat mat_subset3_check = mat.Get(start_row, 5, start_col, roi_cols);
+
+    // the check matrix holds all 5 rows, the strided subset is compared against every second one of them
+    for (int row = 0; row < mat_subset3_check.rows; row++) {
+        if (row % 2) {
+            continue;
+        }
+        for (int col =  0; col < mat_subset3_check.cols; col++) {
+            TEST_ASSERT_EQUAL_FLOAT(mat_subset3_check(row, col), mat_subset3(row / 2, col));
+        }
+    }
+}
+
+static void test_mat_subset_operator_eq()
+{   // operator= test: assignments between matrices and sub-matrices, with equal and with different dimensions
+    dspm::Mat mat(2, 2);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = 1;
+    }
+
+    dspm::Mat mat1(2, 2);
+    for (int i = 0; i < mat1.length; i++) {
+        mat1.data[i] = i + 1;
+    }
+
+    // matrices, dimensions are equal
+    // mat(2, 2), mat1(2, 2)
+    mat = mat1;
+    TEST_ASSERT_EQUAL_INT(2, mat.rows);
+    TEST_ASSERT_EQUAL_INT(mat1.rows, mat.rows);
+    TEST_ASSERT_EQUAL_INT(2, mat.cols);
+    TEST_ASSERT_EQUAL_INT(mat1.cols, mat.cols);
+    test_assert_equal_mat_mat(mat1, mat, "=operator, mat = mat (equal dim)");
+
+    dspm::Mat mat2(3, 3);
+    for (int i = 0; i < mat2.length; i++) {
+        mat2.data[i] = (i + 1) * 2;
+    }
+
+    // matrices, dimensions are not equal, the target is expected to take over the source dimensions
+    // mat1(2, 2), mat2(3, 3)
+    mat1 = mat2;
+    TEST_ASSERT_EQUAL_INT(3, mat1.rows);
+    TEST_ASSERT_EQUAL_INT(mat2.rows, mat1.rows);
+    TEST_ASSERT_EQUAL_INT(3, mat1.cols);
+    TEST_ASSERT_EQUAL_INT(mat2.cols, mat1.cols);
+    test_assert_equal_mat_mat(mat2, mat1, "=operator, mat = mat (not equal dim)");
+
+    dspm::Mat mat3(4, 4);
+    dspm::Mat mat4(4, 4);
+    dspm::Mat mat4_compare(4, 4);       // same content as mat4, never assigned to: reference for the area checks
+    for (int i = 0; i < mat3.length; i++) {
+        mat3.data[i] = (i + 1) * 3;
+        mat4.data[i] = (i + 1) * 4;
+        mat4_compare.data[i] = (i + 1) * 4;
+    }
+    dspm::Mat mat3_sub_3x3 = mat3.getROI(1, 1, 3, 3);
+    dspm::Mat mat3_sub_2x2 = mat3.getROI(1, 1, 2, 2);
+    dspm::Mat mat3_mat_2x2 = mat3.Get(1, 2, 1, 2);
+
+    // matrix and sub-matrix, dimensions are equal
+    // mat1(3, 3), mat3_sub_3x3(3, 3)
+    mat1 = mat3_sub_3x3;
+    TEST_ASSERT_FALSE(mat1.sub_matrix);     // the target must not be flagged as a sub-matrix afterwards
+    TEST_ASSERT_EQUAL_INT(3, mat1.rows);
+    TEST_ASSERT_EQUAL_INT(mat3_sub_3x3.rows, mat1.rows);
+    TEST_ASSERT_EQUAL_INT(3, mat1.cols);
+    TEST_ASSERT_EQUAL_INT(mat3_sub_3x3.cols, mat1.cols);
+    test_assert_equal_mat_mat(mat3_sub_3x3, mat1, "=operator, mat = sub_mat (equal dim)");
+
+    dspm::Mat mat4_sub_2x2 = mat4.getROI(1, 1, 2, 2);
+    dspm::Mat mat4_mat_2x2 = mat4.Get(1, 2, 1, 2);      // copy of the mat4 ROI taken before any assignment to it
+
+    // matrix and sub-matrix, dimensions are not equal
+    // mat1(3, 3), mat4_sub_2x2(2, 2)
+    mat1 = mat4_sub_2x2;
+    TEST_ASSERT_FALSE(mat1.sub_matrix);
+    TEST_ASSERT_EQUAL_INT(2, mat1.rows);
+    TEST_ASSERT_EQUAL_INT(mat4_sub_2x2.rows, mat1.rows);
+    TEST_ASSERT_EQUAL_INT(2, mat1.cols);
+    TEST_ASSERT_EQUAL_INT(mat4_sub_2x2.cols, mat1.cols);
+    test_assert_equal_mat_mat(mat4_sub_2x2, mat1, "=operator, mat = sub_mat (not equal dim)");
+
+    // sub-matrix and sub-matrix, dimensions are not equal: the target must keep its size and its content
+    // mat4_sub_2x2(2, 2), mat3_sub_3x3(3, 3)
+    ESP_LOGI("=operator test", "following is an expected error message about matrices not having equal dimensions");
+    mat4_sub_2x2 = mat3_sub_3x3;
+    TEST_ASSERT_TRUE(mat4_sub_2x2.sub_matrix);
+    TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.rows);
+    TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.cols);
+    test_assert_equal_mat_mat(mat4_mat_2x2, mat4_sub_2x2, "=operator, sub_mat = sub_mat (not equal dim)");
+    test_assert_check_area_mat_mat(mat4_compare, mat4_sub_2x2, 1, 1, "=operator area, sub_mat = sub_mat (not equal dim)");
+
+    // sub-matrix and sub-matrix, dimensions are equal: the content is taken over, the area around the target stays untouched
+    // mat4_sub_2x2(2, 2), mat3_sub_2x2(2, 2)
+    mat4_sub_2x2 = mat3_sub_2x2;
+    TEST_ASSERT_TRUE(mat4_sub_2x2.sub_matrix);
+    TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.rows);
+    TEST_ASSERT_EQUAL_INT(mat3_sub_2x2.rows, mat4_sub_2x2.rows);
+    TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.cols);
+    TEST_ASSERT_EQUAL_INT(mat3_sub_2x2.cols, mat4_sub_2x2.cols);
+    test_assert_equal_mat_mat(mat3_mat_2x2, mat4_sub_2x2, "=operator, sub_mat = sub_mat (equal dim)");
+    test_assert_check_area_mat_mat(mat4_compare, mat4_sub_2x2, 1, 1, "=operator area, sub_mat = sub_mat (equal dim)");
+}
+
+// operator== test: matrices (Get) and sub-matrices (getROI) are compared with each other in all four combinations
+static void test_mat_subset_operator_eq_eq(void)
+{
+    dspm::Mat A(MAT_ROW, MAT_COL);
+    dspm::Mat B(MAT_ROW, MAT_COL);
+
+    for (int i = 0; i < A.length; i++) {
+        A.data[i] = i;
+        B.data[i] = i * 2;
+    }
+
+    dspm::Mat A_sub = A.getROI(roi_rect);
+    dspm::Mat A_mat = A.Get(roi_rect);
+
+    dspm::Mat B_sub = B.getROI(roi_rect);
+
+    for (int row = 0; row < B_sub.rows; row++) {
+        for (int col = 0; col < B_sub.cols; col++) {
+            B_sub(row, col) = B_sub(row, col) / 2;      // halve the ROI of B through the sub-matrix, so that it matches the ROI of A
+        }
+    }
+    dspm::Mat B_mat = B.Get(roi_rect);          // taken after the loop above, the equality asserts below expect the halved values in it
+    dspm::Mat B_mat_neq_cont = B_mat * 3;       // same dimensions, different content
+    dspm::Mat B_mat_neq_dim(3, 3);              // different dimensions
+
+    TEST_ASSERT_TRUE(A_mat == B_mat);
+    TEST_ASSERT_TRUE(A_sub == B_sub);
+    TEST_ASSERT_TRUE(A_sub == B_mat);
+    TEST_ASSERT_TRUE(A_mat == B_sub);
+    ESP_LOGI("==operator test", "following is an expected error message about matrices not having equal content");
+    TEST_ASSERT_FALSE(A_sub == B_mat_neq_cont);
+    TEST_ASSERT_FALSE(A_sub == B_mat_neq_dim);
+}
+
+// operator/ and operator/= test (matrix by matrix): every combination of matrix (Get) and sub-matrix (getROI) operands
+static void test_mat_subset_operator_mat_div_mat(void)
+{
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C = mat;
+    dspm::Mat C_compare_area = mat;     // never written to: reference for the area around C_sub
+
+    dspm::Mat A_sub = mat.getROI(roi_rect);
+    dspm::Mat A_mat = mat.Get(roi_rect);
+
+    dspm::Mat B_sub = mat.getROI(roi_rect);
+    dspm::Mat B_mat = mat.Get(roi_rect);
+
+    dspm::Mat C_sub = C.getROI(roi_rect);
+    dspm::Mat C_mat = C.Get(roi_rect);
+    dspm::Mat C_compare(ROI_ROW, ROI_COL);
+
+    for (int i = 0; i < C_compare.length; i++) {
+        C_compare.data[i] = A_mat.data[i] / B_mat.data[i];     // expected result, computed element by element
+    }
+
+    C_mat = A_mat / B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = mat / mat");
+
+    C_mat = A_sub / B_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = sub_mat / sub_mat");
+
+    C_mat = A_sub / B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = sub_mat / mat");
+
+    C_mat = A_mat / B_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = mat / sub_mat");
+
+    C_sub = A_sub / B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat = sub_mat / sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "/ operator, area check, sub_mat = sub_mat / sub_mat");
+
+    C_sub = A_mat / B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat = mat / sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "/ operator, area check, sub_mat = mat / sub_mat");
+
+    C = mat;
+    C_mat = C.Get(roi_rect);    // C_mat must be refreshed
+    C_mat /= A_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat /= mat");
+
+    C = mat;
+    C_mat = C.Get(roi_rect);    // C_mat must be refreshed
+    C_mat /= A_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat /= sub_mat");
+
+    C = mat;                    // C must be refreshed, to refresh the C_sub
+    C_sub /= A_mat;
+    test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat /= mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "/ operator, area check, sub_mat /= mat");
+
+    C = mat;                    // C must be refreshed, to refresh the C_sub
+    C_sub /= A_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat /= sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "/ operator, area check, sub_mat /= sub_mat");
+}
+
+// operator^ test: the result for a sub-matrix (getROI) must be equal to the result for a matrix (Get) with the same content
+static void test_mat_subset_operator_xor(void)
+{
+    dspm::Mat mat(5, 5);
+    dspm::Mat mat_area_check(5, 5);     // same content as mat, reference for the area around mat_sub
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+        mat_area_check.data[i] = i;
+    }
+
+    dspm::Mat::Rect roi_rect(1, 1, 3, 3);       // local ROI, hides the file-scope roi_rect inside this function
+    dspm::Mat mat_mat = mat.Get(roi_rect);
+    dspm::Mat mat_sub = mat.getROI(roi_rect);
+
+    // XOR 0 - NOTE(review): the tested cases (0, 1, even, odd) suggest that operator^ is a power rather than a bitwise XOR - confirm against mat.h
+    dspm::Mat res_mat = mat_mat ^ 0;
+    dspm::Mat res_sub = mat_sub ^ 0;
+    test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 0");
+    test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 0");
+
+    // XOR 1
+    res_mat = mat_mat ^ 1;
+    res_sub = mat_sub ^ 1;
+    test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 1");
+    test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 1");
+
+    // XOR even
+    res_mat = mat_mat ^ 2;
+    res_sub = mat_sub ^ 2;
+    test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 2");
+    test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 2");
+
+    // XOR odd
+    res_mat = mat_mat ^ 3;
+    res_sub = mat_sub ^ 3;
+    test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 3");
+    test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 3");
+}
+
+// operator/ and operator/= test (matrix by constant): matrix (Get) and sub-matrix (getROI) operands and targets
+static void test_mat_subset_operator_mat_div_const(void)
+{
+    const float div_const = 2;
+
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C_compare_area = mat;     // never written to: reference for the area around C_sub
+    dspm::Mat C = mat;
+
+    dspm::Mat A_sub = mat.getROI(roi_rect);
+    dspm::Mat A_mat = mat.Get(roi_rect);
+
+    dspm::Mat C_sub = C.getROI(roi_rect);
+    dspm::Mat C_mat = C.Get(roi_rect);
+    dspm::Mat C_compare = mat.Get(roi_rect);
+
+    for (int i = 0; i < C_compare.length; i++) {
+        C_compare.data[i] /= div_const;         // expected result, computed element by element
+    }
+
+    C_mat = A_mat / div_const;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = mat / const");
+
+    C_mat = A_sub / div_const;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = sub_mat / const");
+    C_mat = C.Get(roi_rect);                    // reload the undivided ROI before the /= test
+
+    C_mat /= div_const;
+    test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat /= const");
+
+    C_sub = A_mat / div_const;
+    test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat = mat / const");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "/ operator, area check, sub_mat = mat / const");
+
+    C = mat;                                    // restore C (and with it the content seen through C_sub) before the /= test
+    C_sub /= div_const;
+    test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat /= const");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "/ operator, area check, sub_mat /= const");
+}
+
+// operator- and operator-= test (matrix minus constant): matrix (Get) and sub-matrix (getROI) operands and targets
+static void test_mat_subset_operator_mat_sub_const(void)
+{
+    const float sub_const = 2;
+
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C_compare_area = mat;     // never written to: reference for the area around C_sub
+    dspm::Mat C = mat;
+
+    dspm::Mat A_sub = mat.getROI(roi_rect);
+    dspm::Mat A_mat = mat.Get(roi_rect);
+
+    dspm::Mat C_sub = C.getROI(roi_rect);
+    dspm::Mat C_mat = C.Get(roi_rect);
+    dspm::Mat C_compare = mat.Get(roi_rect);
+
+    for (int i = 0; i < C_compare.length; i++) {
+        C_compare.data[i] -= sub_const;         // expected result, computed element by element
+    }
+
+    C_mat = A_mat - sub_const;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = mat - const");
+
+    C_mat = A_sub - sub_const;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = sub_mat - const");
+    C_mat = C.Get(roi_rect);                    // reload the unmodified ROI before the -= test
+
+    C_mat -= sub_const;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat -= const");
+
+    C_sub = A_mat - sub_const;
+    test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat = mat - const");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "- operator, area check, sub_mat = mat - const");
+
+    C = mat;                                    // restore C (and with it the content seen through C_sub) before the -= test
+    C_sub -= sub_const;
+    test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat -= const");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "- operator, area check, sub_mat -= const");
+}
+
+// operator- and operator-= test (matrix minus matrix): every combination of matrix (Get) and sub-matrix (getROI) operands
+static void test_mat_subset_operator_mat_sub_mat(void)
+{
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C = mat;
+    dspm::Mat C_compare_area = mat;     // never written to: reference for the area around C_sub
+
+    dspm::Mat A_sub = mat.getROI(roi_rect);
+    dspm::Mat A_mat = mat.Get(roi_rect);
+
+    dspm::Mat B_sub = mat.getROI(roi_rect);
+    dspm::Mat B_mat = mat.Get(roi_rect);
+
+    dspm::Mat C_sub = C.getROI(roi_rect);
+    dspm::Mat C_mat = C.Get(roi_rect);
+    dspm::Mat C_compare(ROI_ROW, ROI_COL);
+
+    for (int i = 0; i < C_compare.length; i++) {
+        C_compare.data[i] = A_mat.data[i] - B_mat.data[i];     // expected result, computed element by element
+    }
+
+    C_mat = A_mat - B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = mat - mat");
+
+    C_mat = A_sub - B_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = sub_mat - sub_mat");
+
+    C_mat = A_sub - B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = sub_mat - mat");
+
+    C_mat = A_mat - B_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = mat - sub_mat");
+
+    C_sub = A_sub - B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat = sub_mat - sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "- operator, area check, sub_mat = sub_mat - sub_mat");
+
+    C_sub = A_mat - B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat = mat - sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "- operator, area check, sub_mat = mat - sub_mat");
+
+    C = mat;
+    C_mat = C.Get(roi_rect);    // C_mat must be refreshed
+    C_mat -= A_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat -= mat");
+
+    C = mat;
+    C_mat = C.Get(roi_rect);    // C_mat must be refreshed
+    C_mat -= A_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat -= sub_mat");
+
+    C = mat;                    // C must be refreshed, to refresh the C_sub
+    C_sub -= A_mat;
+    test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat -= mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "- operator, area check, sub_mat -= mat");
+
+    C = mat;                    // C must be refreshed, to refresh the C_sub
+    C_sub -= A_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat -= sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "- operator, area check, sub_mat -= sub_mat");
+}
+
+// operator+ and operator+=
+// Adds combinations of standalone matrices (obtained with Get()) and
+// sub-matrices (obtained with getROI()) and compares each result with an
+// element-wise reference. Whenever the destination is the sub-matrix C_sub,
+// the area of C surrounding the ROI is additionally compared with an
+// untouched copy of the source matrix.
+static void test_mat_subset_operator_mat_add_mat(void)
+{
+    // source matrix filled with the running index 0, 1, 2, ...
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C = mat;
+    dspm::Mat C_compare_area = mat;     // reference for the area checks, never modified
+
+    // A and B are taken from the same ROI of the same matrix
+    dspm::Mat A_sub = mat.getROI(roi_rect);
+    dspm::Mat A_mat = mat.Get(roi_rect);
+
+    dspm::Mat B_sub = mat.getROI(roi_rect);
+    dspm::Mat B_mat = mat.Get(roi_rect);
+
+    dspm::Mat C_sub = C.getROI(roi_rect);
+    dspm::Mat C_mat = C.Get(roi_rect);
+    dspm::Mat C_compare(ROI_ROW, ROI_COL);
+
+    // element-wise reference result
+    for (int i = 0; i < C_compare.length; i++) {
+        C_compare.data[i] = A_mat.data[i] + B_mat.data[i];
+    }
+
+    C_mat = A_mat + B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = mat + mat");
+
+    C_mat = A_sub + B_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = sub_mat + sub_mat");
+
+    C_mat = A_sub + B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = sub_mat + mat");
+
+    C_sub = A_sub + B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat = sub_mat + sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "+ operator, area check, sub_mat = sub_mat + sub_mat");
+
+    C_sub = A_mat + B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat = mat + sub_mat");
+    // the message names the operation actually performed above (mat + sub_mat)
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "+ operator, area check, sub_mat = mat + sub_mat");
+
+    // compound assignment: the left operand starts with the same content as A
+    C = mat;
+    C_mat = C.Get(roi_rect);    // C_mat must be refreshed
+    C_mat += A_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat += mat");
+
+    C = mat;
+    C_mat = C.Get(roi_rect);    // C_mat must be refreshed
+    C_mat += A_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat += sub_mat");
+
+    C = mat;                    // C must be refreshed, to refresh the C_sub
+    C_sub += A_mat;
+    test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat += mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "+ operator, area check, sub_mat += mat");
+
+    C = mat;                    // C must be refreshed, to refresh the C_sub
+    C_sub += A_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat += sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "+ operator, area check, sub_mat += sub_mat");
+}
+
+// operator+ and operator+= with a scalar right-hand operand, applied to
+// standalone matrices and to sub-matrices
+static void test_mat_subset_operator_mat_add_const(void)
+{
+    const float addend = 2;
+
+    // source matrix filled with the running index 0, 1, 2, ...
+    dspm::Mat source(MAT_ROW, MAT_COL);
+    for (int idx = 0; idx < source.length; ++idx) {
+        source.data[idx] = idx;
+    }
+
+    dspm::Mat area_reference = source;      // never modified, used by the area checks
+    dspm::Mat target = source;
+
+    dspm::Mat src_view = source.getROI(roi_rect);
+    dspm::Mat src_copy = source.Get(roi_rect);
+
+    dspm::Mat target_view = target.getROI(roi_rect);
+    dspm::Mat target_copy = target.Get(roi_rect);
+
+    // expected result: ROI content with the constant added to every element
+    dspm::Mat expected = source.Get(roi_rect);
+    for (int idx = 0; idx < expected.length; ++idx) {
+        expected.data[idx] = expected.data[idx] + addend;
+    }
+
+    target_copy = src_view + addend;
+    test_assert_equal_mat_mat(expected, target_copy, "+ operator, mat = sub_mat + const");
+    target_copy = target.Get(roi_rect);     // restore the unmodified ROI content
+
+    target_copy += addend;
+    test_assert_equal_mat_mat(expected, target_copy, "+ operator, mat += const");
+
+    target_view = src_copy + addend;
+    test_assert_equal_mat_mat(expected, target_view, "+ operator, sub_mat = mat + const");
+    test_assert_check_area_mat_mat(area_reference, target_view, START_ROI, START_ROI,  "+ operator, area check, sub_mat = mat + const");
+
+    target = source;                        // reset the parent before the compound assignment
+    target_view += addend;
+    test_assert_equal_mat_mat(expected, target_view, "+ operator, sub_mat += const");
+    test_assert_check_area_mat_mat(area_reference, target_view, START_ROI, START_ROI,  "+ operator, area check, sub_mat += const");
+}
+
+// operator* and operator*= with a scalar right-hand operand, applied to
+// standalone matrices and to sub-matrices
+static void test_mat_subset_operator_mat_mul_const(void)
+{
+    const float factor = 2;
+
+    // source matrix filled with the running index 0, 1, 2, ...
+    dspm::Mat source(MAT_ROW, MAT_COL);
+    for (int idx = 0; idx < source.length; ++idx) {
+        source.data[idx] = idx;
+    }
+
+    dspm::Mat area_reference = source;      // never modified, used by the area checks
+    dspm::Mat target = source;
+
+    dspm::Mat src_view = source.getROI(roi_rect);
+    dspm::Mat src_copy = source.Get(roi_rect);
+
+    dspm::Mat target_view = target.getROI(roi_rect);
+    dspm::Mat target_copy = target.Get(roi_rect);
+
+    // expected result: ROI content with every element scaled by the constant
+    dspm::Mat expected = source.Get(roi_rect);
+    for (int idx = 0; idx < expected.length; ++idx) {
+        expected.data[idx] = expected.data[idx] * factor;
+    }
+
+    target_copy = src_copy * factor;
+    test_assert_equal_mat_mat(expected, target_copy, "* operator, mat = mat * const");
+
+    target_copy = src_view * factor;
+    test_assert_equal_mat_mat(expected, target_copy, "* operator, mat = sub_mat * const");
+    target_copy = target.Get(roi_rect);     // restore the unmodified ROI content
+
+    target_copy *= factor;
+    test_assert_equal_mat_mat(expected, target_copy, "* operator, mat *= const");
+
+    target_view = src_copy * factor;
+    test_assert_equal_mat_mat(expected, target_view, "* operator, sub_mat = mat * const");
+    test_assert_check_area_mat_mat(area_reference, target_view, START_ROI, START_ROI,  "* operator, area check, sub_mat = mat * const");
+
+    target = source;                        // reset the parent before the compound assignment
+    target_view *= factor;
+    test_assert_equal_mat_mat(expected, target_view, "* operator, sub_mat *= const");
+    test_assert_check_area_mat_mat(area_reference, target_view, START_ROI, START_ROI,  "* operator, area check, sub_mat *= const");
+}
+
+// operator*=
+// Multiplies a square ROI in place by a standalone matrix and by a sub-matrix,
+// with the left operand being either a standalone matrix or a sub-matrix.
+// The results are compared with a reference computed by a plain triple loop.
+static void test_mat_subset_operator_mat_mul_mat_2(void)
+{
+    // source matrix filled with the running index 0, 1, 2, ...
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C_compare_area = mat;     // reference for the area checks, never modified
+    dspm::Mat C = mat;
+
+    // square operands, so that the in-place product keeps its dimensions
+    const int m = 4, n = 4, k = 4;
+    dspm::Mat::Rect roi_rect_mul(1, 1, k, m);
+
+    dspm::Mat A_sub = mat.getROI(roi_rect_mul);
+    dspm::Mat A_mat = mat.Get(roi_rect_mul);
+
+    dspm::Mat C_sub = C.getROI(roi_rect_mul);
+    dspm::Mat C_mat = C.Get(roi_rect_mul);
+    dspm::Mat C_compare = dspm::Mat::ones(m, k);
+
+    // reference product; A_mat and C_mat are read from the same ROI of
+    // matrices with identical content
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            C_compare.data[(i * k) + j] = 0;
+            for (int s = 0 ; s < n ; s++) {
+                C_compare.data[(i * k) + j] += A_mat.data[(i * n) + s] * C_mat.data[(s * k) + j];
+            }
+        }
+    }
+
+    C_mat *= A_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "*= operator, mat *= mat");
+    C_mat = C.Get(roi_rect_mul);        // restore the unmodified ROI content
+
+    C_mat *= A_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "*= operator, mat *= sub_mat");
+
+    C_sub *= A_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "*= operator, sub_mat *= sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "*= operator, area check, sub_mat *= sub_mat");
+
+    C = mat;                            // reset the parent of C_sub
+    C_sub *= A_mat;
+    // the messages name the operation actually performed above (sub_mat *= mat)
+    test_assert_equal_mat_mat(C_compare, C_sub, "*= operator, sub_mat *= mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "*= operator, area check, sub_mat *= mat");
+}
+
+// operator*
+// Multiplies an m x n operand by an n x k operand for combinations of
+// standalone matrices (Get()) and sub-matrices (getROI()). The results are
+// compared with a reference computed by a plain triple loop; when the
+// destination is the sub-matrix C_sub, the area of C around the ROI is
+// additionally compared with an untouched matrix of ones.
+static void test_mat_subset_operator_mat_mul_mat_1(void)
+{
+    // source matrix filled with the running index 0, 1, 2, ...
+    dspm::Mat mat(MAT_ROW, MAT_COL);
+    for (int i = 0; i < mat.length; i++) {
+        mat.data[i] = i;
+    }
+
+    dspm::Mat C = dspm::Mat::ones(6);
+    dspm::Mat C_compare_area = dspm::Mat::ones(6);  // reference for the area checks
+
+    // matrix dimensions
+    const int m = 4, n = 3, k = 4;
+    dspm::Mat::Rect A_roi_rect(2, 1, n, m);
+    dspm::Mat::Rect B_roi_rect(1, 2, k, n);
+    dspm::Mat::Rect C_roi_rect(1, 1, k, m);
+
+    dspm::Mat A_sub = mat.getROI(A_roi_rect);
+    dspm::Mat A_mat = mat.Get(A_roi_rect);
+
+    dspm::Mat B_sub = mat.getROI(B_roi_rect);
+    dspm::Mat B_mat = mat.Get(B_roi_rect);
+
+    dspm::Mat C_sub = C.getROI(C_roi_rect);
+    dspm::Mat C_mat = C.Get(C_roi_rect);
+    dspm::Mat C_compare = dspm::Mat::ones(m, k);
+
+    // reference product C_compare = A_mat * B_mat
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            C_compare.data[(i * k) + j] = 0;
+            for (int s = 0 ; s < n ; s++) {
+                C_compare.data[(i * k) + j] += A_mat.data[(i * n) + s] * B_mat.data[(s * k) + j];
+            }
+        }
+    }
+
+    C_mat = A_mat * B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = mat * mat");
+
+    C_mat = A_sub * B_sub;
+    test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = sub_mat * sub_mat");
+
+    C_mat = A_sub * B_mat;
+    test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = sub_mat * mat");
+
+    C_sub = A_sub * B_sub;
+    test_assert_equal_mat_mat(C_compare, C_sub, "* operator, sub_mat = sub_mat * sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "* operator, area check, sub_mat = sub_mat * sub_mat");
+
+    C_sub = A_mat * B_sub;
+    // the messages name the operation actually performed above (mat * sub_mat)
+    test_assert_equal_mat_mat(C_compare, C_sub, "* operator, sub_mat = mat * sub_mat");
+    test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI,  "* operator, area check, sub_mat = mat * sub_mat");
+}
+
+// Unity test case that runs all sub-matrix operator tests of this file one
+// after another; each helper reports its own failures through the
+// test_assert_* functions.
+TEST_CASE("Matrix subset operators", TAG)
+{
+    test_mat_subset_operator_eq();                  // mat = mat
+    test_mat_subset_operator_eq_eq();               // mat == mat
+    test_mat_subset_operator_xor();                 // mat ^ const
+    test_mat_subset_operator_mat_mul_mat_1();       // mat * mat
+    test_mat_subset_operator_mat_mul_mat_2();       // mat *= mat
+    test_mat_subset_operator_mat_mul_const();       // mat * const
+    test_mat_subset_operator_mat_add_mat();         // mat + mat
+    test_mat_subset_operator_mat_add_const();       // mat + const
+    test_mat_subset_operator_mat_sub_mat();         // mat - mat
+    test_mat_subset_operator_mat_sub_const();       // mat - const
+    test_mat_subset_operator_mat_div_mat();         // mat / mat
+    test_mat_subset_operator_mat_div_const();       // mat / const
+}
+
+// Checks dspm::Mat::solve() and dspm::Mat::roots() on sub-matrix operands:
+// neither call may modify the area surrounding the sub-matrices, and both
+// must produce the same solution within a tolerance of 1e-6.
+static void test_mat_subset_solve(void)
+{
+    int m = 3;
+    int n = 3;
+    float data_a[9] = {3, 2, 1, 2, 3, 1, 2, 1, 3};
+    float data_b[3] = {5, -1, 4};       // m x 1 right-hand side
+    dspm::Mat A(data_a, m, n);
+    dspm::Mat b(data_b, m, 1);
+
+    // parent matrices of ones; the *_area_check copies stay untouched
+    dspm::Mat A_origin = dspm::Mat::ones(5);
+    dspm::Mat b_origin = dspm::Mat::ones(5, 3);
+    dspm::Mat A_origin_area_check = dspm::Mat::ones(5);
+    dspm::Mat b_origin_area_check = dspm::Mat::ones(5, 3);
+
+    // place the system into the parents at position (1, 1)
+    A_origin.Copy(A, 1, 1);
+    b_origin.Copy(b, 1, 1);
+
+    // create sub-matrices
+    dspm::Mat A_sub = A_origin.getROI(1, 1, m, n);
+    dspm::Mat b_sub = b_origin.getROI(1, 1, m, 1);
+
+    dspm::Mat x1 = dspm::Mat::solve(A_sub, b_sub);
+    test_assert_check_area_mat_mat(A_origin_area_check, A_sub, 1, 1, "check solve, area A");
+    test_assert_check_area_mat_mat(b_origin_area_check, b_sub, 1, 1, "check solve, area b");
+
+    std::cout << "Solve result matrix: rows: " << x1.rows << ", columns: " << x1.cols << std::endl;
+    std::cout << (x1 * 12).t();
+    dspm::Mat x2 = dspm::Mat::roots(A_sub, b_sub);
+    // report roots() in the messages, so that a failure points at the right call
+    test_assert_check_area_mat_mat(A_origin_area_check, A_sub, 1, 1, "check roots, area A");
+    test_assert_check_area_mat_mat(b_origin_area_check, b_sub, 1, 1, "check roots, area b");
+
+    std::cout << "Roots result matrix: rows: " << x2.rows << ", columns: " << x2.cols << std::endl;
+    std::cout << (x2 * 12).t();
+    // both methods must agree element by element
+    dspm::Mat diff_b = x1 - x2;
+    std::cout << "Difference between solve() abd roots(): " << diff_b.t();
+    for (int row = 0; row < diff_b.rows; row++) {
+        for (int col = 0; col < diff_b.cols; col++) {
+            if (fabs(diff_b(row, col)) > 0.000001) {
+                TEST_ASSERT_MESSAGE (false, "Calculation is incorrect! Error more then expected!");
+            }
+        }
+    }
+}
+
+// Checks inverse() and pinv() called on a sub-matrix: the area around the
+// sub-matrix must stay untouched and the result must match the known inverse.
+static void test_mat_subset_inverse(void)
+{
+    // Test inverse()
+    dspm::Mat computed;
+    float input_data[] = {2, 5, 7,
+                          6, 3, 4,
+                          5, -2, -3
+                         };
+    float expected_data[] = {  1.0000,   -1.0000,    1.0000,
+                               -38.0000,   41.0000,  -34.0000,
+                               27.0000,  -29.0000,   24.0000
+                            };
+
+    computed = dspm::Mat(input_data, 3, 3);
+
+    dspm::Mat parent = dspm::Mat::ones(5);
+    dspm::Mat parent_reference = dspm::Mat::ones(5);
+
+    // embed the 3x3 input into the parent and take it back out as a sub-matrix
+    parent.Copy(computed, 1, 1);
+    dspm::Mat roi = parent.getROI(1, 1, 3, 3);
+
+    computed = roi.inverse();
+    test_assert_check_area_mat_mat(parent_reference, roi, 1, 1, "area check inverse");
+
+    std::cout << "inverse: " << std::endl;
+    std::cout << computed << std::endl;
+    for (int idx = 0; idx < 3 * 3; idx++) {
+        const auto err = std::abs(computed.data[idx] - expected_data[idx]);
+        if (err > 1e-4) {
+            printf("Error at[%i] = %f, expected= %f, calculated = %f\n", idx, err, expected_data[idx], computed.data[idx]);
+            TEST_ASSERT_MESSAGE (false, "Error in inverse() operation!\n");
+        }
+    }
+
+    // Test pinv() on a freshly prepared parent / sub-matrix
+    computed = dspm::Mat(input_data, 3, 3);
+    parent = dspm::Mat::ones(5);
+    parent.Copy(computed, 1, 1);
+    roi = parent.getROI(1, 1, 3, 3);
+
+    computed = roi.pinv();
+    test_assert_check_area_mat_mat(parent_reference, roi, 1, 1, "area check pinv");
+
+    std::cout << "pinv: " << std::endl;
+    std::cout << computed << std::endl;
+    for (int idx = 0; idx < 3 * 3; idx++) {
+        const auto err = std::abs(computed.data[idx] - expected_data[idx]);
+        if (err > 1e-2) {
+            printf("Error at[%i] = %f, expected= %f, calculated = %f \n", idx, err, expected_data[idx], computed.data[idx]);
+            TEST_ASSERT_MESSAGE (false, "Error in pinv() operation!\n");
+        }
+    }
+}
+
+// Checks normalize() called on a 2x2 sub-matrix of ones: the area around the
+// sub-matrix must stay untouched and every element is expected to become 0.5.
+static void test_mat_subset_normalize(void)
+{
+    dspm::Mat parent = dspm::Mat::ones(4);
+    dspm::Mat parent_reference = dspm::Mat::ones(4);
+    dspm::Mat roi = parent.getROI(1, 1, 2, 2);
+
+    std::cout << "Befor normalize: " << std::endl;
+    std::cout << roi << std::endl;
+    roi.normalize();
+    test_assert_check_area_mat_mat(parent_reference, roi, 1, 1, "normalize area check");
+    std::cout << "normalize: " << std::endl;
+    std::cout << roi << std::endl;
+
+    for (int r = 0; r < roi.rows; r++) {
+        for (int c = 0; c < roi.cols; c++) {
+            // deviation of the element from the expected value
+            const auto deviation = std::abs(roi(r, c) - 0.5);
+            if (!(deviation > dspm::Mat::abs_tol)) {
+                continue;
+            }
+            ESP_LOGE(TAG, "Error bigger then expected: %f", deviation);
+            TEST_ASSERT_MESSAGE (false, "Error in normalize() operation! ");
+        }
+    }
+}
+
+// Runs swapRows(), t(), dotProduct() and clear() on a sub-matrix and on a
+// standalone copy of the same region, expecting identical results and an
+// untouched area around the sub-matrix.
+static void test_mat_subset_swap_trans_dot_clear(void)
+{
+    dspm::Mat parent(5, 5);
+    dspm::Mat parent_reference(5, 5);
+    // every row is filled with its 1-based row number
+    for (int r = 0; r < parent.rows; r++) {
+        for (int c = 0; c < parent.cols; c++) {
+            parent(r, c) = r + 1;
+            parent_reference(r, c) = r + 1;
+        }
+    }
+
+    dspm::Mat::Rect inner_rect(1, 1, 3, 3);
+    dspm::Mat view = parent.getROI(inner_rect);
+    dspm::Mat copy = parent.Get(inner_rect);
+
+    // check swap rows
+    view.swapRows(0, 1);
+    copy.swapRows(0, 1);
+    test_assert_equal_mat_mat(view, copy, "sub-matrix swapRows");
+    test_assert_check_area_mat_mat(parent_reference, view, 1, 1, "area check sub-matrix swapRows");
+
+    // check transpose
+    dspm::Mat view_transposed = view.t();
+    dspm::Mat copy_transposed = copy.t();
+    test_assert_equal_mat_mat(copy_transposed, view_transposed, "sub-matrix transpose");
+    test_assert_check_area_mat_mat(parent_reference, view, 1, 1, "area check sub-matrix transpose");
+
+    // check dot product
+    float dot_of_copy = dspm::Mat::dotProduct(copy, copy);
+    float dot_of_view = dspm::Mat::dotProduct(view, view);
+    TEST_ASSERT_EQUAL_FLOAT(dot_of_copy, dot_of_view);
+
+    // check clear
+    view.clear();
+    copy.clear();
+    test_assert_equal_mat_const(view, 0, "sub-matrix clear");
+    test_assert_equal_mat_mat(copy, view, "sub-matrix clear");
+    test_assert_check_area_mat_mat(parent_reference, view, 1, 1, "area check sub-matrix clear");
+}
+
+
+// Unity test case that runs the sub-matrix method tests of this file
+// (solve/roots, inverse/pinv, normalize, swapRows/transpose/dotProduct/clear)
+// one after another.
+TEST_CASE("Matrix subset methods check", TAG)
+{
+    test_mat_subset_solve();
+    test_mat_subset_inverse();
+    test_mat_subset_normalize();
+    test_mat_subset_swap_trans_dot_clear();
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_3x3xx_f32_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_3x3xx_f32_ae32.c
@@ -0,0 +1,187 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+// Log tag passed to the ESP_LOGD calls of the functionality tests in this file
+static const char *TAG = "dspm_mult_3x3xX_f32";
+
+// Test dspm_mult_3x3x1_f32: the result is compared element by element with
+// the result of the generic reference implementation dspm_mult_f32_ansi.
+TEST_CASE("dspm_mult_3x3x1_f32 functionality", "[dspm]")
+{
+    int m = 3;
+    int n = 3;
+    int k = 1;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+    float C_compare[m][k];
+    float *Cc_ptr = (float *)C_compare;
+
+    // every element of A and B holds its row index
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+
+    dspm_mult_3x3x1_f32(A_ptr, B_ptr, C_ptr);
+    dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);
+
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
+        }
+    }
+    // Compare and check results.
+    // TEST_ASSERT_EQUAL compares its arguments as integers, so two floats
+    // that differ only in their fractional part would pass unnoticed;
+    // the float assertion (expected, actual) is used instead.
+    for (int i = 0; i < m * k; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
+    }
+}
+
+// Test dspm_mult_3x3x3_f32: the result is compared element by element with
+// the result of the generic reference implementation dspm_mult_f32_ansi.
+TEST_CASE("dspm_mult_3x3x3_f32 functionality", "[dspm]")
+{
+    int m = 3;
+    int n = 3;
+    int k = 3;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+    float C_compare[m][k];
+    float *Cc_ptr = (float *)C_compare;
+
+    // every element of A and B holds its row index
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+    // clear the output, iterating with C's own dimensions (m x k)
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < k; j++) {
+            C[i][j] = 0;
+        }
+    }
+
+    dspm_mult_3x3x3_f32(A_ptr, B_ptr, C_ptr);
+    dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);
+
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
+        }
+    }
+    // Compare and check results.
+    // TEST_ASSERT_EQUAL compares its arguments as integers, so two floats
+    // that differ only in their fractional part would pass unnoticed;
+    // the float assertion (expected, actual) is used instead.
+    for (int i = 0 ; i < m * k ; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
+    }
+}
+
+// Lock passed to portENTER_CRITICAL / portEXIT_CRITICAL around the timed
+// loops of the benchmark test cases below
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark of dspm_mult_3x3x1_f32: measures the average number of CPU cycles
+// per call over 1024 calls inside a critical section and checks that it lies
+// within the expected range.
+TEST_CASE("dspm_mult_3x3x1_f32 benchmark", "[dspm]")
+{
+    int m = 3;
+    int n = 3;
+    int k = 1;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+
+    // Give the kernel defined input data; reading uninitialized automatic
+    // variables is undefined behavior. Same fill as in the functionality test.
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int i = 0 ; i < repeat_count ; i++) {
+        dspm_mult_3x3x1_f32(A_ptr, B_ptr, C_ptr);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    // average cycle count of a single multiplication
+    float total_b = end_b - start_b;
+    float cycles = total_b / (repeat_count);
+    ESP_LOGI("dspm_mult_3x3x1_f32", "dspm_mult_3x3x1_f32 - %f per multiplication (ae32 - 134, ansi - 285)", cycles);
+    float min_exec = 60;
+    float max_exec = 200;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}
+
+// Benchmark of dspm_mult_3x3x3_f32: measures the average number of CPU cycles
+// per call over 1024 calls inside a critical section and checks that it lies
+// within the expected range.
+TEST_CASE("dspm_mult_3x3x3_f32 benchmark", "[dspm]")
+{
+    // dimensions of the 3x3x3 kernel under test (same as in the functionality test)
+    int m = 3;
+    int n = 3;
+    int k = 3;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+
+    // Give the kernel defined input data; reading uninitialized automatic
+    // variables is undefined behavior. Same fill as in the functionality test.
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int i = 0 ; i < repeat_count ; i++) {
+        dspm_mult_3x3x3_f32(A_ptr, B_ptr, C_ptr);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    // average cycle count of a single multiplication
+    float total_b = end_b - start_b;
+    float cycles = total_b / (repeat_count);
+    ESP_LOGI("dspm_mult_3x3x3_f32", "dspm_mult_3x3x3_f32 - %f per multiplication", cycles);
+    float min_exec = 100;
+    float max_exec = 400;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_4x4xx_f32_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_4x4xx_f32_ae32.c
@@ -0,0 +1,186 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+// Log tag passed to the ESP_LOGD / ESP_LOGI calls in this file
+static const char *TAG = "dspm_mult_4x4x1_f32_ae32";
+
+// Test dspm_mult_4x4x1_f32: the result is compared element by element with
+// the result of the generic reference implementation dspm_mult_f32_ansi.
+TEST_CASE("dspm_mult_4x4x1_f32_ae32 functionality", "[dspm]")
+{
+    int m = 4;
+    int n = 4;
+    int k = 1;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+    float C_compare[m][k];
+    float *Cc_ptr = (float *)C_compare;
+
+    // every element of A and B holds its row index
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+
+    dspm_mult_4x4x1_f32(A_ptr, B_ptr, C_ptr);
+    dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);
+
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
+        }
+    }
+    // Compare and check results.
+    // TEST_ASSERT_EQUAL compares its arguments as integers, so two floats
+    // that differ only in their fractional part would pass unnoticed;
+    // the float assertion (expected, actual) is used instead.
+    for (int i = 0; i < m * k; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
+    }
+}
+
+// Test dspm_mult_4x4x4_f32: the result is compared element by element with
+// the result of the generic reference implementation dspm_mult_f32_ansi.
+TEST_CASE("dspm_mult_4x4x4_f32_ae32 functionality", "[dspm]")
+{
+    int m = 4;
+    int n = 4;
+    int k = 4;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+    float C_compare[m][k];
+    float *Cc_ptr = (float *)C_compare;
+
+    // every element of A and B holds its row index
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+    // clear the output, iterating with C's own dimensions (m x k)
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < k; j++) {
+            C[i][j] = 0;
+        }
+    }
+
+    dspm_mult_4x4x4_f32(A_ptr, B_ptr, C_ptr);
+    dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);
+
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
+        }
+    }
+    // Compare and check results.
+    // TEST_ASSERT_EQUAL compares its arguments as integers, so two floats
+    // that differ only in their fractional part would pass unnoticed;
+    // the float assertion (expected, actual) is used instead.
+    for (int i = 0 ; i < m * k ; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
+    }
+}
+
+// Lock passed to portENTER_CRITICAL / portEXIT_CRITICAL around the timed
+// loops of the benchmark test cases below
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark of dspm_mult_4x4x1_f32: measures the average number of CPU cycles
+// per call over 1024 calls inside a critical section and checks that it lies
+// within the expected range.
+TEST_CASE("dspm_mult_4x4x1_f32_ae32 benchmark", "[dspm]")
+{
+    int m = 4;
+    int n = 4;
+    int k = 1;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+
+    // Give the kernel defined input data; reading uninitialized automatic
+    // variables is undefined behavior. Same fill as in the functionality test.
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int i = 0 ; i < repeat_count ; i++) {
+        dspm_mult_4x4x1_f32(A_ptr, B_ptr, C_ptr);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    // average cycle count of a single multiplication
+    float total_b = end_b - start_b;
+    float cycles = total_b / (repeat_count);
+    ESP_LOGI("dspm_mult_4x4x1_f32_ae32", "dspm_mult_4x4x1_f32_ae32 - %f per multiplication", cycles);
+    float min_exec = 60;
+    float max_exec = 300;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}
+
+// Benchmark of dspm_mult_4x4x4_f32: measures the average number of CPU cycles
+// per call over 1024 calls inside a critical section and checks that it lies
+// within the expected range.
+TEST_CASE("dspm_mult_4x4x4_f32_ae32 benchmark", "[dspm]")
+{
+    int m = 4;
+    int n = 4;
+    int k = 4;
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+
+    // Give the kernel defined input data; reading uninitialized automatic
+    // variables is undefined behavior. Same fill as in the functionality test.
+    for (int i = 0; i < m; i++) {
+        for (int j = 0; j < n; j++) {
+            A[i][j] = i;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < k; j++) {
+            B[i][j] = i;
+        }
+    }
+
+    // log the buffer addresses of the three matrices
+    ESP_LOGI(TAG, "A: %8.8"PRIx32", B: %8.8"PRIx32", C=%8.8"PRIx32"", (uint32_t)A_ptr, (uint32_t)B_ptr, (uint32_t)C_ptr);
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int i = 0 ; i < repeat_count ; i++) {
+        dspm_mult_4x4x4_f32(A_ptr, B_ptr, C_ptr);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    // average cycle count of a single multiplication
+    float total_b = end_b - start_b;
+    float cycles = total_b / (repeat_count);
+    ESP_LOGI("dspm_mult_4x4x4_f32_ae32", "dspm_mult_4x4x4_f32_ae32 - %f per multiplication", cycles);
+    float min_exec = 50;
+    float max_exec = 750;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_ex_f32_aexx.cpp
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_ex_f32_aexx.cpp
@@ -0,0 +1,285 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include <malloc.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+#include "test_mat_common.h"
+
+
+// create ROI rectangles
+// File-scope rectangles describing the region of interest inside the padded
+// A, B and C matrices. Both users in this file call resizeRect() on them
+// before reading them.
+// NOTE(review): declared without `static`, so they have external linkage.
+dspm::Mat::Rect A_roi_rect;
+dspm::Mat::Rect B_roi_rect;
+dspm::Mat::Rect C_roi_rect;
+
+/**
+ * @brief run one dspm_mult_ex_f32 test configuration on padded matrices
+ *
+ * Builds A, B and C with padding around the m x n, n x k and m x k regions of
+ * interest, selects for each operand either a sub-matrix (no data copy) or a
+ * matrix holding a copy of the region according to test_d->var, multiplies
+ * with dspm_mult_ex_f32() and compares the result with a triple-loop reference.
+ * When C is a sub-matrix, the area around the region of interest is checked too.
+ *
+ * @param[in] test_d: test configuration (variant, start rows/cols, m, n, k)
+ */
+static void dspm_mult_ex_f32_aexx_functionality_in_cycle(m_test_data_t *test_d)
+{
+    char message[120];
+    // bounded formatting: the text is truncated instead of overrunning the buffer
+    snprintf(message, sizeof(message), "var = %d,  A_s_row = %d, A_s_col = %d, B_s_row = %d B_s_col = %d, C_s_row = %d,  C_s_col = %d, m = %d, n = %d, k = %d\n", test_d->var,
+             test_d->A_start_row, test_d->A_start_col, test_d->B_start_row, test_d->B_start_col,
+             test_d->C_start_row, test_d->C_start_col, test_d->m, test_d->n, test_d->k);
+
+    // aligned data for A B C matrices
+    float *A_data = (float *)memalign(16, ((test_d->m + (2 * test_d->A_start_row)) * (test_d->n + (2 * test_d->A_start_col))) * sizeof(float));
+    float *B_data = (float *)memalign(16, ((test_d->n + (2 * test_d->B_start_row)) * (test_d->k + (2 * test_d->B_start_col))) * sizeof(float));
+    float *C_data = (float *)memalign(16, ((test_d->m + (2 * test_d->C_start_row)) * (test_d->k + (2 * test_d->C_start_col))) * sizeof(float));
+
+    // fail the test instead of writing through a NULL pointer
+    if ((A_data == NULL) || (B_data == NULL) || (C_data == NULL)) {
+        free(A_data);
+        free(B_data);
+        free(C_data);
+        TEST_FAIL_MESSAGE("memalign failed for the A/B/C matrix data");
+        return;
+    }
+
+    // create A B C matrices with m n k dimensions + padding
+    // padding is from both sides of the targeted sub-matrix
+    // 1 1 1 1
+    // 1 x x 1
+    // 1 x x 1
+    // 1 1 1 1
+    dspm::Mat A(A_data, test_d->m + (2 * test_d->A_start_row), test_d->n + (2 * test_d->A_start_col));
+    dspm::Mat B(B_data, test_d->n + (2 * test_d->B_start_row), test_d->k + (2 * test_d->B_start_col));
+    dspm::Mat C(C_data, test_d->m + (2 * test_d->C_start_row), test_d->k + (2 * test_d->C_start_col));
+
+    // create ROI rectangles for sub-matrices
+    A_roi_rect.resizeRect(test_d->A_start_col, test_d->A_start_row, test_d->n, test_d->m);
+    B_roi_rect.resizeRect(test_d->B_start_col, test_d->B_start_row, test_d->k, test_d->n);
+    C_roi_rect.resizeRect(test_d->C_start_col, test_d->C_start_row, test_d->k, test_d->m);
+
+    // aligned data for sub-matrices
+    float *A_sub_data = (float *)memalign(16, A_roi_rect.areaRect() * sizeof(float));
+    float *B_sub_data = (float *)memalign(16, B_roi_rect.areaRect() * sizeof(float));
+    float *C_sub_data = (float *)memalign(16, C_roi_rect.areaRect() * sizeof(float));
+
+    if ((A_sub_data == NULL) || (B_sub_data == NULL) || (C_sub_data == NULL)) {
+        free(A_data);
+        free(B_data);
+        free(C_data);
+        free(A_sub_data);
+        free(B_sub_data);
+        free(C_sub_data);
+        TEST_FAIL_MESSAGE("memalign failed for the A/B/C sub-matrix data");
+        return;
+    }
+
+    // create sub-matrices A, B C matrices with aligned data
+    // matrices are used as sub-matrices with data copying for a matrix operation testing
+    dspm::Mat A_sub(A_sub_data, test_d->m, test_d->n);
+    dspm::Mat B_sub(B_sub_data, test_d->n, test_d->k);
+    dspm::Mat C_sub(C_sub_data, test_d->m, test_d->k);
+
+    // fill A B matrices with numbers
+    for (int i = 0; i < A.length; i++) {
+        A.data[i] = i + 1;
+    }
+    for (int i = 0; i < B.length; i++) {
+        B.data[i] = i + 1;
+    }
+
+    // fill C matrix with ones, only for the variants (0..3) where C_sub is a
+    // sub-matrix of C; variants 4..6 take C_sub as a copy via C.Get()
+    if (test_d->var < 4) {
+        for (int i = 0; i < C.length; i++) {
+            C.data[i] = 1;
+        }
+    }
+
+    // Combinations of A B C matrices and sub-matrices are created for testing
+    // As an example: case 1
+    // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
+    // Matrix A is a matrix - the data are copied into the A matrix
+    switch (test_d->var) {
+    case 0: {
+        A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+        B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+        C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+    } break;
+    case 1: {
+        A_sub = A.Get(A_roi_rect);               // A matrix     - DATA CPY
+        B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+        C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+    } break;
+    case 2: {
+        A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+        B_sub = B.Get(B_roi_rect);               // B matrix     - DATA CPY
+        C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+    } break;
+    case 3: {
+        A_sub = A.Get(A_roi_rect);               // A matrix     - DATA CPY
+        B_sub = B.Get(B_roi_rect);               // B matrix     - DATA CPY
+        C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+    } break;
+    case 4: {
+        A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+        B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+        C_sub = C.Get(C_roi_rect);               // C matrix     - DATA CPY
+    } break;
+    case 5: {
+        A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+        B_sub = B.Get(B_roi_rect);               // B matrix     - DATA CPY
+        C_sub = C.Get(C_roi_rect);               // C matrix     - DATA CPY
+    } break;
+    case 6: {
+        A_sub = A.Get(A_roi_rect);               // A matrix     - DATA CPY
+        B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+        C_sub = C.Get(C_roi_rect);               // C matrix     - DATA CPY
+    } break;
+    default:
+        break;
+    }
+
+    // create A B check sub-matrices, actual matrix data are COPIED
+    dspm::Mat A_sub_check = A.Get(A_roi_rect);
+    dspm::Mat B_sub_check = B.Get(B_roi_rect);
+    dspm::Mat C_sub_check(test_d->m, test_d->k);
+
+    // Calculate C_sub_check = A_sub_check * B_sub_check
+    for (int i = 0 ; i < test_d->m ; i++) {
+        for (int j = 0 ; j < test_d->k ; j++) {
+            C_sub_check(i, j) = 0;
+            for (int s = 0 ; s < test_d->n ; s++) {
+                C_sub_check(i, j) += A_sub_check(i, s) * B_sub_check(s, j);
+            }
+        }
+    }
+
+    dspm_mult_ex_f32(A_sub.data, B_sub.data, C_sub.data, test_d->m, test_d->n, test_d->k, A_sub.padding, B_sub.padding, C_sub.padding);
+
+    // C is a sub-matrix
+    if (C_sub.sub_matrix) {
+        // Create a copy of the original C matrix (filled with ones 1)
+        // to check if an area around the sub-matrix is unaffected after a matrix operation
+        dspm::Mat C_area_check = dspm::Mat::ones(test_d->m + (2 * test_d->C_start_row), test_d->k + (2 * test_d->C_start_col));
+        test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+        test_assert_check_area_mat_mat(C_area_check, C_sub, test_d->C_start_row, test_d->C_start_col, message);
+        // C is a matrix
+    } else {
+        test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+    }
+
+    free(A_data);
+    free(B_data);
+    free(C_data);
+    free(A_sub_data);
+    free(B_sub_data);
+    free(C_sub_data);
+}
+
+// Sweep dspm_mult_ex_f32 over every test variant, every sub-matrix start
+// row/column of A, B and C, and every m, n, k size allowed for the target.
+TEST_CASE("dspm_mult_ex_f32_aexx functionality", "[dspm]")
+{
+    const int variant_count = 7;
+    const int offset_first = 0;     // first start row / start column tested
+
+    // Per-target sweep limits: last offset, offset step, first/last dimension, dimension step
+#if CONFIG_IDF_TARGET_ESP32S3
+    const int offset_last = 4;
+    const int offset_step = 4;
+    const int dim_first = 4;
+    const int dim_last = 12;
+    const int dim_step = 4;
+#elif CONFIG_IDF_TARGET_ESP32P4
+    const int offset_last = 1;
+    const int offset_step = 1;
+    const int dim_first = 2; // <= the esp.lp.setup instruction is not working with loop count 1. The min value is 2.
+    const int dim_last = 4;
+    const int dim_step = 1;
+#else
+    const int offset_last = 1;
+    const int offset_step = 1;
+    const int dim_first = 1;
+    const int dim_last = 4;
+    const int dim_step = 1;
+#endif
+
+    for (int var = 0; var < variant_count; var++) {
+        // start row / start column of the C sub-matrix
+        for (int c_row = offset_first; c_row <= offset_last; c_row += offset_step) {
+            for (int c_col = offset_first; c_col <= offset_last; c_col += offset_step) {
+
+                // start row / start column of the A sub-matrix
+                for (int a_row = offset_first; a_row <= offset_last; a_row += offset_step) {
+                    for (int a_col = offset_first; a_col <= offset_last; a_col += offset_step) {
+
+                        // start row / start column of the B sub-matrix
+                        for (int b_row = offset_first; b_row <= offset_last; b_row += offset_step) {
+                            for (int b_col = offset_first; b_col <= offset_last; b_col += offset_step) {
+
+                                // sub-matrix dimensions m, n, k
+                                for (int m = dim_first; m <= dim_last; m += dim_step) {
+                                    for (int n = dim_first; n <= dim_last; n += dim_step) {
+                                        for (int k = dim_first; k <= dim_last; k += dim_step) {
+
+                                            // field order follows m_test_data_t
+                                            m_test_data_t test_data = {var, a_row, a_col, b_row, b_col, c_row, c_col, m, n, k};
+                                            dspm_mult_ex_f32_aexx_functionality_in_cycle(&test_data);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        // progress report after each completed variant
+        std::cout << var + 1 << "/" << variant_count << " of test done" << std::endl;
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark: average CPU cycle count of one dspm_mult_ex_f32() call on 4x4
+// sub-matrices placed at row/column 4 inside padded A, B and C buffers.
+// One untimed call is made first, then 1024 calls are timed inside a
+// critical section.
+TEST_CASE("dspm_mult_ex_f32_aexx benchmark", "[dspm]")
+{
+    const int m = 4;
+    const int n = 4;
+    const int k = 4;
+    const int start_row_col = 4;
+
+    A_roi_rect.resizeRect(start_row_col, start_row_col, n, m);
+    B_roi_rect.resizeRect(start_row_col, start_row_col, k, n);
+    C_roi_rect.resizeRect(start_row_col, start_row_col, k, m);
+
+    float *A_data = (float *)memalign(16, (m + (2 * start_row_col)) * (n + (2 * start_row_col)) * sizeof(float));
+    float *B_data = (float *)memalign(16, (n + (2 * start_row_col)) * (k + (2 * start_row_col)) * sizeof(float));
+    float *C_data = (float *)memalign(16, (m + (2 * start_row_col)) * (k + (2 * start_row_col)) * sizeof(float));
+
+    // fail the test instead of running the kernel on a NULL buffer
+    if ((A_data == NULL) || (B_data == NULL) || (C_data == NULL)) {
+        free(A_data);
+        free(B_data);
+        free(C_data);
+        TEST_FAIL_MESSAGE("memalign failed for the A/B/C matrix data");
+        return;
+    }
+
+    dspm::Mat A(A_data, m + (2 * start_row_col), n + (2 * start_row_col));
+    dspm::Mat B(B_data, n + (2 * start_row_col), k + (2 * start_row_col));
+    dspm::Mat C(C_data, m + (2 * start_row_col), k + (2 * start_row_col));
+
+    dspm::Mat A_subset = A.getROI(A_roi_rect);
+    dspm::Mat B_subset = B.getROI(B_roi_rect);
+    dspm::Mat C_subset = C.getROI(C_roi_rect);
+
+    portENTER_CRITICAL(&testnlock);
+    // untimed first call
+    dspm_mult_ex_f32(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int i = 0 ; i < repeat_count ; i++) {
+        dspm_mult_ex_f32(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (repeat_count);
+    // NOTE(review): the label says dspm_mult_f32 although dspm_mult_ex_f32 is measured
+    printf("Benchmark dspm_mult_f32 - %f per multiplication 4x4 + overhead.\n", cycles);
+
+    // release the buffers before the range check so that a failed assertion
+    // cannot skip the cleanup
+    free(A_data);
+    free(B_data);
+    free(C_data);
+
+    float min_exec = 100;
+    float max_exec = 750;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_ex_f32_ansi.cpp
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_ex_f32_ansi.cpp
@@ -0,0 +1,176 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+#include "test_mat_common.h"
+
+// Check dspm_mult_ex_f32_ansi against a triple-loop reference for every
+// variant (0..6), start row/column (0..1) and m, n, k in [1, 5]. Each variant
+// chooses, per operand, between a sub-matrix of the padded matrix (no data
+// copy) and a matrix holding a copy of the region of interest.
+TEST_CASE("dspm_mult_ex_f32_ansi functionality", "[dspm]")
+{
+    // create ROI rectangles
+    dspm::Mat::Rect A_roi_rect;
+    dspm::Mat::Rect B_roi_rect;
+    dspm::Mat::Rect C_roi_rect;
+
+    char message[60];
+    for (int var = 0; var < 7; var++) {
+        for (int start_row = 0; start_row < 2; start_row++) {
+            for (int start_col = 0; start_col < 2; start_col++) {
+                for (int m = 1; m < 6; m++) {
+                    for (int n = 1; n < 6; n++) {
+                        for (int k = 1; k < 6; k++) {
+                            // bounded formatting: the text is truncated instead of overrunning the buffer
+                            snprintf(message, sizeof(message), "var = %d  s_row = %d  s_col = %d, m = %d, n = %d, k = %d", var, start_row, start_col, m, n, k);
+                            // create A B C matrices with m n k dimensions + padding
+                            // padding is from both sides of the targeted sub-matrix
+                            // 1 1 1 1
+                            // 1 x x 1
+                            // 1 x x 1
+                            // 1 1 1 1
+                            dspm::Mat A(m + (2 * start_row), n + (2 * start_col));
+                            dspm::Mat B(n + (2 * start_row), k + (2 * start_col));
+                            dspm::Mat C = dspm::Mat::ones(m + (2 * start_row), k + (2 * start_col));
+
+                            // create A B C sub matrices with undefined dimensions
+                            dspm::Mat A_sub;
+                            dspm::Mat B_sub;
+                            dspm::Mat C_sub;
+
+                            // adjust ROI rectangles
+                            A_roi_rect.resizeRect(start_col, start_row, n, m);
+                            B_roi_rect.resizeRect(start_col, start_row, k, n);
+                            C_roi_rect.resizeRect(start_col, start_row, k, m);
+
+                            // fill A B matrices with numbers
+                            // (C was created with dspm::Mat::ones above)
+                            for (int i = 0; i < A.length; i++) {
+                                A.data[i] = i + 1;
+                            }
+                            for (int i = 0; i < B.length; i++) {
+                                B.data[i] = i + 1;
+                            }
+
+                            // Combinations of A B C matrices and sub-matrices are created for testing
+                            // As an example: case 1
+                            // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
+                            // Matrix A is a matrix - the data are copied into the A matrix
+                            switch (var) {
+                            case 0: {
+                                A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+                                B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+                                C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+                            } break;
+                            case 1: {
+                                A_sub = A.Get(A_roi_rect);               // A matrix     - DATA CPY
+                                B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+                                C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+                            } break;
+                            case 2: {
+                                A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+                                B_sub = B.Get(B_roi_rect);               // B matrix     - DATA CPY
+                                C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+                            } break;
+                            case 3: {
+                                A_sub = A.Get(A_roi_rect);               // A matrix     - DATA CPY
+                                B_sub = B.Get(B_roi_rect);               // B matrix     - DATA CPY
+                                C_sub.CopyHead(C.getROI(C_roi_rect));    // C sub-matrix - NO DATA CPY
+                            } break;
+                            case 4: {
+                                A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+                                B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+                                C_sub = C.Get(C_roi_rect);               // C matrix     - DATA CPY
+                            } break;
+                            case 5: {
+                                A_sub.CopyHead(A.getROI(A_roi_rect));    // A sub-matrix - NO DATA CPY
+                                B_sub = B.Get(B_roi_rect);               // B matrix     - DATA CPY
+                                C_sub = C.Get(C_roi_rect);               // C matrix     - DATA CPY
+                            } break;
+                            case 6: {
+                                A_sub = A.Get(A_roi_rect);               // A matrix     - DATA CPY
+                                B_sub.CopyHead(B.getROI(B_roi_rect));    // B sub-matrix - NO DATA CPY
+                                C_sub = C.Get(C_roi_rect);               // C matrix     - DATA CPY
+                            } break;
+                            default:
+                                break;
+                            }
+
+                            // create A B check sub-matrices, actual matrix data are COPIED
+                            dspm::Mat A_sub_check = A.Get(A_roi_rect);
+                            dspm::Mat B_sub_check = B.Get(B_roi_rect);
+                            dspm::Mat C_sub_check(m, k);
+
+                            // Calculate C_sub_check = A_sub_check * B_sub_check
+                            for (int i = 0 ; i < m ; i++) {
+                                for (int j = 0 ; j < k ; j++) {
+                                    C_sub_check(i, j) = 0;
+                                    for (int s = 0 ; s < n ; s++) {
+                                        C_sub_check(i, j) += A_sub_check(i, s) * B_sub_check(s, j);
+                                    }
+                                }
+                            }
+
+                            dspm_mult_ex_f32_ansi(A_sub.data, B_sub.data, C_sub.data, m, n, k, A_sub.padding, B_sub.padding, C_sub.padding);
+
+                            // C is a sub-matrix
+                            if (C_sub.sub_matrix) {
+                                // Create a copy of the original C matrix (filled with ones 1)
+                                // to check if an area around the sub-matrix is unaffected after a matrix operation
+                                dspm::Mat C_area_check = dspm::Mat::ones(m + (2 * start_row), k + (2 * start_col));
+                                test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                                test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
+                                // C is a matrix
+                            } else {
+                                test_assert_equal_mat_mat(C_sub_check, C_sub, message);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark: average CPU cycle count of one dspm_mult_ex_f32_ansi() call on
+// 4x4 regions of interest taken at offset 1 from 5x5 matrices. One untimed
+// call comes first, then 1024 calls are timed inside a critical section.
+TEST_CASE("dspm_mult_ex_f32_ansi benchmark", "[dspm]")
+{
+    const int m = 4, n = 4, k = 4;
+    const int M_off = 1;
+
+    // parent matrices, one row and one column larger than the ROI
+    dspm::Mat A(m + M_off, n + M_off);
+    dspm::Mat B(n + M_off, k + M_off);
+    dspm::Mat C(m + M_off, k + M_off);
+
+    // regions of interest used as the actual operands
+    dspm::Mat A_subset = A.getROI(M_off, M_off, m, n);
+    dspm::Mat B_subset = B.getROI(M_off, M_off, n, k);
+    dspm::Mat C_subset = C.getROI(M_off, M_off, m, k);
+
+    portENTER_CRITICAL(&testnlock);
+
+    // untimed first call
+    dspm_mult_ex_f32_ansi(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int run = 0 ; run < repeat_count ; ++run) {
+        dspm_mult_ex_f32_ansi(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    portEXIT_CRITICAL(&testnlock);
+
+    // The difference is taken in unsigned arithmetic before converting to float.
+    float cycles = (float)(end_b - start_b) / repeat_count;
+    // NOTE(review): the label says dspm_mult_f32 although dspm_mult_ex_f32_ansi is measured
+    printf("Benchmark dspm_mult_f32 - %f per multiplication 4x4 + overhead.\n", cycles);
+
+    // Accepted window for the averaged cycle count.
+    float lower_bound = 100;
+    float upper_bound = 1400;
+    TEST_ASSERT_EXEC_IN_RANGE(lower_bound, upper_bound, cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_f32_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_f32_ae32.c
@@ -0,0 +1,108 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+static const char *TAG = "dspm_mult_f32_aexx";
+
+// Test dspm_mult_f32 function: multiply a 4x3 matrix by a 3x4 matrix and
+// compare every element with a triple-loop reference result
+TEST_CASE("dspm_mult_f32 functionality", "[dspm]")
+{
+    int m = 4;
+    int n = 3;
+    int k = 4;
+
+
+    float A[m][n];
+    float *A_ptr = (float *)A;
+
+    float B[n][k];
+    float *B_ptr = (float *)B;
+
+    float C[m][k];
+    float *C_ptr = (float *)C;
+    float C_compare[m][k];
+    float *Cc_ptr = (float *)C_compare;
+
+    // Each operand is filled over its own element count, so the fill stays
+    // in bounds even if m and k are changed to differ
+    for (int i = 0 ; i < m * n; i++) {
+        A_ptr[i] = i;
+    }
+    for (int i = 0 ; i < n * k; i++) {
+        B_ptr[i] = i;
+    }
+    // Reference result
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            C_compare[i][j] = 0;
+            for (int s = 0 ; s < n ; s++) {
+                C_compare[i][j] += A[i][s] * B[s][j];
+            }
+        }
+    }
+    dspm_mult_f32(A_ptr, B_ptr, C_ptr, m, n, k);
+
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            ESP_LOGI(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
+        }
+    }
+    // Compare and check results
+    // TEST_ASSERT_EQUAL would compare the values as integers, so a float
+    // assertion is used; arguments are (expected, actual)
+    for (int i = 0 ; i < m * k ; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark: average CPU cycle count of one dspm_mult_f32() call on 4x4
+// operands, measured over 1024 back-to-back calls inside a critical section.
+TEST_CASE("dspm_mult_f32 benchmark", "[dspm]")
+{
+    int m = 4, n = 4, k = 4;
+
+    // Operands are stack arrays handed to the kernel as flat float pointers.
+    // NOTE(review): A and B are not written before the timed loop.
+    float A[m][n];
+    float B[n][k];
+    float C[m][k];
+    float *A_ptr = (float *)A;
+    float *B_ptr = (float *)B;
+    float *C_ptr = (float *)C;
+
+    ESP_LOGI(TAG, "A: %8.8"PRIx32", B: %8.8"PRIx32", C=%8.8"PRIx32"", (uint32_t)A_ptr, (uint32_t)B_ptr, (uint32_t)C_ptr);
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int run = 0 ; run < repeat_count ; ++run) {
+        dspm_mult_f32(A_ptr, B_ptr, C_ptr, m, n, k);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    portEXIT_CRITICAL(&testnlock);
+
+    // The difference is taken in unsigned arithmetic before converting to float.
+    float cycles = (float)(end_b - start_b) / repeat_count;
+    printf("Benchmark dspm_mult_f32 - %f per multiplication 4x4 + overhead.\n", cycles);
+
+    // Accepted window for the averaged cycle count.
+    float lower_bound = 100;
+    float upper_bound = 800;
+    TEST_ASSERT_EXEC_IN_RANGE(lower_bound, upper_bound, cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_f32_ansi.c
@@ -0,0 +1,118 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+static const char *TAG = "dspm_mult_f32_ansi";
+
+// Test dspm_mult_f32_ansi function: for every shape with m, n, k in [1, 7]
+// multiply an m x n matrix by an n x k matrix and compare every element with
+// a triple-loop reference result
+TEST_CASE("dspm_mult_f32_ansi functionality", "[dspm]")
+{
+    for (int m = 1 ; m < 8 ; m++) {
+        for (int n = 1; n < 8 ; n++) {
+            for (int k = 1; k < 8 ; k++) {
+                float A[m][n];
+                float *A_ptr = (float *)A;
+
+                float B[n][k];
+                float *B_ptr = (float *)B;
+
+                float C[m][k];
+                float *C_ptr = (float *)C;
+                float C_compare[m][k];
+                float *Cc_ptr = (float *)C_compare;
+
+                // Fill A and B over their own dimensions. A single flat loop
+                // over m * n elements must not be used for B: B holds only
+                // n * k elements, so such a loop writes past B whenever m > k.
+                for (int i = 0 ; i < m ; i++) {
+                    for (int j = 0 ; j < n ; j++) {
+                        A[i][j] = i * n + j;
+                    }
+                }
+                for (int i = 0 ; i < n ; i++) {
+                    for (int j = 0 ; j < k ; j++) {
+                        B[i][j] = i * k + j;
+                    }
+                }
+                // Reference result
+                for (int i = 0 ; i < m ; i++) {
+                    for (int j = 0 ; j < k ; j++) {
+                        C_compare[i][j] = 0;
+                        for (int s = 0 ; s < n ; s++) {
+                            C_compare[i][j] += A[i][s] * B[s][j];
+                        }
+                    }
+                }
+                dspm_mult_f32_ansi(A_ptr, B_ptr, C_ptr, m, n, k);
+
+                for (int i = 0 ; i < m ; i++) {
+                    for (int j = 0 ; j < k ; j++) {
+                        ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
+                    }
+                }
+                // Compare and check results
+                // TEST_ASSERT_EQUAL would compare the values as integers, so
+                // a float assertion is used; arguments are (expected, actual)
+                for (int i = 0 ; i < m * k ; i++) {
+                    TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
+                }
+            }
+        }
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark: average CPU cycle count of one dspm_mult_f32_ansi() call on 4x4
+// operands, measured over 1024 back-to-back calls inside a critical section.
+TEST_CASE("dspm_mult_f32_ansi benchmark", "[dspm]")
+{
+    int m = 4, n = 4, k = 4;
+
+    // Operands are stack arrays handed to the kernel as flat float pointers.
+    // NOTE(review): A and B are not written before the timed loop.
+    float A[m][n];
+    float B[n][k];
+    float C[m][k];
+    float *A_ptr = (float *)A;
+    float *B_ptr = (float *)B;
+    float *C_ptr = (float *)C;
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    int repeat_count = 1024;
+    for (int run = 0 ; run < repeat_count ; ++run) {
+        dspm_mult_f32_ansi(A_ptr, B_ptr, C_ptr, m, n, k);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    portEXIT_CRITICAL(&testnlock);
+
+    // The difference is taken in unsigned arithmetic before converting to float.
+    float cycles = (float)(end_b - start_b) / repeat_count;
+    printf("Benchmark dspm_mult_f32_ansi - %f per multiplication 4x4 + overhead.\n", cycles);
+
+    // Accepted window for the averaged cycle count.
+    float lower_bound = 100;
+    float upper_bound = 2000;
+    TEST_ASSERT_EXEC_IN_RANGE(lower_bound, upper_bound, cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_s16_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_s16_ae32.c
@@ -0,0 +1,106 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "esp_log.h"
+
+// Check dspm_mult_s16 against dspm_mult_s16_ansi for every shape with m in
+// [1, 7], n and k in [1, 15], and every shift in [-4, 3]. Both operands are
+// filled with the constant 0x123.
+TEST_CASE("dspm_mult_s16_aexx functionality", "[dspm]")
+{
+    for (int m = 1 ; m < 8 ; m++) {
+        for (int n = 1 ; n < 16 ; n++) {
+            for (int k = 1 ; k < 16 ; k++) {
+
+                // operands, result under test and reference result
+                int16_t A[m][n];
+                int16_t B[n][k];
+                int16_t C[m][k];
+                int16_t C_compare[m][k];
+
+                // flat views used for the library calls and element loops
+                int16_t *A_ptr = (int16_t *)A;
+                int16_t *B_ptr = (int16_t *)B;
+                int16_t *C_ptr = (int16_t *)C;
+                int16_t *Cc_ptr = (int16_t *)C_compare;
+
+                for (int shift = -4 ; shift < 4 ; shift++) {
+                    // every element of both operands gets the same constant
+                    for (int idx = 0 ; idx < m * n ; idx++) {
+                        A_ptr[idx] = 0x123;
+                    }
+                    for (int idx = 0 ; idx < n * k ; idx++) {
+                        B_ptr[idx] = 0x123;
+                    }
+
+                    // reference first, then the implementation under test
+                    dspm_mult_s16_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k, shift);
+                    dspm_mult_s16(A_ptr, B_ptr, C_ptr,  m, n, k, shift);
+
+                    // Compare and check results; log the failing shape before asserting
+                    for (int idx = 0 ; idx < m * k ; idx++) {
+                        if (Cc_ptr[idx] == C_ptr[idx]) {
+                            continue;
+                        }
+                        ESP_LOGE("dspm_mult_s16_aexx", "Process path m=%i, n=%i, k=%i,  shift=%i", m, n, k, shift);
+                        ESP_LOGE("dspm_mult_s16_aexx", "data[%i] %4.4x != %4.4x expected \n", idx, C_ptr[idx], Cc_ptr[idx]);
+                        TEST_ASSERT_EQUAL(Cc_ptr[idx], C_ptr[idx]);
+                    }
+                }
+            }
+        }
+    }
+}
+
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark: logs the CPU cycle count of a single dspm_mult_s16() call for
+// every shape with m in [2, 8], n in [2, 16] and k in [1, 16]. All operands
+// are zero-filled; nothing is asserted, the result is only logged.
+TEST_CASE("dspm_mult_s16_aexx benchmark", "[dspm]")
+{
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    for (int m = 2 ; m <= 8 ; m++) {
+        for (int n = 2 ; n <= 16 ; n++) {
+            for (int k = 1 ; k <= 16 ; k++) {
+
+                int16_t A[m][n];
+                int16_t *A_ptr = (int16_t *)A;
+
+                // B is the n x k right-hand operand (it was sized m x n, which
+                // is too small for the call whenever m < k)
+                int16_t B[n][k];
+                int16_t *B_ptr = (int16_t *)B;
+
+                int16_t C[m][k];
+                int16_t *C_ptr = (int16_t *)C;
+
+                // Clear each array with its own size; using sizeof(A) for C
+                // writes past the end of C whenever k < n
+                memset(A, 0, sizeof(A));
+                memset(B, 0, sizeof(B));
+                memset(C, 0, sizeof(C));
+                portENTER_CRITICAL(&testnlock);
+
+                start_b = dsp_get_cpu_cycle_count();
+                dspm_mult_s16(A_ptr, B_ptr, C_ptr, m, n, k, 0);
+                end_b = dsp_get_cpu_cycle_count();
+                portEXIT_CRITICAL(&testnlock);
+
+                float total_b = end_b - start_b;
+                float cycles = total_b;
+                ESP_LOGD("dspm_mult_s16_aexx", "dspm_mult_s16_aexx[%i][%i][%i] - %f", m, n, k, cycles);
+            }
+        }
+    }
+}
--- a/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/matrix/mul/test/test_mmult_s16_ansi.c
@@ -0,0 +1,110 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "esp_dsp.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dspm_mult.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+// Log tag passed to the ESP_LOGD() output of the functionality test below
+static const char *TAG = "dspm_mult_s16_ansi";
+
+// Test dspm_mult_s16_ansi function
+//
+// Multiplies a constant m x n matrix A by a constant n x k matrix B with
+// dspm_mult_s16_ansi() and checks every element of the m x k result against
+// a reference value computed in this test.
+TEST_CASE("dspm_mult_s16_ansi functionality", "[dspm]")
+{
+    int m = 4;
+    int n = 3;
+    int k = 4;
+
+
+    int16_t A[m][n];
+    int16_t *A_ptr = (int16_t *)A;
+
+    int16_t B[n][k];
+    int16_t *B_ptr = (int16_t *)B;
+
+    int16_t C[m][k];
+    int16_t *C_ptr = (int16_t *)C;
+    int16_t C_compare[m][k];
+    int16_t *Cc_ptr = (int16_t *)C_compare;
+
+    int shift = 0;
+
+    // Fill each operand over its own element count. A holds m * n values and
+    // B holds n * k values; the two counts are equal only while m == k.
+    for (int i = 0 ; i < m * n; i++) {
+        A_ptr[i] = 0x1000;
+    }
+    for (int i = 0 ; i < n * k; i++) {
+        B_ptr[i] = 0x200;
+    }
+
+    // Reference result: a 64-bit accumulator is seeded with (0x7fff >> shift)
+    // (presumably a rounding term - confirm against the library
+    // implementation), the n products of row i and column j are added, and
+    // the sum is shifted right by (15 - shift) before being stored as int16_t.
+    long long store_reg = 0;
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            store_reg = (0x7fff >> shift);
+            for (int s = 0 ; s < n ; s++) {
+                store_reg += ((int32_t)A[i][s] * (int32_t)B[s][j]);
+            }
+            C_compare[i][j] = store_reg >> (15 - shift);
+        }
+    }
+    dspm_mult_s16_ansi(A_ptr, B_ptr, C_ptr, m, n, k, shift);
+
+    for (int i = 0 ; i < m ; i++) {
+        for (int j = 0 ; j < k ; j++) {
+            ESP_LOGD(TAG, "[%i][%i] calc=%i, expected =%i", i, j, C[i][j], C_compare[i][j]);
+        }
+    }
+    // Compare and check results; the assertion is a no-op for equal values,
+    // so no separate inequality test is needed around it.
+    for (int i = 0 ; i < m * k ; i++) {
+        TEST_ASSERT_EQUAL(Cc_ptr[i], C_ptr[i]);
+    }
+}
+
+// Lock object handed to portENTER_CRITICAL()/portEXIT_CRITICAL() around the
+// timed loop in the benchmark test case below.
+static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
+
+// Benchmark dspm_mult_s16_ansi() for a 4x4 by 4x4 multiplication.
+//
+// The call is repeated repeat_count times between two cycle-counter reads
+// taken inside portENTER_CRITICAL()/portEXIT_CRITICAL(); the average cycle
+// count per call is logged and must fall inside [min_exec, max_exec].
+TEST_CASE("dspm_mult_s16_ansi benchmark", "[dspm]")
+{
+    int m = 4;
+    int n = 4;
+    int k = 4;
+
+    int16_t A[m][n];
+    int16_t *A_ptr = (int16_t *)A;
+
+    int16_t B[n][k];
+    int16_t *B_ptr = (int16_t *)B;
+
+    int16_t C[m][k];
+    int16_t *C_ptr = (int16_t *)C;
+
+    // Give the operands defined contents so the multiplication does not read
+    // indeterminate stack data. Variable-length arrays cannot take an
+    // initializer, hence memset().
+    memset(A, 0, sizeof(A));
+    memset(B, 0, sizeof(B));
+    memset(C, 0, sizeof(C));
+
+    int repeat_count = 1024;
+
+    portENTER_CRITICAL(&testnlock);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    for (int i = 0 ; i < repeat_count ; i++) {
+        dspm_mult_s16_ansi(A_ptr, B_ptr, C_ptr, m, n, k, 0);
+    }
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    portEXIT_CRITICAL(&testnlock);
+
+    // Unsigned subtraction first, then conversion to float for the average
+    float total_b = end_b - start_b;
+    float cycles = total_b / (repeat_count);
+    ESP_LOGI("dspm_mult_s16_ansi", "Benchmark dspm_mult_s16_ansi - %f per multiplication %ix%ix%i.\n", cycles, m, n, k);
+    // Accepted range, in cycles per call, for the averaged measurement
+    float min_exec = 1000;
+    float max_exec = 3000;
+    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
+}