add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
--- a/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s16_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s16_ae32.S
@@ -0,0 +1,82 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_add_platform.h"
+#if (dsps_add_s16_ae32_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_add_s16_ae32
+    .type   dsps_add_s16_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_add_s16_ae32:
+// Register usage (the first six arguments arrive in a2..a7, the last two on the stack):
+// a2  - input1 pointer
+// a3  - input2 pointer
+// a4  - output pointer
+// a5  - len, number of elements to process
+// a6  - step_in1, converted from elements to bytes below
+// a7  - step_in2, converted from elements to bytes below
+// a10 - step_out, loaded from the stack and converted to bytes
+// a9  - shift, loaded from the stack; reused for the shifted result once SAR is set
+// a8, a11 - scratch for the loaded samples and their 32-bit sum
+
+    entry   a1, 16
+
+    l32i.n  a10, a1, 16     // a10 = step_out
+    l32i.n  a9, a1,  20     // a9 = shift
+    ssr     a9              // SAR = shift amount for the right shifts below
+
+    slli    a6, a6, 1       // a6 = step_in1 * sizeof(int16_t)
+    slli    a7, a7, 1       // a7 = step_in2 * sizeof(int16_t)
+    slli    a10, a10, 1     // a10 = step_out * sizeof(int16_t)
+
+    // The loop is software pipelined: the result for element i is computed one
+    // iteration ahead and stored while the operands for element i+1 are loaded.
+    // As a consequence one element of each input is read beyond the processed
+    // range (that value is never stored).
+    //
+    // sra (arithmetic shift) is used instead of srl so that negative sums keep
+    // their sign, matching "acc >> shift" on int32_t in the reference C code.
+
+    // Prologue: result for element 0
+    l16si   a11, a2, 0
+    l16si   a8,  a3, 0
+    add     a8, a11, a8
+    sra     a9, a8          // a9 = a8 >> SAR (arithmetic)
+
+    loopnez a5, .loop_end_add_s16_ae32
+        add.n   a2, a2, a6      // input1 += step_in1
+        add.n   a3, a3, a7      // input2 += step_in2
+
+        l16si   a11, a2, 0      // operands of the next element
+        l16si   a8,  a3, 0
+        s16i    a9,  a4, 0      // store the result of the previous element
+        add     a8, a11, a8
+        sra     a9, a8          // a9 = a8 >> SAR (arithmetic)
+
+        add.n   a4, a4, a10     // output += step_out
+.loop_end_add_s16_ae32:
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_add_s16_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s16_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s16_aes3.S
@@ -0,0 +1,105 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#include "dsps_add_platform.h"
+#if (dsps_add_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_add_s16_aes3
+    .type   dsps_add_s16_aes3,@function
+// The function implements the following C code:
+// esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_add_s16_aes3:
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+
+    entry   a1, 16
+
+    l32i.n  a10, a1, 16     // Load step_out to the a10 register
+    l32i.n  a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = shift, used by the scalar fallback
+
+    // The vector path only handles contiguous arrays: every step must be 1
+    addi    a15, a6, -1
+    bnez    a15, .add_s16_ae32_mode // Branch if step_in1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .add_s16_ae32_mode // Branch if step_in2 != 1
+    addi    a15, a10, -1
+    bnez    a15, .add_s16_ae32_mode // Branch if step_out != 1
+
+    // The vector loop stores the sums without shifting them, so it is only
+    // equivalent to the reference code when shift == 0
+    bnez    a9, .add_s16_ae32_mode
+
+    // 128-bit vector loads/stores need 16-byte aligned pointers; the output
+    // pointer is checked as well because ee.vst.128.ip writes through it
+    movi    a15, 0xF                        // modulo 16 mask
+    bany    a2, a15, .add_s16_ae32_mode     // input1 not aligned
+    bany    a3, a15, .add_s16_ae32_mode     // input2 not aligned
+    bany    a4, a15, .add_s16_ae32_mode     // output not aligned
+
+    // One vector holds 8 samples: len must be a multiple of 8
+    movi    a15, 0x7                        // modulo 8 mask
+    bany    a5, a15, .add_s16_ae32_mode     // jump if != 0
+
+    // Vector path. q0 is preloaded with input1 and reloaded by the fused
+    // add+load, so one extra 16-byte block of input1 is read after the last sum.
+    // NOTE(review): ee.vadds.s16 is a saturating add, the scalar path wraps - confirm intent.
+    srli    a5, a5, 3                       // number of 8-sample blocks
+    ee.vld.128.ip     q0, a2, 16
+    loopnez a5, .loop_end_add_s16_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vadds.s16.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_add_s16_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+
+.add_s16_ae32_mode:
+    slli    a6, a6, 1       // a6 = step_in1 in bytes
+    slli    a7, a7, 1       // a7 = step_in2 in bytes
+    slli    a10, a10, 1     // a10 = step_out in bytes
+
+    l16si   a11, a2, 0
+    l16si   a8,  a3, 0
+    add     a8, a11, a8
+    sra     a9, a8          // a9 = a8 >> sar (arithmetic, keeps the sign)
+
+    loopnez a5, .loop_end_add_s16_aes3
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+
+        l16si   a11, a2, 0
+        l16si   a8,  a3, 0
+        s16i    a9,  a4, 0      // store the previous result to the output
+        add     a8, a11, a8
+        sra     a9, a8          // a9 = a8 >> sar (arithmetic, keeps the sign)
+
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_add_s16_aes3:
+    // Exit for Esp32 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_add_s16_aes3_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s16_ansi.c
@@ -0,0 +1,27 @@
+/*
+ * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+#include "dsps_add.h"
+
+esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    // All three buffers are mandatory
+    if ((NULL == input1) || (NULL == input2) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Each array is walked with its own stride; the sum is formed in 32 bits
+    // and scaled down by 'shift' before it is narrowed to int16_t
+    int pos1 = 0;
+    int pos2 = 0;
+    int pos_out = 0;
+    for (int n = 0 ; n < len ; n++, pos1 += step1, pos2 += step2, pos_out += step_out) {
+        int32_t sum = (int32_t)input1[pos1] + (int32_t)input2[pos2];
+        output[pos_out] = sum >> shift;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s8_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s8_aes3.S
@@ -0,0 +1,97 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_add_platform.h"
+#if (dsps_add_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_add_s8_aes3
+    .type   dsps_add_s8_aes3,@function
+// The function implements the following C code:
+// esp_err_t dsps_add_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] + (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_add_s8_aes3:
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+
+    entry   a1, 16
+
+    l32i.n  a10, a1, 16     // Load step_out to the a10 register
+    l32i.n  a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = shift, used by the scalar fallback
+
+    // The vector path only handles contiguous arrays: every step must be 1
+    addi    a15, a6, -1
+    bnez    a15, .add_s8_ae32_mode  // Branch if step_in1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .add_s8_ae32_mode  // Branch if step_in2 != 1
+    addi    a15, a10, -1
+    bnez    a15, .add_s8_ae32_mode  // Branch if step_out != 1
+
+    // The vector loop stores unshifted sums: usable only when shift == 0
+    bnez    a9, .add_s8_ae32_mode
+
+    // 128-bit vector loads/stores need 16-byte aligned pointers (output included)
+    movi    a15, 0xF                        // modulo 16 mask
+    bany    a2, a15, .add_s8_ae32_mode      // input1 not aligned
+    bany    a3, a15, .add_s8_ae32_mode      // input2 not aligned
+    bany    a4, a15, .add_s8_ae32_mode      // output not aligned
+
+    // One vector holds 16 samples: len must be a multiple of 16
+    movi    a15, 0xf                        // modulo 16 mask
+    bany    a5, a15, .add_s8_ae32_mode      // jump if != 0
+
+    // Vector path: q0 is preloaded and then reloaded by the fused add+load
+    srli    a5, a5, 4                       // number of 16-sample blocks
+    ee.vld.128.ip     q0, a2, 16
+    loopnez a5, .loop_end_add_s8_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vadds.s8.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_add_s8_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+.add_s8_ae32_mode:
+    l8ui    a11, a2, 0
+    l8ui    a8,  a3, 0
+    sext    a11, a11, 7     // l8ui zero-extends: restore the sign of the int8_t samples
+    sext    a8,  a8,  7
+    add     a8, a11, a8
+    sra     a9, a8          // a9 = a8 >> sar (arithmetic, as in the reference C code)
+
+    loopnez a5, .loop_end_add_s8_aes3
+        add.n   a2, a2, a6      // input1+=step_in1; (1 byte per element, no scaling)
+        add.n   a3, a3, a7      // input2+=step_in2;
+        l8ui    a11, a2, 0
+        l8ui    a8,  a3, 0
+        s8i     a9,  a4, 0      // store the previous result to the output
+        sext    a11, a11, 7     // sign-extend both samples before adding
+        sext    a8,  a8,  7
+        add     a8, a11, a8
+        sra     a9, a8          // a9 = a8 >> sar (arithmetic)
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_add_s8_aes3:
+    // Exit for Esp32 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_add_s16_aes3_enabled (this macro also gates the s8 kernel)
--- a/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/fixed/dsps_add_s8_ansi.c
@@ -0,0 +1,26 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_add.h"
+
+esp_err_t dsps_add_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    // All three buffers are mandatory
+    if ((NULL == input1) || (NULL == input2) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Each array is walked with its own stride; the sum is formed in 32 bits
+    // and scaled down by 'shift' before it is narrowed to int8_t
+    int pos1 = 0;
+    int pos2 = 0;
+    int pos_out = 0;
+    for (int n = 0 ; n < len ; n++, pos1 += step1, pos2 += step2, pos_out += step_out) {
+        int32_t sum = (int32_t)input1[pos1] + (int32_t)input2[pos2];
+        output[pos_out] = sum >> shift;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/add/float/dsps_add_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/add/float/dsps_add_f32_ae32.S
@@ -0,0 +1,64 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_add_platform.h"
+#if (dsps_add_f32_ae32_enabled == 1)
+
+// Element-wise addition of two float arrays for the ESP32 processor.
+    .text
+    .align  4
+    .global dsps_add_f32_ae32
+    .type   dsps_add_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_add_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         output[i * step_out] = input1[i * step1] + input2[i * step2];
+//     }
+//     return ESP_OK;
+// }
+
+dsps_add_f32_ae32: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5 (loop count, the loop is skipped when it is 0)
+// step1    - a6 (in elements, scaled to bytes below)
+// step2    - a7 (in elements, scaled to bytes below)
+// step_out - stack (a8) 
+
+    entry	a1, 16
+    
+    l32i.n	a8, a1, 16 // Load step_out to the a8 register
+    slli 	a6, a6, 2  // a6 - step1<<2 (byte stride, 4 bytes per float)
+    slli 	a7, a7, 2  // a7 - step2<<2
+    slli 	a8, a8, 2  // a8 - step_out<<2
+// Pipelined loop: f0 always holds the input1 sample that was loaded one step ahead
+        lsi     f0, a2, 0      // preload the first input1 sample
+        add.n   a2, a2, a6     // input1_ptr+=step1;
+    loopnez a5, loop_end_add_f32_ae32
+        lsi      f1, a3, 0      // current input2 sample
+        add.n    a3, a3, a7     // input2_ptr+=step2;
+
+        add.s    f2, f1, f0     // f2 = f1 + f0
+        lsi      f0, a2, 0      // load the next input1 sample before the store
+        add.n    a2, a2, a6     // input1_ptr+=step1;        
+        ssi	     f2, a4, 0      // write the sum to the output
+        add.n    a4, a4, a8     // output_ptr+=step_out;
+loop_end_add_f32_ae32:
+
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_add_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/add/float/dsps_add_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/float/dsps_add_f32_ansi.c
@@ -0,0 +1,33 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_add.h"
+
+esp_err_t dsps_add_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+{
+    // All three buffers are mandatory
+    if ((NULL == input1) || (NULL == input2) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Each array is walked with its own stride
+    int pos1 = 0;
+    int pos2 = 0;
+    int pos_out = 0;
+
+    for (int n = 0 ; n < len ; n++, pos1 += step1, pos2 += step2, pos_out += step_out) {
+        output[pos_out] = input1[pos1] + input2[pos2];
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/add/include/dsps_add.h
+++ b/managed_components/espressif__esp-dsp/modules/math/add/include/dsps_add.h
@@ -0,0 +1,89 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_add_H_
+#define _dsps_add_H_
+#include "dsp_err.h"
+
+#include "dsps_add_platform.h"
+
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   add two arrays
+ *
+ * The function add one input array to another
+ * out[i*step_out] = input1[i*step1] + input2[i*step2]; i=[0..len)
+ * The implementation use ANSI C and could be compiled and run on any platform
+ *
+ * @param[in] input1: input array 1
+ * @param[in] input2: input array 2
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param step1: step over input array 1 (by default should be 1)
+ * @param step2: step over input array 2 (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_add_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+esp_err_t dsps_add_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+// 16-bit fixed point variants: out[i*step_out] = (input1[i*step1] + input2[i*step2]) >> shift
+esp_err_t dsps_add_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_add_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_add_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+// 8-bit fixed point variants of the same operation
+esp_err_t dsps_add_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_add_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#if CONFIG_DSP_OPTIMIZED
+
+#if (dsps_add_f32_ae32_enabled == 1)
+#define dsps_add_f32 dsps_add_f32_ae32
+#else
+#define dsps_add_f32 dsps_add_f32_ansi
+#endif
+
+#if (dsps_add_s16_aes3_enabled == 1)
+#define dsps_add_s16 dsps_add_s16_aes3
+#define dsps_add_s8 dsps_add_s8_aes3
+#elif (dsps_add_s16_ae32_enabled == 1)
+#define dsps_add_s16 dsps_add_s16_ae32
+#define dsps_add_s8 dsps_add_s8_ansi
+#else
+#define dsps_add_s16 dsps_add_s16_ansi
+#define dsps_add_s8 dsps_add_s8_ansi
+#endif
+
+#else // CONFIG_DSP_OPTIMIZED
+#define dsps_add_f32 dsps_add_f32_ansi
+#define dsps_add_s16 dsps_add_s16_ansi
+#define dsps_add_s8 dsps_add_s8_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dsps_add_H_
--- a/managed_components/espressif__esp-dsp/modules/math/add/include/dsps_add_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/math/add/include/dsps_add_platform.h
@@ -0,0 +1,32 @@
+#ifndef _dsps_add_platform_H_
+#define _dsps_add_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
+#define dsps_add_f32_ae32_enabled  1
+#define dsps_add_s16_aes3_enabled  1
+#else
+
+// The s16 assembly kernel only needs the zero-overhead loop option
+#if (XCHAL_HAVE_LOOPS == 1)
+#define dsps_add_s16_ae32_enabled  1
+
+// The f32 assembly kernel additionally uses floating point instructions
+#if (XCHAL_HAVE_FP == 1)
+#define dsps_add_f32_ae32_enabled  1
+#endif // XCHAL_HAVE_FP
+
+#endif // XCHAL_HAVE_LOOPS
+
+
+#endif // CONFIG_IDF_TARGET_ESP32S3
+
+#endif // __XTENSA__
+
+
+#endif // _dsps_add_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_f32_ansi.c
@@ -0,0 +1,71 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_add.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_add";
+
+TEST_CASE("dsps_add_f32_ansi functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = 2 * i;
+    }
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_f32_ansi(x, x, x, n, 1, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        // Float-aware comparison in Unity's (expected, actual) order: y is the
+        // reference, x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+}
+
+TEST_CASE("dsps_add_f32 functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = 2 * i;
+    }
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_f32(x, x, x, n, 1, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        // Float-aware comparison in Unity's (expected, actual) order: y is the
+        // reference, x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+
+    int repeat_count = 1;
+
+    dsps_add_f32(x, x, x, n, 1, 1, 1);   // extra call before the timed one
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_add_f32(x, x, x, n, 1, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (n * repeat_count);
+    ESP_LOGI(TAG, "dsps_add_f32 - %f cycles per sample \n", cycles);
+
+}
--- a/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s16_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s16_ae32.c
@@ -0,0 +1,57 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_add.h"
+#include "esp_attr.h"
+
+#if (dsps_add_s16_ae32_enabled == 1)
+
+static const char *TAG = "dsps_add";
+TEST_CASE("dsps_add_s16_ae32 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t x[n];
+    int16_t y[n];
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        temp = ((int32_t)x[i] + (int32_t)x[i]) >> shift;
+        y[i] = temp;   // reference result
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_s16_ae32(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        // Unity's argument order is (expected, actual): y is the reference,
+        // x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL(y[i], x[i]);
+    }
+}
+
+TEST_CASE("dsps_add_s16_ae32 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_add_s16_ae32(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;   // the log line reports cycles per sample
+    ESP_LOGI(TAG, "dsps_add_s16_ae32 - %f cycles per sample \n", cycles);
+}
+
+#endif // (dsps_add_s16_ae32_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s16_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s16_aes3.c
@@ -0,0 +1,61 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_add.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_add_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_add";
+TEST_CASE("dsps_add_s16_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
+    TEST_ASSERT_NOT_NULL(x);   // fail cleanly instead of dereferencing NULL
+    TEST_ASSERT_NOT_NULL(y);
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        y[i] = ((int32_t)x[i] + (int32_t)x[i]) >> shift;   // reference result
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_s16_aes3(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        // Unity's argument order is (expected, actual): y is the reference,
+        // x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL(y[i], x[i]);
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_add_s16_aes3 benchmark", "[dsps]")
+{
+    const int n = 2048;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    TEST_ASSERT_NOT_NULL(x);   // fail cleanly instead of dereferencing NULL
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_add_s16_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;   // the log line reports cycles per sample
+    ESP_LOGI(TAG, "dsps_add_s16_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+}
+#endif // (dsps_add_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s16_ansi.c
@@ -0,0 +1,53 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_add.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_add";
+
+TEST_CASE("dsps_add_s16_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t x[n];
+    int16_t y[n];
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        temp = ((int32_t)x[i] + (int32_t)x[i]) >> shift;
+        y[i] = temp;   // reference result
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_s16_ansi(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        // Unity's argument order is (expected, actual): y is the reference,
+        // x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL(y[i], x[i]);
+    }
+}
+
+TEST_CASE("dsps_add_s16_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_add_s16_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;   // the log line reports cycles per sample
+    ESP_LOGI(TAG, "dsps_add_s16_ansi - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s8_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s8_aes3.c
@@ -0,0 +1,61 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_add.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_add_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_add";
+TEST_CASE("dsps_add_s8_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
+    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
+    TEST_ASSERT_NOT_NULL(x);   // fail cleanly instead of dereferencing NULL
+    TEST_ASSERT_NOT_NULL(y);
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i - n / 2;
+        y[i] = ((int32_t)x[i] + (int32_t)x[i]) >> shift;   // reference result
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_s8_aes3(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        // Unity's argument order is (expected, actual): y is the reference,
+        // x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL(y[i], x[i]);
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_add_s8_aes3 benchmark", "[dsps]")
+{
+    const int n = 2048;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
+    TEST_ASSERT_NOT_NULL(x);   // fail cleanly instead of dereferencing NULL
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_add_s8_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;   // the log line reports cycles per sample
+    ESP_LOGI(TAG, "dsps_add_s8_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+}
+#endif // (dsps_add_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/add/test/test_dsps_add_s8_ansi.c
@@ -0,0 +1,53 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_add.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_add";
+
+TEST_CASE("dsps_add_s8_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t x[n];
+    int8_t y[n];
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i - n / 2;
+        temp = ((int32_t)x[i] + (int32_t)x[i]) >> shift;
+        y[i] = temp;   // reference result
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_add_s8_ansi(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        // Unity's argument order is (expected, actual): y is the reference,
+        // x was overwritten in place by the function under test
+        TEST_ASSERT_EQUAL(y[i], x[i]);
+    }
+}
+
+TEST_CASE("dsps_add_s8_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_add_s8_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;   // the log line reports cycles per sample
+    ESP_LOGI(TAG, "dsps_add_s8_ansi - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/addc/float/dsps_addc_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/addc/float/dsps_addc_f32_ae32.S
@@ -0,0 +1,57 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_addc_platform.h"
+#if (dsps_addc_f32_ae32_enabled == 1)
+
+// Addition of a constant to every element of a float array for the ESP32 processor.
+    .text
+    .align  4
+    .global dsps_addc_f32_ae32
+    .type   dsps_addc_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_addc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         output[i * step_out] = input[i * step_in] + C;
+//     }
+//     return ESP_OK;
+// }
+dsps_addc_f32_ae32: 
+// input    - a2
+// output   - a3
+// len      - a4 (loop count, the loop is skipped when it is 0)
+// C        - a5 (float constant, moved to f0 below)
+// step_in  - a6 (in elements, scaled to bytes below)
+// step_out - a7 (in elements, scaled to bytes below)
+
+    entry	a1, 16
+    
+    slli 	a6, a6, 2  	// a6 - step_in<<2 (byte stride, 4 bytes per float)
+    slli 	a7, a7, 2  	// a7 - step_out<<2
+    wfr		f0, a5		// f0 = C, kept for the whole loop
+
+    loopnez a4, loop_end_addc_f32_ae32
+        lsi      f1, a2, 0      // f1 = current input sample
+
+        add.s    f2, f1, f0     // f2 = f1 + f0
+        add.n    a2, a2, a6     // input+=step_in;
+        ssi	     f2, a3, 0      // write the sum to the output
+        add.n    a3, a3, a7     // output+=step_out;
+loop_end_addc_f32_ae32:
+
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_addc_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/addc/float/dsps_addc_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/addc/float/dsps_addc_f32_ansi.c
@@ -0,0 +1,30 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_addc.h"
+
+// Adds C to len strided elements of input and stores the sums in output; NULL buffers are rejected.
+esp_err_t dsps_addc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
+{
+    if ((input == NULL) || (output == NULL)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    int idx = 0;
+    while (idx < len) {
+        const float sample = input[idx * step_in];
+        output[idx * step_out] = sample + C;
+        idx++;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/addc/include/dsps_addc.h
+++ b/managed_components/espressif__esp-dsp/modules/math/addc/include/dsps_addc.h
@@ -0,0 +1,65 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_addc_H_
+#define _dsps_addc_H_
+#include "dsp_err.h"
+
+#include "dsps_addc_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   add constant
+ *
+ * The function adds a constant to every element of the input array
+ * output[i*step_out] = input[i*step_in] + C; i=[0..len)
+ * The implementation use ANSI C and could be compiled and run on any platform
+ *
+ * @param[in] input: input array
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param C: constant value
+ * @param step_in: step over input array (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_addc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out);
+esp_err_t dsps_addc_f32_ae32(const float *input, float *output, int len, float C, int step_in, int step_out);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#if CONFIG_DSP_OPTIMIZED
+#if (dsps_addc_f32_ae32_enabled == 1)
+#define dsps_addc_f32 dsps_addc_f32_ae32
+#else
+#define dsps_addc_f32 dsps_addc_f32_ansi
+#endif
+#else
+#define dsps_addc_f32 dsps_addc_f32_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dsps_addc_H_
--- a/managed_components/espressif__esp-dsp/modules/math/addc/include/dsps_addc_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/math/addc/include/dsps_addc_platform.h
@@ -0,0 +1,19 @@
+#ifndef _dsps_addc_platform_H_
+#define _dsps_addc_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+
+#define dsps_addc_f32_ae32_enabled  1
+
+#endif
+#endif // __XTENSA__
+
+
+#endif // _dsps_addc_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/math/addc/test/test_dsps_addc_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/addc/test/test_dsps_addc_f32_ansi.c
@@ -0,0 +1,71 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_addc.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_addc";
+
+
+// Checks the ANSI add-constant routine in place: x[i] = i must become i + 10.
+TEST_CASE("dsps_addc_f32_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i + 10;    // expected result
+    }
+    // The status code is verified as well, not only the data.
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_addc_f32_ansi(x, x, n, 10, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);    // float compare, expected value first
+    }
+}
+
+// Verifies the routine selected by the dsps_addc_f32 macro and logs its cycle cost.
+TEST_CASE("dsps_addc_f32 functionality", "[dsps]")
+{
+    int n = 64;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i + 10;    // expected result
+    }
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_addc_f32(x, x, n, 10, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        // Float-aware compare (expected first); an integer compare would hide fractional errors.
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+
+    int repeat_count = 1;
+
+    dsps_addc_f32(x, x, n, 10, 1, 1);    // untimed call ahead of the measurement (presumably a warm-up)
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_addc_f32(x, x, n, 10, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (n * repeat_count);
+    ESP_LOGI(TAG, "dsps_addc_f32 - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/include/dsps_math.h
+++ b/managed_components/espressif__esp-dsp/modules/math/include/dsps_math.h
@@ -0,0 +1,25 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_math_H_
+#define _dsps_math_H_
+
+#include "dsps_add.h"
+#include "dsps_sub.h"
+#include "dsps_mul.h"
+#include "dsps_addc.h"
+#include "dsps_mulc.h"
+#include "dsps_sqrt.h"
+
+#endif // _dsps_math_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ae32.S
@@ -0,0 +1,74 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_s16_ae32_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mul_s16_ae32
+    .type   dsps_mul_s16_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_mul_s16_ae32: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+// Returns 0 (ESP_OK) in a2. Pointers are not checked for NULL here.
+    entry	a1, 16
+
+    // The 7th and 8th arguments are passed on the caller's stack, which
+    // sits 16 bytes above a1 after the "entry a1, 16" above.
+    l32i.n	a10, a1, 16     // Load step_out to the a10 register
+    l32i.n	a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = a9 (shift amount used by srl below)
+
+    // Convert element steps to byte steps (sizeof(int16_t) == 2)
+    slli 	a6, a6, 1  	// a6  - step_in1<<1
+    slli 	a7, a7, 1  	// a7  - step_in2<<1
+    slli 	a10, a10, 1 // a10 - step_out<<1
+
+    // Software pipeline: the product for element 0 is computed before the
+    // loop. Every iteration then stores the result that is already in a9
+    // and computes the result for the next element, so one element past
+    // the last one is loaded from each input and its product is discarded.
+    // The prologue must use the same operation as the loop body (mull),
+    // otherwise output[0] would hold a sum instead of a product.
+    // Note: srl is a logical shift; the 16 bits stored by s16i equal those
+    // of the arithmetic shift in the C code only for shift values up to 16.
+    l16si   a11, a2, 0
+    l16si   a8,  a3, 0
+    mull    a8, a11, a8     // a8 = input1[0] * input2[0]
+    srl     a9, a8          // a9 = a8>>sar
+
+    loopnez a5, .loop_end_mul_s16_ae32
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+
+        l16si   a11, a2, 0
+        l16si   a8,  a3, 0
+        s16i	a9,  a4, 0      // store the previous result to the output
+        mull    a8, a11, a8     // a8 = product of the next pair
+        srl     a9, a8          // a9 = a8>>sar
+
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_mul_s16_ae32:
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_s16_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_aes3.S
@@ -0,0 +1,104 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mul_s16_aes3
+    .type   dsps_mul_s16_aes3,@function
+// The function implements the following C code:
+// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_mul_s16_aes3: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+// Returns 0 (ESP_OK) in a2. Pointers are not checked for NULL here.
+    entry	a1, 16
+
+    l32i.n	a10, a1, 16     // Load step_out to the a10 register
+    l32i.n	a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = a9
+
+    // The vector path only handles contiguous data: every step must be 1
+    addi    a15, a6, -1
+    bnez    a15, .mul_s16_ae32_mode // Branch if step_in1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .mul_s16_ae32_mode // Branch if step_in2 != 1
+    addi    a15, a10, -1
+    bnez    a15,.mul_s16_ae32_mode  // Branch if step_out != 1
+
+    // Check addresses: the 128-bit loads and stores ignore the low 4 address
+    // bits, so all three buffers (output included) must be 16-byte aligned
+    movi        a15, 0xF                                              // modulo 16 mask
+    bany        a2, a15, .mul_s16_ae32_mode                          // jump if input1 is unaligned
+    bany        a3, a15, .mul_s16_ae32_mode                          // jump if input2 is unaligned
+    bany        a4, a15, .mul_s16_ae32_mode                          // jump if output is unaligned
+
+    // Check length (should be a multiple of 8)
+    movi        a15, 0x7                                              // modulo 8 mask
+    bany        a5, a15, .mul_s16_ae32_mode                          // jump if != 0
+
+    // Process main function for S3
+    wsr.sar	a9          // load sar register
+
+    // Each iteration multiplies 8 samples. q0 is loaded ahead of the loop and
+    // reloaded by ee.vmul.s16.ld.incp, so input1 is read one vector ahead.
+    srli    a5, a5, 3
+    ee.vld.128.ip     q0, a2, 16
+    loopnez a5, .loop_end_mul_s16_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vmul.s16.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_mul_s16_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+
+.mul_s16_ae32_mode:
+    // Scalar fallback for strided, unaligned or odd-length input. The first
+    // product is computed ahead of the loop; each iteration stores one result.
+    slli 	a6, a6, 1  	// a6  - step_in1<<1
+    slli 	a7, a7, 1  	// a7  - step_in2<<1
+    slli 	a10, a10, 1 // a10 - step_out<<1
+
+    l16si   a11, a2, 0
+    l16si   a8,  a3, 0
+    mull    a8, a11, a8
+    srl     a9, a8          // a9 = a8>>sar
+
+    loopnez a5, .loop_end_mul_s16_aes3
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+
+        l16si   a11, a2, 0
+        l16si   a8,  a3, 0
+        s16i	a9,  a4, 0      // store the previous result to the output
+        mull    a8, a11, a8
+        srl     a9, a8          // a9 = a8>>sar
+
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_mul_s16_aes3:
+    // Exit for Esp32 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_s16_aes3_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ansi.c
@@ -0,0 +1,34 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_mul.h"
+
+// Multiplies two strided int16 arrays element by element; each product is
+// shifted right by shift and narrowed to int16 when stored.
+// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE if any pointer is NULL, else ESP_OK.
+esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    int idx = 0;
+    while (idx < len) {
+        const int lhs = input1[idx * step1];
+        const int rhs = input2[idx * step2];
+        const int product = lhs * rhs;
+        output[idx * step_out] = product >> shift;
+        idx++;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_aes3.S
@@ -0,0 +1,97 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mul_s8_aes3
+    .type   dsps_mul_s8_aes3,@function
+// The function implements the following C code:
+// esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_mul_s8_aes3: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+// Returns 0 (ESP_OK) in a2. Pointers are not checked for NULL here.
+    entry	a1, 16
+
+    l32i.n	a10, a1, 16     // Load step_out to the a10 register
+    l32i.n	a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = a9
+
+    // The vector path only handles contiguous data: every step must be 1
+    addi    a15, a6, -1
+    bnez    a15, .mul_s8_ae32_mode // Branch if step_in1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .mul_s8_ae32_mode // Branch if step_in2 != 1
+    addi    a15, a10, -1
+    bnez    a15,.mul_s8_ae32_mode  // Branch if step_out != 1
+
+    // Check addresses: 128-bit accesses ignore the low 4 address bits, so inputs and output must be 16-byte aligned
+    movi        a15, 0xF                                              // modulo 16 mask
+    bany        a2, a15, .mul_s8_ae32_mode                          // jump if input1 is unaligned
+    bany        a3, a15, .mul_s8_ae32_mode                          // jump if input2 is unaligned
+    bany        a4, a15, .mul_s8_ae32_mode                          // jump if output is unaligned
+
+    // Check length (should be a multiple of 16)
+    movi        a15, 0xf                                              // modulo 16 mask
+    bany        a5, a15, .mul_s8_ae32_mode                          // jump if != 0
+
+    // Process main function for S3
+    wsr.sar	a9          // load sar register
+    // Each iteration multiplies 16 samples; q0 is loaded one vector ahead of the loop
+    srli    a5, a5, 4
+    ee.vld.128.ip     q0, a2, 16
+    loopnez a5, .loop_end_mul_s8_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vmul.s8.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_mul_s8_aes3_main:
+    // Exit for Esp32s3 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+.mul_s8_ae32_mode:
+    // Scalar fallback. l8ui zero-extends, so each int8 operand is sign-extended
+    // with sext before the multiply; otherwise negative inputs break for shift > 0.
+    l8ui    a11, a2, 0
+    l8ui    a8,  a3, 0
+    sext    a11, a11, 7     // int8 -> int32
+    sext    a8,  a8,  7     // int8 -> int32
+    mull    a8, a11, a8
+    srl     a9, a8          // a9 = a8>>sar
+
+    loopnez a5, .loop_end_mul_s8_aes3
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+        l8ui    a11, a2, 0
+        l8ui    a8,  a3, 0
+        sext    a11, a11, 7     // int8 -> int32
+        sext    a8,  a8,  7     // int8 -> int32
+        s8i	a9,  a4, 0      // store the previous result to the output
+        mull    a8, a11, a8
+        srl     a9, a8          // a9 = a8>>sar
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_mul_s8_aes3:
+    // Exit for Esp32 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_s16_aes3_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_ansi.c
@@ -0,0 +1,26 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul.h"
+
+// Multiplies two strided int8 arrays element by element; each 32-bit product
+// is shifted right by shift and narrowed to int8 when stored.
+// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE if any pointer is NULL, else ESP_OK.
+esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    int idx = 0;
+    while (idx < len) {
+        const int32_t lhs = input1[idx * step1];
+        const int32_t rhs = input2[idx * step2];
+        const int32_t product = lhs * rhs;
+        output[idx * step_out] = product >> shift;
+        idx++;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ae32.S
@@ -0,0 +1,64 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_f32_ae32_enabled == 1)
+
+// Multiplies two float arrays element by element; assembly version for the ESP32 processor.
+    .text
+    .align  4
+    .global dsps_mul_f32_ae32
+    .type   dsps_mul_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         output[i * step_out] = input1[i * step1] * input2[i * step2];
+//     }
+//     return ESP_OK;
+// }
+
+dsps_mul_f32_ae32: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step1    - a6
+// step2    - a7
+// step_out - stack (a8) 
+// Returns 0 (ESP_OK) in a2; the pointers are not validated here.
+    entry	a1, 16
+    // The steps are converted from elements to bytes (4 bytes per float)
+    l32i.n	a8, a1, 16 // Load step_out to the a8 register
+    slli 	a6, a6, 2  // a6 - step1<<2
+    slli 	a7, a7, 2  // a7 - step2<<2
+    slli 	a8, a8, 2  // a8 - step_out<<2
+    // input1 is read one element ahead: f0 is loaded here and reloaded in the loop
+        lsi     f0, a2, 0
+        add.n   a2, a2, a6     // input1_ptr+=step1;
+    loopnez a5, loop_end_mul_f32_ae32
+        lsi      f1, a3, 0
+        add.n    a3, a3, a7     // input2_ptr+=step2;
+
+        mul.s    f2, f1, f0     // f2 = f1*f0
+        lsi      f0, a2, 0      // preload input1 for the next iteration
+        add.n    a2, a2, a6     // input1_ptr+=step1;
+        ssi	     f2, a4, 0      // store the product to the output
+        add.n    a4, a4, a8     // output_ptr+=step_out;
+loop_end_mul_f32_ae32:
+
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ansi.c
@@ -0,0 +1,33 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_mul.h"
+
+// Multiplies two strided float arrays element by element into output.
+// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE if any pointer is NULL, else ESP_OK.
+esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+{
+    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    int idx = 0;
+    while (idx < len) {
+        const float lhs = input1[idx * step1];
+        const float rhs = input2[idx * step2];
+        output[idx * step_out] = lhs * rhs;
+        idx++;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul.h
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul.h
@@ -0,0 +1,111 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_mul_H_
+#define _dsps_mul_H_
+#include "dsp_err.h"
+
+#include "dsps_mul_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   Multiply two arrays
+ *
+ * The function multiply one input array to another and store result to other array
+ * out[i*step_out] = input1[i*step1] * input2[i*step2]; i=[0..len)
+ * The implementation use ANSI C and could be compiled and run on any platform
+ *
+ * @param[in] input1: input array 1
+ * @param[in] input2: input array 2
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param step1: step over input array 1 (by default should be 1)
+ * @param step2: step over input array 2 (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+esp_err_t dsps_mul_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+/**@}*/
+
+
+/**@{*/
+/**
+ * @brief   Multiply two arrays
+ *
+ * The function multiply one input array to another and store result to other array
+ * out[i*step_out] = input1[i*step1] * input2[i*step2]; i=[0..len)
+ * The implementation use ANSI C and could be compiled and run on any platform
+ *
+ * @param[in] input1: input array 1
+ * @param[in] input2: input array 2
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param step1: step over input array 1 (by default should be 1)
+ * @param step2: step over input array 2 (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ * @param shift: output shift after multiplication (by default should be 15)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_mul_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_mul_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+
+esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_mul_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#if CONFIG_DSP_OPTIMIZED
+
+#if (dsps_mul_f32_ae32_enabled == 1)
+#define dsps_mul_f32 dsps_mul_f32_ae32
+#else
+#define dsps_mul_f32 dsps_mul_f32_ansi
+#endif
+
+#if (dsps_mul_s16_aes3_enabled == 1)
+#define dsps_mul_s16 dsps_mul_s16_aes3
+#define dsps_mul_s8  dsps_mul_s8_aes3
+#elif (dsps_mul_s16_ae32_enabled == 1)
+#define dsps_mul_s16 dsps_mul_s16_ae32
+#define dsps_mul_s8  dsps_mul_s8_ansi
+#else
+#define dsps_mul_s16 dsps_mul_s16_ansi
+#define dsps_mul_s8  dsps_mul_s8_ansi
+#endif
+
+#else // CONFIG_DSP_OPTIMIZED
+#define dsps_mul_f32 dsps_mul_f32_ansi
+#define dsps_mul_s16 dsps_mul_s16_ansi
+#define dsps_mul_s8  dsps_mul_s8_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dsps_mul_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul_platform.h
@@ -0,0 +1,30 @@
+#ifndef _dsps_mul_platform_H_
+#define _dsps_mul_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+
+// The float kernel needs both the FPU and zero-overhead loops.
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+#define dsps_mul_f32_ae32_enabled  1
+#endif
+
+// The 16-bit integer kernel uses zero-overhead loops but no FPU instructions,
+// so a core with loops and no FPU must not enable the float kernel here.
+#if (XCHAL_HAVE_LOOPS == 1)
+#define dsps_mul_s16_ae32_enabled  1
+#endif
+
+// ESP32-S3 additionally enables the aes3 kernels.
+#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
+#define dsps_mul_f32_ae32_enabled  1
+#define dsps_mul_s16_aes3_enabled  1
+#endif
+
+#endif // __XTENSA__
+
+#endif // _dsps_mul_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_f32_ansi.c
@@ -0,0 +1,71 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mul.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mul";
+
+// Checks the ANSI multiply routine in place: x[i] = i must become i * i.
+TEST_CASE("dsps_mul_f32_ansi functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i * i;    // expected result
+    }
+    // The status code is verified as well, not only the data.
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_f32_ansi(x, x, x, n, 1, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);    // float compare, expected value first
+    }
+}
+
+// Verifies the routine selected by the dsps_mul_f32 macro and logs its cycle cost.
+TEST_CASE("dsps_mul_f32 functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i * i;    // expected result
+    }
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_f32(x, x, x, n, 1, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        // Float-aware compare (expected first); an integer compare would hide fractional errors.
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+
+    int repeat_count = 1;
+
+    dsps_mul_f32(x, x, x, n, 1, 1, 1);    // untimed call ahead of the measurement (presumably a warm-up)
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_f32(x, x, x, n, 1, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (n * repeat_count);
+    ESP_LOGI(TAG, "dsps_mul_f32 - %f cycles per sample \n", cycles);
+
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ae32.c
@@ -0,0 +1,62 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mul.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mul";
+
+// Squares a ramp in place with the routine selected by the dsps_mul_s16 macro
+// and compares every element with a reference computed in C.
+TEST_CASE("dsps_mul_s16 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t x[n];
+    int16_t y[n];
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        // Expected value: 32-bit product, shifted, then narrowed to 16 bits.
+        y[i] = (int16_t)(((int32_t)x[i] * (int32_t)x[i]) >> shift);
+    }
+
+    // The call works in place, so x holds the results afterwards.
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_s16(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL_INT16(y[i], x[i]);    // expected value first
+    }
+}
+
+// Logs the average cycle cost per element of one in-place dsps_mul_s16 call.
+TEST_CASE("dsps_mul_s16 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s16(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    // The message reports cycles per sample, so the elapsed cycles are divided by n.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s16 - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_aes3.c
@@ -0,0 +1,69 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_mul.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_mul";
+// Functional check of dsps_mul_s16_aes3 on 16-byte aligned heap buffers:
+// squares a 64-sample ramp in place and compares against a C reference.
+TEST_CASE("dsps_mul_s16_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    TEST_ASSERT_NOT_NULL(y);
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
+        y[i] = temp;
+    }
+
+    // In-place call: both inputs and the output share the same buffer.
+    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, shift);
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(x[i], y[i]);
+    }
+    free(x);
+    free(y);
+}
+
+// Benchmark: times one in-place dsps_mul_s16_aes3 call over 2048 samples held
+// in a 16-byte aligned heap buffer and logs the average cost per sample.
+TEST_CASE("dsps_mul_s16_aes3 benchmark", "[dsps]")
+{
+    const int n = 2048;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    // Divide by n so the value matches the "cycles per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s16_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+
+}
+#endif // (dsps_mul_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ansi.c
@@ -0,0 +1,66 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_mul.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+static const char *TAG = "dsps_mul";
+// Functional check of dsps_mul_s16_ansi on heap buffers: squares a 64-sample
+// ramp in place and compares against a reference computed in plain C.
+TEST_CASE("dsps_mul_s16_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    TEST_ASSERT_NOT_NULL(y);
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
+        y[i] = temp;
+    }
+
+    // In-place call: both inputs and the output share the same buffer.
+    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, shift);
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(x[i], y[i]);
+    }
+    free(x);
+    free(y);
+}
+
+// Benchmark: times one in-place dsps_mul_s16_ansi call over 2048 samples and
+// logs the average cost per sample.
+TEST_CASE("dsps_mul_s16_ansi benchmark", "[dsps]")
+{
+    const int n = 2048;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    // Divide by n so the value matches the "cycles per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s16_ansi - %f cycles per sample \n", cycles);
+    free(x);
+
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_aes3.c
@@ -0,0 +1,69 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_mul.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_mul";
+// Functional check of dsps_mul_s8_aes3 on 16-byte aligned heap buffers:
+// squares 64 signed values centred on zero in place and compares the result
+// against a reference computed in plain C.
+TEST_CASE("dsps_mul_s8_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
+    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    TEST_ASSERT_NOT_NULL(y);
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i - n / 2;
+        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
+        y[i] = temp;
+    }
+
+    // In-place call: both inputs and the output share the same buffer.
+    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, shift);
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(x[i], y[i]);
+    }
+    free(x);
+    free(y);
+}
+
+// Benchmark: times one in-place dsps_mul_s8_aes3 call over 256 samples held in
+// a 16-byte aligned heap buffer and logs the average cost per sample.
+TEST_CASE("dsps_mul_s8_aes3 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    // Divide by n so the value matches the "cycles per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s8_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+
+}
+#endif // (dsps_mul_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_ansi.c
@@ -0,0 +1,61 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mul.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mul";
+
+// Functional check: squares 64 signed 8-bit values centred on zero in place
+// with dsps_mul_s8_ansi and compares each element with a C reference.
+TEST_CASE("dsps_mul_s8_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t x[n];
+    int8_t y[n];
+    int shift = 0;
+
+    // Build the input values together with the expected element-wise squares.
+    for (int k = 0; k < n; k++) {
+        x[k] = k - n / 2;
+        int32_t product = (int32_t)x[k] * (int32_t)x[k];
+        y[k] = product >> shift;
+    }
+
+    // In-place call: both inputs and the output share the same buffer.
+    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, 0);
+
+    for (int k = 0; k < n; k++) {
+        TEST_ASSERT_EQUAL(x[k], y[k]);
+    }
+}
+
+// Benchmark: times one in-place dsps_mul_s8_ansi call over 256 samples and
+// logs the average cost per sample.
+TEST_CASE("dsps_mul_s8_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    // Divide by n so the value matches the "cycles per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s8_ansi - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/fixed/dsps_mulc_s16_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/fixed/dsps_mulc_s16_ae32.S
@@ -0,0 +1,77 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_mulc_platform.h"
+#if (dsps_mulc_s16_ae32_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mulc_s16_ae32
+    .type   dsps_mulc_s16_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input[i * step_in] * (int32_t)C;
+//         output[i * step_out] = (int16_t)(acc>>15);
+//     }
+//     return ESP_OK;
+// }
+dsps_mulc_s16_ae32: 
+// Multiplies int16 input samples by the constant C and stores each product
+// shifted right by 15 bits as an int16 output sample. Two samples are handled
+// per hardware-loop iteration. Returns 0 (ESP_OK) in a2.
+//
+// Register arguments on entry:
+// input   - a2
+// output   - a3
+// len      - a4
+// C        - a5
+// step_in  - a6
+// step_out - a7
+//
+// NOTE(review): the loop count is len >> 1, so with an odd len the last sample
+// appears to be left unprocessed - confirm callers always pass an even len.
+// NOTE(review): both samples of an iteration come from one 32-bit load, i.e.
+// they are adjacent in memory; that matches input[i * step_in] only when
+// step_in == 1 - confirm. The 32-bit load presumably also requires a 4-byte
+// aligned input pointer.
+
+    entry	a1, 16
+
+    movi.n	a8, 15      // output shift
+    ssr     a8          // shift amount register = 15, used by the src instructions below
+
+    srli    a4, a4, 1   // a4 = len >> 1: number of loop iterations (two samples each)
+    slli 	a6, a6, 2  	// a6 = step_in << 2: input stride in bytes for two int16 samples
+    slli 	a7, a7, 1  	// a7 = step_out << 1: output stride in bytes for one int16 sample
+
+    // ldinc is understood (per the Xtensa MAC16 option) to add 4 to the address
+    // register before loading, so that built-in increment is subtracted from
+    // both the stride and the start pointer.
+    addi    a6, a6, -4;
+    addi    a2, a2, -4;
+
+	ldinc m0, a2        // preload the first pair of input samples into m0
+
+    // The label below carries an "f32" name although this is the s16 routine.
+    loopnez a4, loop_end_mulc_f32_ae32
+        add.n       a2, a2, a6     // input+=step_input;
+        mul.DA.LL   m0, a5         // accumulator = low half of m0 * low half of a5 (C)
+        rsr a8, acchi
+        rsr a9, acclo
+        src a8, a8, a9  // a8 = (acchi:acclo) >> 15, the first result
+    	s16i	a8, a3, 0   // store the first result to the output
+        // rsr a9, acclo
+    	// s16i	a9, a3, 0   // store result to the output
+        add.n   a3, a3, a7     // output+=step_out;
+        mul.DA.HL   m0, a5         // accumulator = high half of m0 * low half of a5 (C)
+
+        rsr a8, acchi
+        rsr a9, acclo
+        // The next pair is fetched only after the accumulator has been read.
+        // NOTE(review): on the final iteration this load reads past the last
+        // pair that was processed - confirm that is acceptable.
+	    ldinc       m0,   a2               // load next data
+        src a10, a8, a9  // a10 = (acchi:acclo) >> 15, the second result
+    	s16i	a10, a3, 0   // store the second result to the output
+        // // rsr a9, acclo
+    	// // s16i	a9, a3, 0   // store result to the output
+        add.n   a3, a3, a7  // output+=step_out;
+loop_end_mulc_f32_ae32:
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+    
+#endif // dsps_mulc_s16_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/fixed/dsps_mulc_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/fixed/dsps_mulc_s16_ansi.c
@@ -0,0 +1,31 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_mulc.h"
+
+// Multiplies each strided int16 input sample by the constant C and stores the
+// 32-bit product shifted right by 15 bits in the strided output.
+// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when either pointer is NULL, otherwise
+// ESP_OK.
+esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out)
+{
+    if ((NULL == input) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    const int32_t gain = (int32_t)C;
+    for (int n = 0; n < len; n++) {
+        // Widen to 32 bits before multiplying so the product cannot overflow.
+        const int32_t product = (int32_t)input[n * step_in] * gain;
+        output[n * step_out] = (int16_t)(product >> 15);
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/float/dsps_mulc_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/float/dsps_mulc_f32_ae32.S
@@ -0,0 +1,57 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_mulc_platform.h"
+#if (dsps_mulc_f32_ae32_enabled == 1)
+
+// Multiplies a float array by a constant; optimized for the ESP32 processor.
+    .text
+    .align  4
+    .global dsps_mulc_f32_ae32
+    .type   dsps_mulc_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_mulc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         output[i * step_out] = input[i * step_in] * C;
+//     }
+//     return ESP_OK;
+// }
+dsps_mulc_f32_ae32: 
+// Multiplies each strided float input sample by the constant C and stores the
+// product in the strided output, one sample per hardware-loop iteration.
+// Returns 0 (ESP_OK) in a2.
+//
+// Register arguments on entry:
+// input   - a2
+// output   - a3
+// len      - a4
+// C        - a5
+// step_in  - a6
+// step_out - a7
+
+    entry	a1, 16
+    
+    slli 	a6, a6, 2  	// a6 = step_in << 2: input stride in bytes (4-byte floats)
+    slli 	a7, a7, 2  	// a7 = step_out << 2: output stride in bytes
+    wfr		f0, a5		// f0 = C, moved from the integer register a5
+
+    // loopnez runs the body a4 (len) times; presumably it is skipped entirely
+    // when len is zero.
+    loopnez a4, loop_end_mulc_f32_ae32
+        lsi      f1, a2, 0      // f1 = current input sample
+
+        mul.s    f2, f1, f0     // f2 = f1 * f0
+        add.n    a2, a2, a6     // input+=step_in;
+        ssi	     f2, a3, 0      // store the product to the output
+        add.n    a3, a3, a7     // output+=step_out;
+loop_end_mulc_f32_ae32:
+
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mulc_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/float/dsps_mulc_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/float/dsps_mulc_f32_ansi.c
@@ -0,0 +1,30 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_mulc.h"
+
+// Multiplies each strided float input sample by the constant C and stores the
+// product in the strided output.
+// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when either pointer is NULL, otherwise
+// ESP_OK.
+esp_err_t dsps_mulc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out)
+{
+    if ((NULL == input) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    for (int n = 0; n < len; n++) {
+        const int src = n * step_in;
+        const int dst = n * step_out;
+        output[dst] = input[src] * C;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/include/dsps_mulc.h
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/include/dsps_mulc.h
@@ -0,0 +1,74 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_mulc_H_
+#define _dsps_mulc_H_
+#include "dsp_err.h"
+
+#include "dsps_mulc_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**@{*/
+/**
+ * @brief   multiply constant
+ *
+ * The function multiplies each element of the input array by a constant value
+ * output[i*step_out] = input[i*step_in]*C; i=[0..len)
+ * The implementation uses ANSI C and can be compiled and run on any platform
+ *
+ * @param[in] input: input array
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param C: constant value
+ * @param step_in: step over input array (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_mulc_f32_ansi(const float *input, float *output, int len, float C, int step_in, int step_out);
+esp_err_t dsps_mulc_f32_ae32(const float *input, float *output, int len, float C, int step_in, int step_out);
+
+esp_err_t dsps_mulc_s16_ae32(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);
+esp_err_t dsps_mulc_s16_ansi(const int16_t *input, int16_t *output, int len, int16_t C, int step_in, int step_out);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#if CONFIG_DSP_OPTIMIZED
+#if (dsps_mulc_f32_ae32_enabled == 1)
+#define dsps_mulc_f32 dsps_mulc_f32_ae32
+#else //
+#define dsps_mulc_f32 dsps_mulc_f32_ansi
+#endif
+#if (dsps_mulc_s16_ae32_enabled == 1)
+#define dsps_mulc_s16 dsps_mulc_s16_ae32
+#else
+#define dsps_mulc_s16 dsps_mulc_s16_ansi
+#endif // dsps_mulc_s16_ae32_enabled
+
+#else
+#define dsps_mulc_f32 dsps_mulc_f32_ansi
+#define dsps_mulc_s16 dsps_mulc_s16_ansi
+#endif
+
+
+#endif // _dsps_mulc_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/include/dsps_mulc_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/include/dsps_mulc_platform.h
@@ -0,0 +1,25 @@
+#ifndef _dsps_mulc_platform_H_
+#define _dsps_mulc_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+
+#define dsps_mulc_f32_ae32_enabled  1
+
+#endif
+
+#if ((XCHAL_HAVE_LOOPS == 1) && (XCHAL_HAVE_MAC16 == 1))
+
+#define dsps_mulc_s16_ae32_enabled 1
+
+#endif //
+#endif // __XTENSA__
+
+
+#endif // _dsps_mulc_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/test/test_dsps_mulc_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/test/test_dsps_mulc_f32_ansi.c
@@ -0,0 +1,70 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mulc.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mulc";
+
+// Functional check: scales a 64-sample float ramp by 10 in place with
+// dsps_mulc_f32_ansi and compares each element with the expected value.
+TEST_CASE("dsps_mulc_f32_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i * 10;
+    }
+    dsps_mulc_f32_ansi(x, x, n, 10, 1, 1);
+    for (int i = 0 ; i < n ; i++) {
+        // Use the float assertion: the integer TEST_ASSERT_EQUAL converts both
+        // operands to integers and would hide a purely fractional mismatch.
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+}
+
+// Functional check plus timing for dsps_mulc_f32: scales a 64-sample float
+// ramp by 10 in place, verifies the result, then measures the cost per sample
+// of one further call.
+TEST_CASE("dsps_mulc_f32 functionality", "[dsps]")
+{
+    int n = 64;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i * 10;
+    }
+    dsps_mulc_f32(x, x, n, 10, 1, 1);
+    for (int i = 0 ; i < n ; i++) {
+        // Use the float assertion: the integer TEST_ASSERT_EQUAL converts both
+        // operands to integers and would hide a purely fractional mismatch.
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+
+    int repeat_count = 1;
+
+    // Untimed call before the measured one.
+    dsps_mulc_f32(x, x, n, 10, 1, 1);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mulc_f32(x, x, n, 10, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (n * repeat_count);
+    ESP_LOGI(TAG, "dsps_mulc_f32 - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/test/test_dsps_mulc_s16_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/test/test_dsps_mulc_s16_ae32.c
@@ -0,0 +1,61 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mulc.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mulc";
+
+// Functional check: scales a 64-sample int16 ramp by the constant 0x2000 in
+// place with dsps_mulc_s16 and compares each element with a C reference.
+TEST_CASE("dsps_mulc_s16 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t x[n];
+    int16_t y[n];
+    int16_t test_const = 0x2000;
+
+    // Build the input ramp together with the expected (product >> 15) values.
+    for (int k = 0; k < n; k++) {
+        x[k] = k << 4;
+        int32_t product = (int32_t)x[k] * (int32_t)test_const;
+        y[k] = product >> 15;
+    }
+
+    // In-place call: input and output share the same buffer.
+    dsps_mulc_s16(x, x, n, test_const, 1, 1);
+
+    for (int k = 0; k < n; k++) {
+        TEST_ASSERT_EQUAL(x[k], y[k]);
+    }
+}
+
+// Benchmark: times one in-place dsps_mulc_s16 call over 256 samples and logs
+// the average cost per sample.
+TEST_CASE("dsps_mulc_s16 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mulc_s16(x, x, n, 10, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    // Divide by n so the value matches the "cycles per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mulc_s16 - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mulc/test/test_dsps_mulc_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mulc/test/test_dsps_mulc_s16_ansi.c
@@ -0,0 +1,61 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mulc.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mulc";
+
+// Functional check: scales a 64-sample int16 ramp by the constant 0x2000 in
+// place with dsps_mulc_s16_ansi and compares each element with a C reference.
+TEST_CASE("dsps_mulc_s16_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t x[n];
+    int16_t y[n];
+    int16_t test_const = 0x2000;
+
+    // Build the input ramp together with the expected (product >> 15) values.
+    for (int k = 0; k < n; k++) {
+        x[k] = k << 4;
+        int32_t product = (int32_t)x[k] * (int32_t)test_const;
+        y[k] = product >> 15;
+    }
+
+    // In-place call: input and output share the same buffer.
+    dsps_mulc_s16_ansi(x, x, n, test_const, 1, 1);
+
+    for (int k = 0; k < n; k++) {
+        TEST_ASSERT_EQUAL(x[k], y[k]);
+    }
+}
+
+// Benchmark: times one in-place dsps_mulc_s16_ansi call over 256 samples and
+// logs the average cost per sample. The test name and log label identify the
+// ANSI variant so this case is distinguishable from the dsps_mulc_s16
+// benchmark.
+TEST_CASE("dsps_mulc_s16_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mulc_s16_ansi(x, x, n, 10, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    // Divide by n so the value matches the "cycles per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mulc_s16_ansi - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sqrt/float/dsps_sqrt_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sqrt/float/dsps_sqrt_f32_ansi.c
@@ -0,0 +1,56 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_sqrt.h"
+#include <math.h>
+
+
+// Fast square-root approximation for a single float.
+//
+// The float's bit pattern is treated as an integer, shifted right by one and
+// offset by the constant 0x1fbb4000 (presumably an exponent-bias correction
+// with an accuracy tweak - confirm against the algorithm's reference).
+//
+// f: input value; the bit trick assumes int and float have the same size.
+// Returns the approximate square root of f.
+//
+// Defined without `inline` so this translation unit always provides the
+// external definition matching the plain prototype in dsps_sqrt.h.
+float dsps_sqrtf_f32_ansi(float f)
+{
+    // Reinterpret through a union instead of casting pointers: reading a float
+    // object through an int pointer violates the strict-aliasing rule.
+    union {
+        float f;
+        int i;
+    } conv = {f}; // member 'f' set to the input value.
+    conv.i = 0x1fbb4000 + (conv.i >> 1);
+    return conv.f;
+}
+
+// Writes the fast square-root approximation of each of the first len input
+// elements to the matching output element.
+// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE when either pointer is NULL, otherwise
+// ESP_OK.
+esp_err_t dsps_sqrt_f32_ansi(const float *input, float *output, int len)
+{
+    if ((NULL == input) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    const float *src = input;
+    float *dst = output;
+    for (int remaining = len; remaining > 0; remaining--) {
+        *dst++ = dsps_sqrtf_f32_ansi(*src++);
+    }
+    return ESP_OK;
+}
+
+// Approximates 1/sqrt(data): an integer operation on the float's bit pattern
+// produces an initial estimate, which is then refined by a single correction
+// step of the form y * (1.5 - 0.5 * data * y * y).
+float dsps_inverted_sqrtf_f32_ansi(float data )
+{
+    union {
+        float f;
+        uint32_t i;
+    } bits;
+
+    bits.f = data;
+    bits.i = 0x5f3759df - (bits.i >> 1);    // initial estimate from the bit pattern
+
+    const float half_data = data * 0.5F;
+    const float estimate = bits.f;
+    return estimate * (1.5F - (half_data * estimate * estimate));
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sqrt/include/dsps_sqrt.h
+++ b/managed_components/espressif__esp-dsp/modules/math/sqrt/include/dsps_sqrt.h
@@ -0,0 +1,91 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_sqrt_H_
+#define _dsps_sqrt_H_
+#include "dsp_err.h"
+
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**@{*/
+/**
+ * @brief   square root approximation
+ *
+ * The function computes a square root approximation for every array element
+ * output[i] ~ sqrt(input[i]); i=[0..len)
+ * The implementation uses ANSI C and can be compiled and run on any platform
+ *
+ * @param[in] input: input array
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_sqrt_f32_ansi(const float *input, float *output, int len);
+//esp_err_t dsps_sqrt_s32_ansi(const int32_t *input, int16_t *output, int len);
+
+/**@{*/
+/**
+ * @brief   square root approximation
+ *
+ * The function takes square root approximation
+ * x ~ sqrt(y);
+ * The implementation use ANSI C and could be compiled and run on any platform
+ *
+ * @param[in] data: input value
+ *
+ * @return
+ *      - square root value
+ */
+float dsps_sqrtf_f32_ansi(const float data);
+
+
+/**@{*/
+/**
+ * @brief   inverted square root approximation
+ *
+ * The function takes inverted square root approximation
+ * x ~ 1/sqrt(y);
+ * The implementation use ANSI C and could be compiled and run on any platform
+ *
+ * @param[in] data: input value
+ *
+ * @return
+ *      - inverted square root value
+ */
+float dsps_inverted_sqrtf_f32_ansi(float data );
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#ifdef CONFIG_DSP_OPTIMIZED
+#define dsps_sqrt_f32 dsps_sqrt_f32_ansi
+#define dsps_sqrtf_f32 dsps_sqrtf_f32_ansi
+#define dsps_inverted_sqrtf_f32 dsps_inverted_sqrtf_f32_ansi
+#else
+#define dsps_sqrt_f32 dsps_sqrt_f32_ansi
+#define dsps_sqrtf_f32 dsps_sqrtf_f32_ansi
+#define dsps_inverted_sqrtf_f32 dsps_inverted_sqrtf_f32_ansi
+#endif
+
+#endif // _dsps_sqrt_H_
--- a/managed_components/espressif__esp-dsp/modules/math/sqrt/test/test_dsps_sqrt_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sqrt/test/test_dsps_sqrt_f32_ansi.c
@@ -0,0 +1,81 @@
+// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_sqrt.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_sqrt";
+
+// Accuracy check for the scalar square-root approximation: compares
+// dsps_sqrtf_f32 with sqrtf() on 100000 random values and fails when the
+// relative error of any sample exceeds -25 dB. The extreme errors seen and the
+// values that produced them are logged at the end.
+TEST_CASE("dsps_sqrtf_f32_ansi functionality", "[dsps]")
+{
+    float max_err = -1000;
+    float max_value = 0;
+    float min_err = 0;
+    float min_value = (float)INT32_MAX;
+    int test_points = 100000;
+    for (int i = 0; i < test_points; i++) {
+        float test_value = rand();
+        if (test_value == 0) {
+            // The relative error below divides by sqrtf(test_value); a zero
+            // sample would yield inf/NaN and fail the test spuriously.
+            continue;
+        }
+        float x = sqrtf(test_value);
+        float y = dsps_sqrtf_f32(test_value);
+        // Relative error in dB; the small offset keeps log10f() finite when
+        // the two results are identical.
+        float error = 20 * log10f(fabsf((x - y) / x) + 0.000001);
+        if (error > max_err) {
+            max_err = error;
+            max_value = test_value;
+        }
+        if (error < min_err) {
+            min_err = error;
+            min_value = test_value;
+        }
+        if (error > -25) {
+            // Cast to unsigned for the hex print: the float may round up to
+            // 2^31, which does not fit in a signed int.
+            ESP_LOGE(TAG, "dsps_sqrtf_f32_ansi: error = %f dB,  value = %f (0x%8.8x)\n", error, test_value, (unsigned int)test_value);
+            // Float assertion: the integer TEST_ASSERT_EQUAL truncates both
+            // operands and could pass despite the excessive error.
+            TEST_ASSERT_EQUAL_FLOAT(x, y);
+        }
+    }
+    ESP_LOGI(TAG, "dsps_sqrtf_f32_ansi: max error = %f dB,   min error = %f dB, max_value = %f (0x%8.8x), min_value = %f (0x%8.8x)\n", max_err, min_err, max_value, (unsigned int)max_value, min_value, (unsigned int)min_value);
+}
+
+// Accuracy and timing check for the array square-root approximation: feeds
+// perfect squares of 0, 10, 20, ... and fails when the relative error of any
+// result exceeds -25 dB. The cycle count of the call is logged.
+TEST_CASE("dsps_sqrt_f32_ansi functionality", "[dsps]")
+{
+    int n = 256;
+    float *x = (float *)malloc(sizeof(float) * n);
+    float *result = (float *)malloc(sizeof(float) * n);
+    float *y = (float *)malloc(sizeof(float) * n);
+    // Fail the test cleanly instead of dereferencing NULL if the heap is exhausted.
+    TEST_ASSERT_NOT_NULL(x);
+    TEST_ASSERT_NOT_NULL(result);
+    TEST_ASSERT_NOT_NULL(y);
+    for (int i = 0 ; i < n ; i++) {
+        y[i] = i * 10;
+        x[i] = y[i] * y[i];
+    }
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sqrt_f32_ansi(x, result, n);
+    float cycles = dsp_get_cpu_cycle_count() - start_b;
+
+    for (int i = 0 ; i < n ; i++) {
+        if (y[i] == 0) {
+            // A relative error is undefined for an expected value of zero, so
+            // compare on an absolute tolerance instead.
+            TEST_ASSERT_FLOAT_WITHIN(0.000001, y[i], result[i]);
+            continue;
+        }
+        float error = 20 * log10f(fabsf((result[i] - y[i]) / y[i]) + 0.000001);
+        if (error > -25) {
+            // Float assertion: the integer TEST_ASSERT_EQUAL truncates both
+            // operands and could pass despite the excessive error.
+            TEST_ASSERT_EQUAL_FLOAT(y[i], result[i]);
+        }
+    }
+    ESP_LOGI(TAG, "dsps_sqrt_f32_ansi - %f cycles for %i samples \n", cycles, n);
+    free(x);
+    free(y);
+    free(result);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s16_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s16_ae32.S
@@ -0,0 +1,74 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_sub_platform.h"
+#if (dsps_sub_s16_ae32_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_sub_s16_ae32
+    .type   dsps_sub_s16_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_sub_s16_ae32: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+
+    entry	a1, 16
+
+
+    // The first six arguments arrive in a2..a7; step_out and shift are loaded
+    // from a1+16 and a1+20 after the entry instruction.
+    // a9 first carries the shift amount and, once SAR is programmed, is reused
+    // as the register that holds the shifted result waiting to be stored.
+
+    l32i.n	a10, a1, 16     // Load step_out to the a10 register
+    l32i.n	a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = a9
+
+    slli 	a6, a6, 1  	// a6  - step_in1<<1 (element step -> byte step)
+    slli 	a7, a7, 1  	// a7  - step_in2<<1
+    slli 	a10, a10, 1 // a10 - step_out<<1
+
+    // The loop is software pipelined: the result for element 0 is computed
+    // here, and every iteration stores the pending result while computing the
+    // next one. As a consequence the last iteration loads (but never stores)
+    // one element past the processed range, and element 0 is loaded when len == 0.
+
+    l16si   a11, a2, 0
+    l16si   a8,  a3, 0
+    sub     a8, a11, a8     // a8 = input1[0] - input2[0] (subtract, same as inside the loop)
+    srl     a9, a8          // a9 = a8>>sar
+
+    loopnez a5, .loop_end_sub_s16_ae32
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+
+        l16si   a11, a2, 0
+        l16si   a8,  a3, 0
+        s16i	a9,  a4, 0      // store the pending result to the output
+        sub     a8, a11, a8     // a8 = input1 - input2 for the next element
+    	srl     a9, a8          // a9 = a8>>sar
+
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_sub_s16_ae32:
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_sub_s16_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s16_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s16_aes3.S
@@ -0,0 +1,104 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_sub_platform.h"
+#if (dsps_sub_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_sub_s16_aes3
+    .type   dsps_sub_s16_aes3,@function
+// The function implements the following C code:
+// esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_sub_s16_aes3: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+
+    entry	a1, 16
+
+    l32i.n	a10, a1, 16     // Load step_out to the a10 register
+    l32i.n	a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = a9
+
+    // The vector path requires every step to be exactly 1
+    addi    a15, a6, -1
+    bnez    a15, .sub_s16_ae32_mode // Branch if step1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .sub_s16_ae32_mode // Branch if step2 != 1
+    addi    a15, a10, -1
+    bnez    a15,.sub_s16_ae32_mode  // Branch if step_out != 1
+
+    // The vector subtract applies no shift, so a non-zero shift uses the scalar path
+    bnez    a9, .sub_s16_ae32_mode
+
+    // Check addresses: the 128-bit loads and stores work on 16-byte aligned pointers
+    movi        a15, 0xF                                              // modulo 16 mask
+    bany        a2, a15, .sub_s16_ae32_mode                          // jump if input1 is unaligned
+    bany        a3, a15, .sub_s16_ae32_mode                          // jump if input2 is unaligned
+    bany        a4, a15, .sub_s16_ae32_mode                          // jump if output is unaligned
+
+    // Check length (must be a multiple of 8)
+    movi        a15, 0x7                                              // modulo 8 mask
+    bany        a5, a15, .sub_s16_ae32_mode                          // jump if != 0
+
+    // Process main function for S3: 8 x int16 per iteration. All steps are 1 and
+    // shift is 0 here, so neither step scaling nor SAR is needed on this path.
+    // q0 is preloaded from input1; each iteration loads q1 from input2, computes
+    // q4 = q0 - q1 (saturating per the instruction definition), reloads q0 and stores q4.
+    // NOTE: the reload in the last iteration reads 16 bytes past the end of input1.
+    srli    a5, a5, 3
+    ee.vld.128.ip     q0, a2, 16
+    loopnez a5, .loop_end_sub_s16_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vsubs.s16.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_sub_s16_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+
+.sub_s16_ae32_mode:
+    slli 	a6, a6, 1  	// a6  - step_in1<<1 (element step -> byte step)
+    slli 	a7, a7, 1  	// a7  - step_in2<<1
+    slli 	a10, a10, 1 // a10 - step_out<<1
+
+    l16si   a11, a2, 0
+    l16si   a8,  a3, 0
+    sub     a8, a11, a8     // a8 = input1[0] - input2[0]
+    srl     a9, a8          // a9 = a8>>sar (a9 no longer holds shift from here on)
+
+    loopnez a5, .loop_end_sub_s16_aes3
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+
+        l16si   a11, a2, 0
+        l16si   a8,  a3, 0
+        s16i	a9,  a4, 0      // store the pending result to the output
+        sub     a8, a11, a8     // a8 = input1 - input2 for the next element
+    	srl     a9, a8          // a9 = a8>>sar
+
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_sub_s16_aes3:
+    // Exit for Esp32 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_sub_s16_aes3_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s16_ansi.c
@@ -0,0 +1,26 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_sub.h"   // declares dsps_sub_s16_ansi, so the definition is checked against its prototype
+
+esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    if (NULL == input1) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    if (NULL == input2) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+    if (NULL == output) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    for (int i = 0 ; i < len ; i++) {
+        int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];   // widen first so the difference cannot overflow
+        output[i * step_out] = acc >> shift;                                      // scale down, then truncate to int16_t
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s8_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s8_aes3.S
@@ -0,0 +1,97 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_sub_platform.h"
+#if (dsps_sub_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_sub_s8_aes3
+    .type   dsps_sub_s8_aes3,@function
+// The function implements the following C code:
+// esp_err_t dsps_sub_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] - (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_sub_s8_aes3: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+
+    entry	a1, 16
+
+    l32i.n	a10, a1, 16     // Load step_out to the a10 register
+    l32i.n	a9, a1,  20     // Load shift to the a9 register
+    ssr     a9              // sar = a9
+
+    // The vector path requires every step to be exactly 1
+    addi    a15, a6, -1
+    bnez    a15, .sub_s8_ae32_mode // Branch if step1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .sub_s8_ae32_mode // Branch if step2 != 1
+    addi    a15, a10, -1
+    bnez    a15,.sub_s8_ae32_mode  // Branch if step_out != 1
+
+    // The vector subtract applies no shift, so a non-zero shift uses the scalar path
+    bnez    a9, .sub_s8_ae32_mode
+
+    // Check addresses: the 128-bit loads and stores work on 16-byte aligned pointers
+    movi        a15, 0xF                                              // modulo 16 mask
+    bany        a2, a15, .sub_s8_ae32_mode                          // jump if input1 is unaligned
+    bany        a3, a15, .sub_s8_ae32_mode                          // jump if input2 is unaligned
+    bany        a4, a15, .sub_s8_ae32_mode                          // jump if output is unaligned
+
+    // Check length (must be a multiple of 16)
+    movi        a15, 0xf                                              // modulo 16 mask
+    bany        a5, a15, .sub_s8_ae32_mode                          // jump if != 0
+
+    // Process main function for S3: 16 x int8 per iteration (steps are 1, shift is 0)
+    srli    a5, a5, 4
+    ee.vld.128.ip     q0, a2, 16
+    loopnez a5, .loop_end_sub_s8_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vsubs.s8.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_sub_s8_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+.sub_s8_ae32_mode:              // scalar fallback, software pipelined, steps are already in bytes
+    l8ui    a11, a2, 0
+    l8ui    a8,  a3, 0
+    sext    a11, a11, 7     // l8ui zero-extends: restore the sign of the int8 operand
+    sext    a8,  a8,  7
+    sub     a8, a11, a8     // a8 = input1[0] - input2[0]
+    srl     a9, a8          // a9 = a8>>sar (a9 no longer holds shift from here on)
+
+    loopnez a5, .loop_end_sub_s8_aes3
+        add.n   a2, a2, a6      // input1+=step_in1;
+        add.n   a3, a3, a7      // input2+=step_in2;
+        l8ui   a11, a2, 0
+        l8ui   a8,  a3, 0
+        s8i	a9,  a4, 0      // store the pending result to the output
+        sext    a11, a11, 7     // sign-extend both int8 operands before subtracting
+        sext    a8,  a8,  7
+        sub     a8, a11, a8     // a8 = input1 - input2 for the next element
+    	srl     a9, a8          // a9 = a8>>sar
+        add.n   a4, a4, a10     // output+=step_out;
+.loop_end_sub_s8_aes3:
+    // Exit for Esp32 mode
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_sub_s16_aes3_enabled (this macro also gates the s8 kernel)
--- a/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/fixed/dsps_sub_s8_ansi.c
@@ -0,0 +1,26 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_sub.h"
+
+esp_err_t dsps_sub_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    // All three buffers are mandatory; a missing one is reported as a parameter error.
+    if ((NULL == input1) || (NULL == input2) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // output[idx * step_out] = (input1[idx * step1] - input2[idx * step2]) >> shift
+    // Steps are expressed in elements; each array is walked with its own step.
+    for (int idx = 0 ; idx < len ; idx++) {
+        // Widen both operands to 32 bits before subtracting.
+        const int32_t minuend = (int32_t)input1[idx * step1];
+        const int32_t subtrahend = (int32_t)input2[idx * step2];
+        // The shifted difference is truncated to int8_t by the assignment.
+        output[idx * step_out] = (minuend - subtrahend) >> shift;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sub/float/dsps_sub_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/float/dsps_sub_f32_ae32.S
@@ -0,0 +1,64 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_sub_platform.h"
+#if (dsps_sub_f32_ae32_enabled == 1)
+
+// Element-wise float subtraction using the Xtensa FP (lsi/sub.s/ssi) and zero-overhead loop instructions.
+    .text
+    .align  4
+    .global dsps_sub_f32_ae32
+    .type   dsps_sub_f32_ae32,@function
+// The function implements the following C code:
+// esp_err_t dsps_sub_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         output[i * step_out] = input1[i * step1] - input2[i * step2];
+//     }
+//     return ESP_OK;
+// }
+
+dsps_sub_f32_ae32: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step1    - a6
+// step2    - a7
+// step_out - stack (a8) 
+
+    entry	a1, 16
+    
+    l32i.n	a8, a1, 16 // Load step_out to the a8 register
+    slli 	a6, a6, 2  // a6 - step1<<2 (element step -> byte step)
+    slli 	a7, a7, 2  // a7 - step2<<2
+    slli 	a8, a8, 2  // a8 - step_out<<2
+
+        lsi     f0, a2, 0      // preload input1[0]; executed even when len == 0
+        add.n   a2, a2, a6     // input1_ptr+=step1;
+    loopnez a5, loop_end_sub_f32_ae32
+        lsi      f1, a3, 0      // f1 = current input2 element
+        add.n    a3, a3, a7     // input2_ptr+=step2;
+
+        sub.s    f2, f0, f1     // f2 = f0 - f1
+        lsi      f0, a2, 0      // preload the next input1 element (one past the range on the last iteration)
+        add.n    a2, a2, a6     // input1_ptr+=step1;        
+        ssi	     f2, a4, 0      // store the difference to the output
+        add.n    a4, a4, a8     // output_ptr+=step_out;
+loop_end_sub_f32_ae32:
+
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_sub_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/sub/float/dsps_sub_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/float/dsps_sub_f32_ansi.c
@@ -0,0 +1,33 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_sub.h"
+
+esp_err_t dsps_sub_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+{
+    // Every buffer must be provided; otherwise report a parameter error.
+    if (!input1 || !input2 || !output) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // output[k * step_out] = input1[k * step1] - input2[k * step2]
+    // Steps are expressed in elements; each array is walked with its own step.
+    for (int k = 0 ; k < len ; k++) {
+        const float lhs = input1[k * step1];
+        const float rhs = input2[k * step2];
+
+        output[k * step_out] = lhs - rhs;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sub/include/dsps_sub.h
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/include/dsps_sub.h
@@ -0,0 +1,87 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_sub_H_
+#define _dsps_sub_H_
+#include "dsp_err.h"
+
+#include "dsps_sub_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   sub arrays
+ *
+ * The function subtracts one array from another:
+ * out[i*step_out] = input1[i*step1] - input2[i*step2]; i=[0..len)
+ * The s16 and s8 variants additionally shift the difference right by `shift` before storing it.
+ * The _ansi variants are written in ANSI C and can be compiled and run on any platform.
+ * @param[in] input1: input array 1
+ * @param[in] input2: input array 2
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param step1: step over input array 1 (by default should be 1)
+ * @param step2: step over input array 2 (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ * @param shift: right shift applied to the difference (s16 and s8 variants only)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_sub_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+esp_err_t dsps_sub_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+
+esp_err_t dsps_sub_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_sub_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_sub_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+
+esp_err_t dsps_sub_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_sub_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#if CONFIG_DSP_OPTIMIZED
+
+#if (dsps_sub_f32_ae32_enabled == 1)
+#define dsps_sub_f32 dsps_sub_f32_ae32
+#else
+#define dsps_sub_f32 dsps_sub_f32_ansi
+#endif
+
+#if (dsps_sub_s16_aes3_enabled == 1)
+#define dsps_sub_s16 dsps_sub_s16_aes3
+#define dsps_sub_s8 dsps_sub_s8_aes3
+#elif (dsps_sub_s16_ae32_enabled == 1)
+#define dsps_sub_s16 dsps_sub_s16_ae32
+#define dsps_sub_s8 dsps_sub_s8_ansi
+#else
+#define dsps_sub_s16 dsps_sub_s16_ansi
+#define dsps_sub_s8 dsps_sub_s8_ansi
+#endif
+
+#else // CONFIG_DSP_OPTIMIZED
+#define dsps_sub_f32 dsps_sub_f32_ansi
+#define dsps_sub_s16 dsps_sub_s16_ansi
+#define dsps_sub_s8  dsps_sub_s8_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dsps_sub_H_
--- a/managed_components/espressif__esp-dsp/modules/math/sub/include/dsps_sub_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/include/dsps_sub_platform.h
@@ -0,0 +1,30 @@
+#ifndef _dsps_sub_platform_H_
+#define _dsps_sub_platform_H_
+
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+// The f32 kernel uses FP instructions (lsi/sub.s/ssi) together with loopnez
+#define dsps_sub_f32_ae32_enabled  1
+
+#endif
+
+#if (XCHAL_HAVE_LOOPS == 1)
+// The s16 kernel is integer only: it needs the loop option but no FP unit
+#define dsps_sub_s16_ae32_enabled  1
+#endif
+
+#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
+// dsps_sub_s16_aes3_enabled gates both the s16 and the s8 vector kernels
+#define dsps_sub_f32_ae32_enabled  1
+#define dsps_sub_s16_aes3_enabled  1
+#endif
+
+#endif // __XTENSA__
+
+#endif // _dsps_sub_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_f32_ansi.c
@@ -0,0 +1,73 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_sub.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_sub";
+
+TEST_CASE("dsps_sub_f32_ansi functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    float z[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i * 3;
+        y[i] = i * 2;
+        z[i] = i;       // expected difference: 3*i - 2*i
+    }
+    // In-place call: the result overwrites x.
+    dsps_sub_f32_ansi(x, y, x, n, 1, 1, 1);
+    for (int i = 0 ; i < n ; i++) {
+        // TEST_ASSERT_EQUAL compares as integers and would let fractional errors pass.
+        TEST_ASSERT_EQUAL_FLOAT(z[i], x[i]);
+    }
+}
+
+TEST_CASE("dsps_sub_f32 functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    float z[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i * 3;
+        y[i] = i * 2;
+        z[i] = i;       // expected difference: 3*i - 2*i
+    }
+    dsps_sub_f32(x, y, x, n, 1, 1, 1);      // in-place call: the result overwrites x
+    for (int i = 0 ; i < n ; i++) {
+        // TEST_ASSERT_EQUAL compares as integers and would let fractional errors pass.
+        TEST_ASSERT_EQUAL_FLOAT(z[i], x[i]);
+    }
+
+    int repeat_count = 1;
+    // Untimed call before the measured one.
+    dsps_sub_f32(x, x, x, n, 1, 1, 1);
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sub_f32(x, x, x, n, 1, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (n * repeat_count);
+    ESP_LOGI(TAG, "dsps_sub_f32 - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s16_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s16_ae32.c
@@ -0,0 +1,57 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_sub.h"
+#include "esp_attr.h"
+
+#if (dsps_sub_s16_ae32_enabled == 1)
+
+static const char *TAG = "dsps_sub";
+
+TEST_CASE("dsps_sub_s16_ae32 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t samples[n];
+    int16_t expected[n];
+    int shift = 0;
+
+    // Reference values are computed in C from the same operand on both sides.
+    for (int k = 0 ; k < n ; k++) {
+        samples[k] = k << 4;
+        expected[k] = (int16_t)(((int32_t)samples[k] - (int32_t)samples[k]) >> shift);
+    }
+
+    // The kernel runs in place: both inputs and the output are the same buffer.
+    dsps_sub_s16_ae32(samples, samples, samples, n, 1, 1, 1, shift);
+    // Every element must match the C reference.
+    for (int k = 0 ; k < n ; k++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", k, samples[k], expected[k]);
+        TEST_ASSERT_EQUAL(samples[k], expected[k]);
+    }
+}
+
+TEST_CASE("dsps_sub_s16_ae32 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sub_s16_ae32(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;    // the log line reports cycles per sample, not the total
+    ESP_LOGI(TAG, "dsps_sub_s16_ae32 - %f cycles per sample \n", cycles);
+}
+#endif // dsps_sub_s16_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s16_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s16_aes3.c
@@ -0,0 +1,61 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_sub.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_sub_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_sub";
+TEST_CASE("dsps_sub_s16_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));   // 16-byte aligned for the vector kernel
+    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
+    if ((x == NULL) || (y == NULL)) {
+        free(x);    // free(NULL) is a no-op, so whichever allocation succeeded is released
+        free(y);
+        TEST_FAIL_MESSAGE("memalign failed");
+    }
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        y[i] = (int16_t)(((int32_t)x[i] - (int32_t)x[i]) >> shift);   // reference computed in C
+    }
+    dsps_sub_s16_aes3(x, x, x, n, 1, 1, 1, shift);      // in-place call
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(x[i], y[i]);
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_sub_s16_aes3 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));   // 16-byte aligned for the vector kernel
+    TEST_ASSERT_NOT_NULL(x);
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sub_s16_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;    // the log line reports cycles per sample, not the total
+    ESP_LOGI(TAG, "dsps_sub_s16_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+}
+#endif // (dsps_sub_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s16_ansi.c
@@ -0,0 +1,54 @@
+/*
+ * SPDX-FileCopyrightText: 2018-2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_sub.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_sub";
+
+TEST_CASE("dsps_sub_s16_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int shift = 0;
+    int16_t data[n];        // operands, overwritten with the result (in-place call)
+    int16_t reference[n];   // expected values computed below
+    int32_t diff;
+    for (int pos = 0 ; pos < n ; pos++) {
+        data[pos] = pos << 4;
+        diff = (int32_t)data[pos] - (int32_t)data[pos];
+        reference[pos] = diff >> shift;
+    }
+
+    dsps_sub_s16_ansi(data, data, data, n, 1, 1, 1, shift);
+    for (int pos = 0 ; pos < n ; pos++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", pos, data[pos], reference[pos]);
+        if (data[pos] == reference[pos]) {
+            continue;
+        }
+        TEST_ASSERT_EQUAL(data[pos], reference[pos]);
+    }
+}
+
+TEST_CASE("dsps_sub_s16_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sub_s16_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;    // the log line reports cycles per sample, not the total
+    ESP_LOGI(TAG, "dsps_sub_s16_ansi - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s8_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s8_aes3.c
@@ -0,0 +1,61 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_sub.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_sub_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_sub";
+TEST_CASE("dsps_sub_s8_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));   // 16-byte aligned for the vector kernel
+    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
+    if ((x == NULL) || (y == NULL)) {
+        free(x);    // free(NULL) is a no-op, so whichever allocation succeeded is released
+        free(y);
+        TEST_FAIL_MESSAGE("memalign failed");
+    }
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i - n / 2;
+        y[i] = (int8_t)(((int32_t)x[i] - (int32_t)x[i]) >> shift);   // reference computed in C
+    }
+    dsps_sub_s8_aes3(x, x, x, n, 1, 1, 1, shift);       // in-place call
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(x[i], y[i]);
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_sub_s8_aes3 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));   // 16-byte aligned for the vector kernel
+    TEST_ASSERT_NOT_NULL(x);
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sub_s8_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;    // the log line reports cycles per sample, not the total
+    ESP_LOGI(TAG, "dsps_sub_s8_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+}
+#endif // (dsps_sub_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/sub/test/test_dsps_sub_s8_ansi.c
@@ -0,0 +1,53 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_sub.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_sub";
+
+TEST_CASE("dsps_sub_s8_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t values[n];       // operands, overwritten with the result (in-place call)
+    int8_t expected[n];     // reference values computed in C
+    int shift = 0;
+    // Operands cover the range -n/2 .. n/2-1.
+    for (int k = 0 ; k < n ; k++) {
+        values[k] = k - n / 2;
+        expected[k] = (int8_t)(((int32_t)values[k] - (int32_t)values[k]) >> shift);
+    }
+
+    // The call passes a literal shift of 0.
+    dsps_sub_s8_ansi(values, values, values, n, 1, 1, 1, 0);
+    for (int k = 0 ; k < n ; k++) {
+        if (values[k] != expected[k]) {
+            TEST_ASSERT_EQUAL(values[k], expected[k]);
+        }
+    }
+}
+
+TEST_CASE("dsps_sub_s8_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_sub_s8_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float cycles = (float)(end_b - start_b) / n;    // the log line reports cycles per sample, not the total
+    ESP_LOGI(TAG, "dsps_sub_s8_ansi - %f cycles per sample \n", cycles);
+}