add some code

2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ae32.S
@@ -0,0 +1,74 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_s16_ae32_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mul_s16_ae32
+    .type   dsps_mul_s16_ae32,@function
+// Element-wise int16 multiply with right shift, scalar version for Xtensa cores.
+// Equivalent C code (the NULL checks of dsps_mul_s16_ansi are not performed here):
+// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+//
+// The loop is software pipelined: the product for element i+1 is computed in
+// the same iteration that stores element i. The first product is therefore
+// computed before the loop, and one element past the last one is loaded from
+// each input (its product is never stored).
+dsps_mul_s16_ae32:
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+// Returns ESP_OK (0) in a2.
+
+    entry   a1, 16
+
+    l32i.n  a10, a1, 16     // a10 = step_out (stack argument)
+    l32i.n  a9,  a1, 20     // a9  = shift    (stack argument)
+    ssr     a9              // sar = shift
+
+    // Convert element steps to byte steps (sizeof(int16_t) == 2)
+    slli    a6,  a6,  1     // a6  = step_in1 * 2
+    slli    a7,  a7,  1     // a7  = step_in2 * 2
+    slli    a10, a10, 1     // a10 = step_out * 2
+
+    // Pipeline prologue: compute the result for element 0.
+    // This has to be a multiplication, exactly as in the loop body.
+    l16si   a11, a2, 0      // a11 = input1[0]
+    l16si   a8,  a3, 0      // a8  = input2[0]
+    mull    a8,  a11, a8    // a8  = input1[0] * input2[0]
+    sra     a9,  a8         // a9  = a8 >> sar (arithmetic, as in the C code)
+
+    // Zero-overhead loop, executed len times (skipped when len == 0)
+    loopnez a5, .loop_end_mul_s16_ae32
+        add.n   a2, a2, a6      // input1 += step_in1
+        add.n   a3, a3, a7      // input2 += step_in2
+
+        l16si   a11, a2, 0      // load the next pair of operands
+        l16si   a8,  a3, 0
+        s16i    a9,  a4, 0      // store the pending result to the output
+        mull    a8,  a11, a8    // a8 = product of the next pair
+        sra     a9,  a8         // a9 = a8 >> sar
+
+        add.n   a4, a4, a10     // output += step_out
+.loop_end_mul_s16_ae32:
+    movi.n  a2, 0               // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_s16_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_aes3.S
@@ -0,0 +1,104 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mul_s16_aes3
+    .type   dsps_mul_s16_aes3,@function
+// Element-wise int16 multiply with right shift for ESP32-S3: 128-bit vector
+// path plus a scalar fallback. Equivalent C code (without the NULL checks):
+// esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+// Both paths load input data one step ahead of its use, so data just past the
+// end of the inputs is read (but never stored).
+dsps_mul_s16_aes3:
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+// Returns ESP_OK (0) in a2.
+
+    entry   a1, 16
+
+    l32i.n  a10, a1, 16     // a10 = step_out (stack argument)
+    l32i.n  a9,  a1, 20     // a9  = shift    (stack argument)
+    ssr     a9              // sar = shift (used by the scalar fallback)
+
+    // The vector path handles contiguous data only: every step must be 1
+    addi    a15, a6, -1
+    bnez    a15, .mul_s16_ae32_mode     // step_in1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .mul_s16_ae32_mode     // step_in2 != 1
+    addi    a15, a10, -1
+    bnez    a15, .mul_s16_ae32_mode     // step_out != 1
+
+    // All three buffers must be 16-byte aligned for the 128-bit accesses.
+    // The output is checked as well, because it is written with ee.vst.128.ip.
+    movi    a15, 0xF                    // modulo 16 mask
+    bany    a2, a15, .mul_s16_ae32_mode // input1 is not aligned
+    bany    a3, a15, .mul_s16_ae32_mode // input2 is not aligned
+    bany    a4, a15, .mul_s16_ae32_mode // output is not aligned
+
+    // The length must be a multiple of 8 (eight int16 values per vector)
+    movi    a15, 0x7                    // modulo 8 mask
+    bany    a5, a15, .mul_s16_ae32_mode // len % 8 != 0
+
+    // Vector path for ESP32-S3
+    wsr.sar a9                          // load sar register with the shift
+    srli    a5, a5, 3                   // a5 = number of 8-element vectors
+    ee.vld.128.ip     q0, a2, 16        // preload the first vector of input1
+    loopnez a5, .loop_end_mul_s16_aes3_main
+        ee.vld.128.ip     q1, a3, 16                // q1 = next vector of input2
+        ee.vmul.s16.ld.incp q0, a2, q4, q0, q1      // q4 = q0 * q1 (shifted), reload q0
+        ee.vst.128.ip     q4, a4, 16                // store eight results
+.loop_end_mul_s16_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+// Scalar fallback for strided, unaligned or odd-length data. The loop is
+// software pipelined: element i is stored while element i+1 is computed.
+.mul_s16_ae32_mode:
+    slli    a6,  a6,  1     // a6  = step_in1 * 2 (bytes)
+    slli    a7,  a7,  1     // a7  = step_in2 * 2 (bytes)
+    slli    a10, a10, 1     // a10 = step_out * 2 (bytes)
+
+    l16si   a11, a2, 0      // a11 = input1[0]
+    l16si   a8,  a3, 0      // a8  = input2[0]
+    mull    a8,  a11, a8    // a8  = input1[0] * input2[0]
+    sra     a9,  a8         // a9  = a8 >> sar (arithmetic, as in the C code)
+
+    loopnez a5, .loop_end_mul_s16_aes3
+        add.n   a2, a2, a6      // input1 += step_in1
+        add.n   a3, a3, a7      // input2 += step_in2
+
+        l16si   a11, a2, 0      // load the next pair of operands
+        l16si   a8,  a3, 0
+        s16i    a9,  a4, 0      // store the pending result to the output
+        mull    a8,  a11, a8    // a8 = product of the next pair
+        sra     a9,  a8         // a9 = a8 >> sar
+
+        add.n   a4, a4, a10     // output += step_out
+.loop_end_mul_s16_aes3:
+    // Exit for Esp32 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_s16_aes3_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s16_ansi.c
@@ -0,0 +1,34 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_mul.h"
+
+esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    // All three buffers are mandatory.
+    if ((input1 == NULL) || (input2 == NULL) || (output == NULL)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Multiply element by element with independent strides; the product is
+    // formed as an int, shifted right and then narrowed to int16_t.
+    int i = 0;
+    while (i < len) {
+        const int product = (int)input1[i * step1] * (int)input2[i * step2];
+        output[i * step_out] = product >> shift;
+        i++;
+    }
+
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_aes3.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_aes3.S
@@ -0,0 +1,97 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+    .text
+    .align  4
+    .global dsps_mul_s8_aes3
+    .type   dsps_mul_s8_aes3,@function
+// Element-wise int8 multiply with right shift for ESP32-S3: 128-bit vector
+// path plus a scalar fallback. Equivalent C code (without the NULL checks):
+// esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         int32_t acc = (int32_t)input1[i * step1] * (int32_t)input2[i * step2];
+//         output[i * step_out] = acc >> shift;
+//     }
+//     return ESP_OK;
+// }
+dsps_mul_s8_aes3:
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step_in1 - a6
+// step_in2 - a7
+// step_out - stack (a10)
+// shift    - stack (a9)
+
+    entry   a1, 16
+
+    l32i.n  a10, a1, 16     // a10 = step_out (stack argument)
+    l32i.n  a9,  a1, 20     // a9  = shift    (stack argument)
+    ssr     a9              // sar = shift (used by the scalar fallback)
+
+    // The vector path handles contiguous data only: every step must be 1
+    addi    a15, a6, -1
+    bnez    a15, .sub_s8_ae32_mode      // step_in1 != 1
+    addi    a15, a7, -1
+    bnez    a15, .sub_s8_ae32_mode      // step_in2 != 1
+    addi    a15, a10, -1
+    bnez    a15, .sub_s8_ae32_mode      // step_out != 1
+
+    // All three buffers (inputs and output) must be 16-byte aligned
+    movi    a15, 0xF                    // modulo 16 mask
+    bany    a2, a15, .sub_s8_ae32_mode  // input1 is not aligned
+    bany    a3, a15, .sub_s8_ae32_mode  // input2 is not aligned
+    bany    a4, a15, .sub_s8_ae32_mode  // output is not aligned
+
+    // The length must be a multiple of 16 (sixteen int8 values per vector)
+    movi    a15, 0xf                    // modulo 16 mask
+    bany    a5, a15, .sub_s8_ae32_mode  // len % 16 != 0
+
+    // Vector path for ESP32-S3
+    wsr.sar a9                          // load sar register with the shift
+    srli    a5, a5, 4                   // a5 = number of 16-element vectors
+    ee.vld.128.ip     q0, a2, 16        // preload the first vector of input1
+    loopnez a5, .loop_end_mul_s8_aes3_main
+        ee.vld.128.ip     q1, a3, 16
+        ee.vmul.s8.ld.incp q0, a2, q4, q0, q1
+        ee.vst.128.ip     q4, a4, 16
+.loop_end_mul_s8_aes3_main:
+
+    // Exit for Esp32s3 mode
+    movi.n  a2, 0 // return status ESP_OK
+    retw.n
+
+// Scalar fallback; steps are already byte steps (sizeof(int8_t) == 1).
+// l8ui zero-extends, so sext restores the sign of each int8_t operand.
+.sub_s8_ae32_mode:
+    l8ui    a11, a2, 0      // a11 = input1[0] (zero-extended)
+    l8ui    a8,  a3, 0      // a8  = input2[0] (zero-extended)
+    sext    a11, a11, 7     // sign-extend from bit 7
+    sext    a8,  a8,  7
+    mull    a8,  a11, a8    // a8  = input1[0] * input2[0]
+    sra     a9,  a8         // a9  = a8 >> sar (arithmetic, as in the C code)
+
+    loopnez a5, .loop_end_mul_s8_aes3
+        add.n   a2, a2, a6      // input1 += step_in1
+        add.n   a3, a3, a7      // input2 += step_in2
+        l8ui    a11, a2, 0      // load the next pair of operands
+        l8ui    a8,  a3, 0
+        s8i     a9,  a4, 0      // store the pending result to the output
+        sext    a11, a11, 7     // sign-extend both operands
+        sext    a8,  a8,  7
+        mull    a8,  a11, a8    // a8 = product of the next pair
+        sra     a9,  a8         // a9 = a8 >> sar
+        add.n   a4, a4, a10     // output += step_out
+.loop_end_mul_s8_aes3:
+    movi.n  a2, 0 // return status ESP_OK (exit for Esp32 mode)
+    retw.n
+
+#endif // dsps_mul_s8_aes3_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/fixed/dsps_mul_s8_ansi.c
@@ -0,0 +1,26 @@
+/*
+ * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "dsps_mul.h"
+
+esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift)
+{
+    // Reject missing buffers before touching any data.
+    if (!input1 || !input2 || !output) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // Each product is formed in 32 bits, shifted right by `shift` and then
+    // narrowed to int8_t by the assignment.
+    for (int i = 0 ; i < len ; i++) {
+        const int8_t lhs = input1[i * step1];
+        const int8_t rhs = input2[i * step2];
+        const int32_t wide = (int32_t)lhs * (int32_t)rhs;
+        output[i * step_out] = wide >> shift;
+    }
+
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ae32.S
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ae32.S
@@ -0,0 +1,64 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+
+#include "dsps_mul_platform.h"
+#if (dsps_mul_f32_ae32_enabled == 1)
+
+// Element-wise multiplication of two float arrays for the ESP32 processor.
+    .text
+    .align  4
+    .global dsps_mul_f32_ae32
+    .type   dsps_mul_f32_ae32,@function
+// The function implements the following C code (NULL-pointer checks are not performed):
+// esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+// {
+//     for (int i = 0 ; i < len ; i++) {
+//         output[i * step_out] = input1[i * step1] * input2[i * step2];
+//     }
+//     return ESP_OK;
+// }
+// input1 is loaded one element ahead of its use, so one element past the last one is read.
+dsps_mul_f32_ae32: 
+// input1   - a2
+// input2   - a3
+// output   - a4
+// len      - a5
+// step1    - a6
+// step2    - a7
+// step_out - stack (a8) 
+
+    entry	a1, 16
+    
+    l32i.n	a8, a1, 16 // a8 = step_out, read from the stack argument area
+    slli 	a6, a6, 2  // a6 = step1 * 4 (byte step, 4 bytes per float)
+    slli 	a7, a7, 2  // a7 = step2 * 4 (byte step)
+    slli 	a8, a8, 2  // a8 = step_out * 4 (byte step)
+
+        lsi     f0, a2, 0      // f0 = input1[0], preloaded before the loop
+        add.n   a2, a2, a6     // input1_ptr+=step1;
+    loopnez a5, loop_end_mul_f32_ae32  // body runs len times, skipped when len == 0
+        lsi      f1, a3, 0      // f1 = *input2_ptr
+        add.n    a3, a3, a7     // input2_ptr+=step2;
+
+        mul.s    f2, f1, f0     // f2 = f1*f0
+        lsi      f0, a2, 0      // f0 = next input1 element (used by the next iteration)
+        add.n    a2, a2, a6     // input1_ptr+=step1;
+        ssi	     f2, a4, 0      // *output_ptr = f2
+        add.n    a4, a4, a8     // output_ptr+=step_out;
+loop_end_mul_f32_ae32:
+
+    movi.n	a2, 0 // return status ESP_OK
+    retw.n
+
+#endif // dsps_mul_f32_ae32_enabled
--- a/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/float/dsps_mul_f32_ansi.c
@@ -0,0 +1,33 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dsps_mul.h"
+
+esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out)
+{
+    // A missing input or output buffer is reported as a parameter error.
+    if ((NULL == input1) || (NULL == input2) || (NULL == output)) {
+        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
+    }
+
+    // output[i * step_out] = input1[i * step1] * input2[i * step2]
+    // Both operands are read before the result is written, so the output
+    // buffer may be one of the input buffers.
+    for (int i = 0 ; i < len ; i++) {
+        const float lhs = input1[i * step1];
+        const float rhs = input2[i * step2];
+        output[i * step_out] = lhs * rhs;
+    }
+    return ESP_OK;
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul.h
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul.h
@@ -0,0 +1,111 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _dsps_mul_H_
+#define _dsps_mul_H_
+#include "dsp_err.h"
+
+#include "dsps_mul_platform.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/**@{*/
+/**
+ * @brief   Multiply two arrays
+ *
+ * The function multiplies one input array by another and stores the result in the output array
+ * out[i*step_out] = input1[i*step1] * input2[i*step2]; i=[0..len)
+ * The ANSI C implementation can be compiled and run on any platform
+ *
+ * @param[in] input1: input array 1
+ * @param[in] input2: input array 2
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param step1: step over input array 1 (by default should be 1)
+ * @param step2: step over input array 2 (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_mul_f32_ansi(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+esp_err_t dsps_mul_f32_ae32(const float *input1, const float *input2, float *output, int len, int step1, int step2, int step_out);
+/**@}*/
+
+
+/**@{*/
+/**
+ * @brief   Multiply two arrays
+ *
+ * The function multiplies one input array by another, shifts each product right and stores the result in the output array
+ * out[i*step_out] = (input1[i*step1] * input2[i*step2]) >> shift; i=[0..len)
+ * The ANSI C implementation can be compiled and run on any platform
+ *
+ * @param[in] input1: input array 1
+ * @param[in] input2: input array 2
+ * @param output: output array
+ * @param len: amount of operations for arrays
+ * @param step1: step over input array 1 (by default should be 1)
+ * @param step2: step over input array 2 (by default should be 1)
+ * @param step_out: step over output array (by default should be 1)
+ * @param shift: output shift after multiplication (by default should be 15)
+ *
+ * @return
+ *      - ESP_OK on success
+ *      - One of the error codes from DSP library
+ */
+esp_err_t dsps_mul_s16_ansi(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_mul_s16_ae32(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_mul_s16_aes3(const int16_t *input1, const int16_t *input2, int16_t *output, int len, int step1, int step2, int step_out, int shift);
+
+esp_err_t dsps_mul_s8_ansi(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+esp_err_t dsps_mul_s8_aes3(const int8_t *input1, const int8_t *input2, int8_t *output, int len, int step1, int step2, int step_out, int shift);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#if CONFIG_DSP_OPTIMIZED
+// Map the generic dsps_mul_* names to the optimized variants enabled in dsps_mul_platform.h.
+#if (dsps_mul_f32_ae32_enabled == 1)
+#define dsps_mul_f32 dsps_mul_f32_ae32
+#else
+#define dsps_mul_f32 dsps_mul_f32_ansi
+#endif
+// dsps_mul_s8 only has an aes3 optimized variant; otherwise it maps to the ANSI C version.
+#if (dsps_mul_s16_aes3_enabled == 1)
+#define dsps_mul_s16 dsps_mul_s16_aes3
+#define dsps_mul_s8  dsps_mul_s8_aes3
+#elif (dsps_mul_s16_ae32_enabled == 1)
+#define dsps_mul_s16 dsps_mul_s16_ae32
+#define dsps_mul_s8  dsps_mul_s8_ansi
+#else
+#define dsps_mul_s16 dsps_mul_s16_ansi
+#define dsps_mul_s8  dsps_mul_s8_ansi
+#endif
+
+#else // CONFIG_DSP_OPTIMIZED
+#define dsps_mul_f32 dsps_mul_f32_ansi
+#define dsps_mul_s16 dsps_mul_s16_ansi
+#define dsps_mul_s8  dsps_mul_s8_ansi
+#endif // CONFIG_DSP_OPTIMIZED
+
+#endif // _dsps_mul_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul_platform.h
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/include/dsps_mul_platform.h
@@ -0,0 +1,30 @@
+#ifndef _dsps_mul_platform_H_
+#define _dsps_mul_platform_H_
+// Compile-time switches that enable the optimized dsps_mul_* implementations.
+#include "sdkconfig.h"
+
+#ifdef __XTENSA__
+#include <xtensa/config/core-isa.h>
+#include <xtensa/config/core-matmap.h>
+
+// NOTE(review): the two XCHAL blocks below define the same macros; the second condition already includes the first.
+#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
+
+#define dsps_mul_f32_ae32_enabled  1
+#define dsps_mul_s16_ae32_enabled  1
+
+#endif
+// NOTE(review): this enables dsps_mul_f32_ae32 without checking XCHAL_HAVE_FP, although that routine uses lsi/mul.s/ssi - confirm this is intended.
+#if (XCHAL_HAVE_LOOPS == 1)
+#define dsps_mul_f32_ae32_enabled  1
+#define dsps_mul_s16_ae32_enabled  1
+#endif
+// ESP32-S3 additionally gets the aes3 variants; dsps_mul_s8_aes3 is built under the same dsps_mul_s16_aes3_enabled switch.
+#if (CONFIG_IDF_TARGET_ESP32S3 == 1)
+#define dsps_mul_f32_ae32_enabled  1
+#define dsps_mul_s16_aes3_enabled  1
+#endif
+
+#endif // __XTENSA__
+
+#endif // _dsps_mul_platform_H_
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_f32_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_f32_ansi.c
@@ -0,0 +1,71 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mul.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mul";
+
+TEST_CASE("dsps_mul_f32_ansi functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i * i;       // expected result of squaring x in place
+    }
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_f32_ansi(x, x, x, n, 1, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        // Compare as floats: TEST_ASSERT_EQUAL converts both values to
+        // integers and could hide a mismatch in the fractional part.
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+}
+
+TEST_CASE("dsps_mul_f32 functionality", "[dsps]")
+{
+    int n = 32;
+    float x[n];
+    float y[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i;
+        y[i] = i * i;       // expected result of squaring x in place
+    }
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_f32(x, x, x, n, 1, 1, 1));
+    for (int i = 0 ; i < n ; i++) {
+        // Compare as floats: TEST_ASSERT_EQUAL converts both values to
+        // integers and could hide a mismatch in the fractional part.
+        TEST_ASSERT_EQUAL_FLOAT(y[i], x[i]);
+    }
+
+    int repeat_count = 1;
+
+    dsps_mul_f32(x, x, x, n, 1, 1, 1);  // untimed call before the measurement (presumably a warm-up)
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_f32(x, x, x, n, 1, 1, 1);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+
+    float total_b = end_b - start_b;
+    float cycles = total_b / (n * repeat_count);
+    ESP_LOGI(TAG, "dsps_mul_f32 - %f cycles per sample \n", cycles);
+
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ae32.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ae32.c
@@ -0,0 +1,62 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mul.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mul";
+
+TEST_CASE("dsps_mul_s16 functionality", "[dsps]")
+{
+    // Squares a ramp in place and compares it with a reference computed in C.
+    const int count = 64;
+    const int shift = 0;
+    int16_t data[count];
+    int16_t expected[count];
+    for (int k = 0 ; k < count ; k++) {
+        data[k] = k << 4;
+        // The 32-bit product is narrowed to 16 bits by the assignment.
+        expected[k] = (((int32_t)data[k]) * ((int32_t)data[k])) >> shift;
+    }
+
+    dsps_mul_s16(data, data, data, count, 1, 1, 1, shift);
+
+    for (int k = 0 ; k < count ; k++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", k, data[k], expected[k]);
+        // Same pass/fail outcome as guarding the assertion with a comparison.
+        TEST_ASSERT_EQUAL(data[k], expected[k]);
+    }
+}
+
+TEST_CASE("dsps_mul_s16 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int16_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s16(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    // Divide by the element count so the value matches the "per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s16 - %f cycles per sample \n", cycles);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_aes3.c
@@ -0,0 +1,69 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_mul.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_mul";
+TEST_CASE("dsps_mul_s16_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
+    TEST_ASSERT_NOT_NULL(x);    // fail cleanly instead of dereferencing NULL
+    TEST_ASSERT_NOT_NULL(y);
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
+        y[i] = temp;            // reference value, narrowed to int16_t
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(y[i], x[i]);  // expected value first, then actual
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_mul_s16_aes3 benchmark", "[dsps]")
+{
+    const int n = 2048;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    TEST_ASSERT_NOT_NULL(x);    // fail cleanly instead of dereferencing NULL
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s16_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    // Divide by the element count so the value matches the "per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s16_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+}
+#endif // (dsps_mul_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s16_ansi.c
@@ -0,0 +1,66 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_mul.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+static const char *TAG = "dsps_mul";
+TEST_CASE("dsps_mul_s16_ansi functionality", "[dsps]")
+{
+    int n = 64;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    int16_t *y = (int16_t *)memalign(16, n * sizeof(int16_t));
+    TEST_ASSERT_NOT_NULL(x);    // fail cleanly instead of dereferencing NULL
+    TEST_ASSERT_NOT_NULL(y);
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
+        y[i] = temp;            // reference value, narrowed to int16_t
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(y[i], x[i]);  // expected value first, then actual
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_mul_s16_ansi benchmark", "[dsps]")
+{
+    const int n = 2048;
+    int16_t *x = (int16_t *)memalign(16, n * sizeof(int16_t));
+    TEST_ASSERT_NOT_NULL(x);    // fail cleanly instead of dereferencing NULL
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s16_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    // Divide by the element count so the value matches the "per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s16_ansi - %f cycles per sample \n", cycles);
+    free(x);
+}
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_aes3.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_aes3.c
@@ -0,0 +1,69 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+#include <malloc.h>
+
+#include "dsps_mul.h"
+#include "esp_attr.h"
+#include "dsp_tests.h"
+
+#if (dsps_mul_s16_aes3_enabled == 1)
+
+static const char *TAG = "dsps_mul";
+TEST_CASE("dsps_mul_s8_aes3 functionality", "[dsps]")
+{
+    int n = 64;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
+    int8_t *y = (int8_t *)memalign(16, n * sizeof(int8_t));
+    TEST_ASSERT_NOT_NULL(x);    // fail cleanly instead of dereferencing NULL
+    TEST_ASSERT_NOT_NULL(y);
+    int32_t temp;
+    int shift = 0;
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i - n / 2;       // ramp covering negative and positive values
+        temp = ((int32_t)x[i] * (int32_t)x[i]) >> shift;
+        y[i] = temp;            // reference value, narrowed to int8_t
+    }
+
+    TEST_ASSERT_EQUAL(ESP_OK, dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, shift));
+    for (int i = 0 ; i < n ; i++) {
+        ESP_LOGD(TAG, "x[%i] = %i  %i", i, x[i], y[i]);
+        TEST_ASSERT_EQUAL(y[i], x[i]);  // expected value first, then actual
+    }
+    free(x);
+    free(y);
+}
+
+TEST_CASE("dsps_mul_s8_aes3 benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t *x = (int8_t *)memalign(16, n * sizeof(int8_t));
+    TEST_ASSERT_NOT_NULL(x);    // fail cleanly instead of dereferencing NULL
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s8_aes3(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    // Divide by the element count so the value matches the "per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s8_aes3 - %f cycles per sample \n", cycles);
+    free(x);
+}
+#endif // (dsps_mul_s16_aes3_enabled == 1)
--- a/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_ansi.c
+++ b/managed_components/espressif__esp-dsp/modules/math/mul/test/test_dsps_mul_s8_ansi.c
@@ -0,0 +1,61 @@
+// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include "unity.h"
+#include "dsp_platform.h"
+#include "esp_log.h"
+
+#include "dsp_tests.h"
+#include "dsps_mul.h"
+#include "esp_attr.h"
+
+static const char *TAG = "dsps_mul";
+
+TEST_CASE("dsps_mul_s8_ansi functionality", "[dsps]")
+{
+    // Squares a signed ramp in place and checks it against a C reference.
+    const int count = 64;
+    const int shift = 0;
+    int8_t data[count];
+    int8_t expected[count];
+    for (int k = 0 ; k < count ; k++) {
+        data[k] = k - count / 2;
+        // The 32-bit product is narrowed to 8 bits by the assignment.
+        expected[k] = (((int32_t)data[k]) * ((int32_t)data[k])) >> shift;
+    }
+
+    dsps_mul_s8_ansi(data, data, data, count, 1, 1, 1, 0);
+
+    for (int k = 0 ; k < count ; k++) {
+        // Same pass/fail outcome as guarding the assertion with a comparison.
+        TEST_ASSERT_EQUAL(data[k], expected[k]);
+    }
+}
+
+TEST_CASE("dsps_mul_s8_ansi benchmark", "[dsps]")
+{
+    const int n = 256;
+    int8_t x[n];
+    for (int i = 0 ; i < n ; i++) {
+        x[i] = i << 4;
+    }
+
+    unsigned int start_b = dsp_get_cpu_cycle_count();
+    dsps_mul_s8_ansi(x, x, x, n, 1, 1, 1, 0);
+    unsigned int end_b = dsp_get_cpu_cycle_count();
+    // Divide by the element count so the value matches the "per sample" label.
+    float cycles = (float)(end_b - start_b) / n;
+    ESP_LOGI(TAG, "dsps_mul_s8_ansi - %f cycles per sample \n", cycles);
+}