add some code

This commit is contained in:
2025-09-05 13:25:11 +08:00
parent 9ff0a99e7a
commit 3cf1229a85
8911 changed files with 2535396 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dspi_dotprod.h"
/**
 * 2D dot product of an image window with a filter window (float, plain C).
 *
 * Walks count_y rows of count_x samples in both images, multiplies the
 * samples pairwise and accumulates the products into a single float.
 *
 * @param in_image   source image descriptor (data, steps and strides are read)
 * @param filter     filter image descriptor (data, steps and strides are read)
 * @param out_value  receives the accumulated sum
 * @param count_x    number of samples taken from every row
 * @param count_y    number of rows
 *
 * @return ESP_OK on success, ESP_ERR_DSP_PARAM_OUTOFRANGE when
 *         step * count exceeds the matching stride of either descriptor.
 */
esp_err_t dspi_dotprod_f32_ansi(image2d_t *in_image, image2d_t *filter, float *out_value, int count_x, int count_y)
{
    // Range checks: evaluated in the order image-x, image-y, filter-x, filter-y.
    if ((in_image->step_x * count_x > in_image->stride_x) ||
            (in_image->step_y * count_y > in_image->stride_y) ||
            (filter->step_x * count_x > filter->stride_x) ||
            (filter->step_y * count_y > filter->stride_y)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    // Distance, in floats, between the first samples of two consecutive rows.
    int image_row_advance = in_image->stride_x * in_image->step_y;
    int filter_row_advance = filter->stride_x * filter->step_y;

    float *image_row = (float *)in_image->data;
    float *filter_row = (float *)filter->data;

    float sum = 0;
    for (int row = 0; row < count_y; row++, image_row += image_row_advance, filter_row += filter_row_advance) {
        for (int col = 0; col < count_x; col++) {
            sum += image_row[in_image->step_x * col] * filter_row[filter->step_x * col];
        }
    }

    *out_value = sum;
    return ESP_OK;
}

View File

@@ -0,0 +1,47 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dspi_dotprod.h"
/**
 * 2D dot product of an image window with a filter window, where a constant
 * offset is added to every filter sample before the multiplication
 * (float, plain C).
 *
 * @param in_image   source image descriptor (data, steps and strides are read)
 * @param filter     filter image descriptor (data, steps and strides are read)
 * @param out_value  receives the accumulated sum
 * @param count_x    number of samples taken from every row
 * @param count_y    number of rows
 * @param offset     value added to each filter sample
 *
 * @return ESP_OK on success, ESP_ERR_DSP_PARAM_OUTOFRANGE when
 *         step * count exceeds the matching stride of either descriptor.
 */
esp_err_t dspi_dotprod_off_f32_ansi(image2d_t *in_image, image2d_t *filter, float *out_value, int count_x, int count_y, float offset)
{
    // Range checks: evaluated in the order image-x, image-y, filter-x, filter-y.
    if ((in_image->step_x * count_x > in_image->stride_x) ||
            (in_image->step_y * count_y > in_image->stride_y) ||
            (filter->step_x * count_x > filter->stride_x) ||
            (filter->step_y * count_y > filter->stride_y)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    // Distance, in floats, between the first samples of two consecutive rows.
    int image_row_advance = in_image->stride_x * in_image->step_y;
    int filter_row_advance = filter->stride_x * filter->step_y;

    float *image_row = (float *)in_image->data;
    float *filter_row = (float *)filter->data;

    float sum = 0;
    for (int row = 0; row < count_y; row++, image_row += image_row_advance, filter_row += filter_row_advance) {
        for (int col = 0; col < count_x; col++) {
            sum += image_row[in_image->step_x * col] * (filter_row[filter->step_x * col] + offset);
        }
    }

    *out_value = sum;
    return ESP_OK;
}

View File

@@ -0,0 +1,62 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dotprod_f32_ae32_enabled == 1)
#include "dsps_dotprod_f32_m_ae32.S"
// Dot product of two contiguous float arrays for the ESP32 (ae32) target.
// The multiply-accumulate loop itself is provided by the dotprod_f32_ae32
// macro from the file included above; this file only sets up its arguments
// and stores the result.
.text
.align 4
.global dsps_dotprod_f32_ae32
// The body label is exported too, so that code in another file can jump
// straight to it after having executed its own `entry`.
.global .dsps_dotprod_f32_ae32_body
.type dsps_dotprod_f32_ae32,@function
// The function implements the following C code:
//esp_err_t dsps_dotprod_f32_ae32(const float* src1, const float* src2, float* dest, int len)
//{
// float acc = 0;
// for (int i=0 ; i< len ; i++)
// {
// acc += src1[i]*src2[i];
// }
// *dest = acc;
// return ESP_OK;
//}
dsps_dotprod_f32_ae32:
// Arguments on entry:
// src1 - a2
// src2 - a3
// dest - a4
// len - a5
entry a1, 16
.dsps_dotprod_f32_ae32_body:
// a8 = byte distance between consecutive floats (4)
movi.n a8, 4
// a9 = 0 is used twice: its bit pattern clears the accumulator f1, and it
// is passed to the macro as the starting byte offset into both arrays.
movi.n a9, 0
wfr f1, a9
// Macro arguments:
// a2 - input1
// a3 - input2
// a5 - length
// a9 - running byte offset, starts at 0 (advanced by the macro)
// a8 - 4, byte step added to the offset after every sample
dotprod_f32_ae32 a2, a3, a5, a9, a8;
ssi f1, a4, 0 // Store result from f1 to memory at a4
movi.n a2, 0 // return status ESP_OK
retw.n
#endif // dotprod_f32_ae32_enabled

View File

@@ -0,0 +1,85 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dsps_dotprod_f32_aes3_enabled == 1)
// Dot product of two float arrays for the "aes3" target, using the 128-bit
// EE.LDF.128.IP loads (presumably ESP32-S3 - confirm against the platform header).
// Inputs that do not satisfy the fast-path requirements checked below are
// forwarded to the generic body of dsps_dotprod_f32_ae32.
.text
.align 4
.global dsps_dotprod_f32_aes3
.global .dsps_dotprod_f32_ae32_body
.type dsps_dotprod_f32_aes3,@function
// The function implements the following C code:
//esp_err_t dsps_dotprod_f32_aes3(const float* src1, const float* src2, float* dest, int len)
//{
// float acc = 0;
// for (int i=0 ; i< len ; i++)
// {
// acc += src1[i]*src2[i];
// }
// *dest = acc;
// return ESP_OK;
//}
// Note: the fast path keeps four partial sums and adds them at the end, so
// the summation order (and therefore the rounding) differs from the C loop.
dsps_dotprod_f32_aes3:
// Arguments on entry:
// src1 - a2
// src2 - a3
// dest - a4
// len - a5
entry a1, 16
// Fast-path test: a10 becomes non-zero when len is not a multiple of 4
// or when either input pointer is not 16-byte aligned.
movi.n a10, 3
and a10, a10, a5
movi.n a9, 15
or a11, a3, a2
and a11, a9, a11
or a10, a10, a11
beqz a10, .dsps_dotprod_f32_aes3_body
// Requirements not met: continue in the ESP32 implementation. The jump
// lands after that function's `entry`, which has already been done here.
J .dsps_dotprod_f32_ae32_body
.dsps_dotprod_f32_aes3_body:
// Clear the four partial accumulators f0..f3
movi.n a9, 0
wfr f0, a9
wfr f1, a9
wfr f2, a9
wfr f3, a9
// a2 - input1
// a3 - input2
// a5 - length
srli a6, a5, 2 // a6 = len / 4, number of 4-float groups
// lsx f0, a2, a9
// Hardware loop over the groups; skipped entirely when a6 == 0.
loopnez a6, .loop_mac_end_m_ae32
EE.LDF.128.IP f11, f10, f9, f8, a2, 16 // four floats from src1 into f8..f11, a2 += 16
EE.LDF.128.IP f7, f6, f5, f4, a3, 16 // four floats from src2 into f4..f7, a3 += 16
madd.s f0, f4, f8 // f0 += f4 * f8
madd.s f1, f5, f9 // f1 += f5 * f9
madd.s f2, f6, f10 // f2 += f6 * f10
madd.s f3, f7, f11 // f3 += f7 * f11
.loop_mac_end_m_ae32:
// Reduce the four partial sums into f0
add.s f0, f0, f1
add.s f0, f0, f2
add.s f0, f0, f3
ssi f0, a4, 0 // Store result from f0 to memory at a4
movi.n a2, 0 // return status ESP_OK
retw.n
#endif // dsps_dotprod_f32_aes3_enabled

View File

@@ -0,0 +1,25 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod.h"
/**
 * Dot product of two contiguous float arrays (plain C reference version).
 *
 * @param src1  first input array, len elements are read
 * @param src2  second input array, len elements are read
 * @param dest  receives the sum of src1[i] * src2[i]; 0 when len <= 0
 * @param len   number of elements to process
 *
 * @return always ESP_OK
 */
esp_err_t dsps_dotprod_f32_ansi(const float *src1, const float *src2, float *dest, int len)
{
    float sum = 0;
    int idx = 0;

    // Products are accumulated strictly in index order.
    while (idx < len) {
        sum += src1[idx] * src2[idx];
        idx++;
    }

    *dest = sum;
    return ESP_OK;
}

View File

@@ -0,0 +1,77 @@
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dsps_dotprod_f32_arp4_enabled == 1)
    .text
    .align 4
    .global dsps_dotprod_f32_arp4
    .type   dsps_dotprod_f32_arp4,@function
// Dot product of two contiguous float arrays (RISC-V "arp4" variant using the
// esp.lp.setup hardware loop).
//
// The function implements the following C code:
//esp_err_t dsps_dotprod_f32(const float* src1, const float* src2, float* dest, int len)
//{
// float acc = 0;
// for (int i=0 ; i< len ; i++)
// {
// acc += src1[i]*src2[i];
// }
// *dest = acc;
// return ESP_OK;
//}
//
// In:    a0 = src1, a1 = src2, a2 = dest, a3 = len
// Out:   a0 = 0 (ESP_OK), *dest = accumulated sum (0.0 when len <= 0)
// Clobb: a0, a1, a3, a4, fa0, fa1, fa2
//
// NOTE: both loops load the operands for the *next* iteration after each
// multiply-accumulate, so one element past the end of each array is read
// (and discarded) when len > 0.
dsps_dotprod_f32_arp4:
// src1 - a0
// src2 - a1
// dest - a2
// len  - a3
    add     sp, sp, -16
    fmv.w.x fa2, zero                   // acc = 0.0f

    // len <= 0: there is nothing to accumulate. Without this guard the
    // count-down loop below would decrement a3 past zero and keep running
    // (reading far out of bounds) until the counter wrapped around.
    blez    a3, .dotprod_f32_arp4_store

    // Preload the first pair of operands
    flw     fa0, 0(a0)
    flw     fa1, 0(a1)
    add     a0, a0, 4
    add     a1, a1, 4

    li      a4, 2
    ble     a3, a4, .loop_less_2

// Loop when len > 2: hardware loop, body runs a3 times and ends at the
// instruction labelled .dotprod_loop
    esp.lp.setup 0, a3, .dotprod_loop
    fmadd.s fa2, fa0, fa1, fa2          // acc += src1[i] * src2[i]
    flw     fa0, 0(a0)                  // preload next pair
    flw     fa1, 0(a1)
    add     a0, a0, 4
.dotprod_loop: add a1, a1, 4

    fsw     fa2, 0(a2)                  // *dest = acc
    add     sp, sp, 16
    li      a0, 0                       // return ESP_OK
    ret

// Loop when 1 <= len <= 2: plain count-down loop on a3
.loop_less_2:
    fmadd.s fa2, fa0, fa1, fa2          // acc += src1[i] * src2[i]
    flw     fa0, 0(a0)                  // preload next pair
    flw     fa1, 0(a1)
    add     a0, a0, 4
    add     a1, a1, 4
    add     a3, a3, -1
    bnez    a3, .loop_less_2

.dotprod_f32_arp4_store:
    fsw     fa2, 0(a2)                  // *dest = acc
    add     sp, sp, 16
    li      a0, 0                       // return ESP_OK
    ret
#endif // dsps_dotprod_f32_arp4_enabled

View File

@@ -0,0 +1,42 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
.macro dotprod_f32_ae32 x1 x2 count step1 step2
// This macro calculates floating point dot product for count float samples.
// x1, x2 - A registers with the base addresses of the input arrays
// (not modified by the macro)
// count - A register with the amount of samples; the loop is skipped
// when it is zero
// step1 - A register with the running byte offset applied to BOTH arrays.
// It holds the starting offset on entry and is advanced by the macro.
// step2 - A register with the byte increment added to the offset after every
// sample (4 for contiguous floats)
// f1 - contains initial value
//
// result in f1
//
// Macros body:
// off = step1; repeat count times: f1 += x1[off]*x2[off]; off += step2
// (offsets in bytes)
// affected: f0, f1, f2 and the \step1 register
// Example: dotprod_f32_ae32 a2 a3 a5 a9 a8
// a9 == 0, start offset
// a8 == 4, step is 4 bytes
// a5 == 32, length of array is 32
//
// Notes:
// - the x2 operand is loaded one iteration ahead, so one extra x2 element
// (at the offset following the last sample) is read and discarded.
// - the loop-end label below is a fixed name, so expanding this macro more
// than once in the same file would define that label twice.
//
// mov \step1, \step2
lsx f0, \x2, \step1
// sub \x1, \x1, \step1 // To compensate first increment
loopnez \count, .loop_mac_end_m_ae32
lsx f2, \x1, \step1
madd.s f1, f2, f0
add.n \step1, \step1, \step2
lsx f0, \x2, \step1
.loop_mac_end_m_ae32:
.endm

View File

@@ -0,0 +1,64 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dotprode_f32_ae32_enabled == 1)
#include "dsps_dotprode_f32_m_ae32.S"
// Dot product of two float arrays with an individual element step for each
// array, for the ESP32 (ae32) target. The multiply-accumulate loop is provided
// by the dotprode_f32_ae32 macro from the file included above.
.text
.align 4
.global dsps_dotprode_f32_ae32
.type dsps_dotprode_f32_ae32,@function
// The function implements the following C code:
//esp_err_t dsps_dotprode_f32_ae32(const float* src1, const float* src2, float* dest, int len, int step1, int step2)
//{
// float acc = 0;
// for (int i=0 ; i< len ; i++)
// {
// acc += src1[i*step1]*src2[i*step2];
// }
// *dest = acc;
// return ESP_OK;
//}
dsps_dotprode_f32_ae32:
// Arguments on entry:
// src1 - a2
// src2 - a3
// dest - a4
// len - a5
// step1- a6
// step2- a7
entry a1, 16
// Steps arrive as element counts; multiply by 4 to get byte increments
slli a6,a6, 2
slli a7,a7, 2
// Clear initial state of the result register
movi.n a9, 0
wfr f1, a9
// Macro arguments:
// a2 - input1 (advanced by the macro)
// a3 - input2 (advanced by the macro)
// a5 - length
// a6,a7, step in arrays, in bytes
dotprode_f32_ae32 a2, a3, a5, a6, a7;
ssi f1, a4, 0 // Store result from f1 to memory at a4
movi.n a2, 0 // return status ESP_OK
retw.n
#endif //dotprode_f32_ae32_enabled

View File

@@ -0,0 +1,25 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod.h"
/**
 * Dot product of two float arrays with an individual element step for each
 * array (plain C reference version).
 *
 * @param src1   first input array, read at indices 0, step1, 2*step1, ...
 * @param src2   second input array, read at indices 0, step2, 2*step2, ...
 * @param dest   receives the accumulated sum; 0 when len <= 0
 * @param len    number of element pairs to process
 * @param step1  index increment for src1, in elements
 * @param step2  index increment for src2, in elements
 *
 * @return always ESP_OK
 */
esp_err_t dsps_dotprode_f32_ansi(const float *src1, const float *src2, float *dest, int len, int step1, int step2)
{
    float sum = 0;
    int n = 0;

    // Products are accumulated strictly in index order.
    while (n < len) {
        sum += src1[n * step1] * src2[n * step2];
        n++;
    }

    *dest = sum;
    return ESP_OK;
}

View File

@@ -0,0 +1,78 @@
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dsps_dotprod_f32_arp4_enabled == 1)
    .text
    .align 4
    .global dsps_dotprode_f32_arp4
    .type   dsps_dotprode_f32_arp4,@function
// Dot product of two float arrays with an individual element step for each
// array (RISC-V "arp4" variant using the esp.lp.setup hardware loop).
//
// The function implements the following C code:
//esp_err_t dsps_dotprode_f32(const float *src1, const float *src2, float *dest, int len, int step1, int step2)
//{
// float acc = 0;
// for (int i = 0 ; i < len ; i++) {
// acc += src1[i * step1] * src2[i * step2];
// }
// *dest = acc;
// return ESP_OK;
//}
//
// In:    a0 = src1, a1 = src2, a2 = dest, a3 = len,
//        a4 = step1 (elements), a5 = step2 (elements)
// Out:   a0 = 0 (ESP_OK), *dest = accumulated sum (0.0 when len <= 0)
// Clobb: a0, a1, a3, a4, a5, a6, fa0, fa1, fa2
//
// NOTE: both loops load the operands for the *next* iteration after each
// multiply-accumulate, so one element beyond the last used one is read
// (and discarded) from each array when len > 0.
dsps_dotprode_f32_arp4:
// src1  - a0
// src2  - a1
// dest  - a2
// len   - a3
// step1 - a4
// step2 - a5
    add     sp, sp, -16
    fmv.w.x fa2, zero                   // acc = 0.0f

    // len <= 0: there is nothing to accumulate. Without this guard the
    // count-down loop below would decrement a3 past zero and keep running
    // (reading far out of bounds) until the counter wrapped around.
    blez    a3, .dotprode_f32_arp4_store

    slli    a4, a4, 2                   // step1 in bytes (elements * 4)
    slli    a5, a5, 2                   // step2 in bytes (elements * 4)

    // Preload the first pair of operands
    flw     fa0, 0(a0)
    flw     fa1, 0(a1)
    add     a0, a0, a4
    add     a1, a1, a5

    li      a6, 2
    ble     a3, a6, .loop_less_2

// Loop when len > 2: hardware loop, body runs a3 times and ends at the
// instruction labelled .dotprod_loop
    esp.lp.setup 0, a3, .dotprod_loop
    fmadd.s fa2, fa0, fa1, fa2          // acc += src1[i*step1] * src2[i*step2]
    flw     fa0, 0(a0)                  // preload next pair
    flw     fa1, 0(a1)
    add     a0, a0, a4
.dotprod_loop: add a1, a1, a5

    fsw     fa2, 0(a2)                  // *dest = acc
    add     sp, sp, 16
    li      a0, 0                       // return ESP_OK
    ret

// Loop when 1 <= len <= 2: plain count-down loop on a3
.loop_less_2:
    fmadd.s fa2, fa0, fa1, fa2          // acc += src1[i*step1] * src2[i*step2]
    flw     fa0, 0(a0)                  // preload next pair
    flw     fa1, 0(a1)
    add     a0, a0, a4
    add     a1, a1, a5
    add     a3, a3, -1
    bnez    a3, .loop_less_2

.dotprode_f32_arp4_store:
    fsw     fa2, 0(a2)                  // *dest = acc
    add     sp, sp, 16
    li      a0, 0                       // return ESP_OK
    ret
#endif // dsps_dotprod_f32_arp4_enabled

View File

@@ -0,0 +1,41 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
.macro dotprode_f32_ae32 x1 x2 count step1 step2
// This macro calculates floating point dot product for count float samples,
// with an individual step for each input array.
// x1, x2 - A registers with the addresses of the input arrays
// count - A register with the amount of samples; the loop is skipped
// when it is zero
// step1,step2 - A registers with the address increments for x1 and x2,
// in bytes (element step multiplied by 4)
// f1 - contains initial value
//
// result in f1
//
// Macros body:
// f1 += x1[i*step1]*x2[i*step2]; i: 0..counter-1 (steps in bytes)
// affected: f0, f1, f2 and the \x1, \x2 registers
// (\x1 is moved back by step1 first and then advanced once per sample;
// \x2 is advanced once per sample)
// Example: dotprode_f32_ae32 a2 a3 a5 a8 a9
// a8 == 4, a9 == 4, step is 4 bytes for both arrays
// a5 == 32, length of array is 32
//
// Notes:
// - the x2 operand is loaded one iteration ahead, so one extra x2 element
// (one step after the last sample) is read and discarded.
// - the loop-end label below is a fixed name, so expanding this macro more
// than once in the same file would define that label twice.
//
lsi f0, \x2, 0
sub \x1, \x1, \step1 // To compensate first increment
loopnez \count, .loop_mace_end_m_ae32
add.n \x1, \x1, \step1
lsi f2, \x1, 0
madd.s f1, f2, f0
add.n \x2, \x2, \step2
lsi f0, \x2, 0
.loop_mace_end_m_ae32:
.endm