add some code
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspi_dotprod.h"
|
||||
|
||||
// Computes the sum of element-wise products of a count_x by count_y window
// taken from in_image and from filter, and writes the sum to *out_value.
//
// Both data buffers are read as float. Within a row, consecutive samples are
// step_x floats apart; consecutive rows are stride_x * step_y floats apart.
//
// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE (leaving *out_value untouched) when
// step_x * count_x exceeds stride_x, or step_y * count_y exceeds stride_y,
// for either image. Returns ESP_OK otherwise.
esp_err_t dspi_dotprod_f32_ansi(image2d_t *in_image, image2d_t *filter, float *out_value, int count_x, int count_y)
{
    // Checked in the same order as before: input image first, then filter.
    if ((in_image->step_x * count_x > in_image->stride_x) ||
            (in_image->step_y * count_y > in_image->stride_y) ||
            (filter->step_x * count_x > filter->stride_x) ||
            (filter->step_y * count_y > filter->stride_y)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    float *image_row = (float *)in_image->data;
    float *filter_row = (float *)filter->data;

    // Distance, in floats, between the starts of two consecutive processed rows.
    int image_row_step = in_image->stride_x * in_image->step_y;
    int filter_row_step = filter->stride_x * filter->step_y;

    float sum = 0;
    int row = 0;
    while (row < count_y) {
        int col = 0;
        while (col < count_x) {
            sum += image_row[in_image->step_x * col] * filter_row[filter->step_x * col];
            col++;
        }
        image_row += image_row_step;
        filter_row += filter_row_step;
        row++;
    }

    *out_value = sum;
    return ESP_OK;
}
|
||||
@@ -0,0 +1,47 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspi_dotprod.h"
|
||||
|
||||
// Same windowed dot product as the plain f32 variant, except that `offset`
// is added to every filter sample before it is multiplied by the matching
// image sample. The sum is written to *out_value.
//
// Both data buffers are read as float. Within a row, consecutive samples are
// step_x floats apart; consecutive rows are stride_x * step_y floats apart.
//
// Returns ESP_ERR_DSP_PARAM_OUTOFRANGE (leaving *out_value untouched) when
// step_x * count_x exceeds stride_x, or step_y * count_y exceeds stride_y,
// for either image. Returns ESP_OK otherwise.
esp_err_t dspi_dotprod_off_f32_ansi(image2d_t *in_image, image2d_t *filter, float *out_value, int count_x, int count_y, float offset)
{
    // Checked in the same order as before: input image first, then filter.
    if ((in_image->step_x * count_x > in_image->stride_x) ||
            (in_image->step_y * count_y > in_image->stride_y) ||
            (filter->step_x * count_x > filter->stride_x) ||
            (filter->step_y * count_y > filter->stride_y)) {
        return ESP_ERR_DSP_PARAM_OUTOFRANGE;
    }

    float *image_row = (float *)in_image->data;
    float *filter_row = (float *)filter->data;

    // Distance, in floats, between the starts of two consecutive processed rows.
    int image_row_step = in_image->stride_x * in_image->step_y;
    int filter_row_step = filter->stride_x * filter->step_y;

    float sum = 0;
    int row = 0;
    while (row < count_y) {
        int col = 0;
        while (col < count_x) {
            sum += image_row[in_image->step_x * col] * (filter_row[filter->step_x * col] + offset);
            col++;
        }
        image_row += image_row_step;
        filter_row += filter_row_step;
        row++;
    }

    *out_value = sum;
    return ESP_OK;
}
|
||||
@@ -0,0 +1,62 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod_platform.h"
|
||||
#if (dotprod_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is dot product function for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_dotprod_f32_ae32
|
||||
.global .dsps_dotprod_f32_ae32_body
|
||||
.type dsps_dotprod_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_dotprod_f32_ae32(const float* src1, const float* src2, float* dest, int len)
|
||||
//{
|
||||
// float acc = 0;
|
||||
// for (int i=0 ; i< len ; i++)
|
||||
// {
|
||||
// acc += src1[i]*src2[i];
|
||||
// }
|
||||
// *dest = acc;
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
dsps_dotprod_f32_ae32:
|
||||
// Dot product of two contiguous float arrays: the sum is stored at dest and
// a2 is set to 0 (ESP_OK) before returning.
// src1 - a2
|
||||
// src2 - a3
|
||||
// dest - a4
|
||||
// len - a5
|
||||
|
||||
// Windowed-ABI prologue with a 16-byte frame (matched by retw.n below).
entry a1, 16
|
||||
// Secondary entry point, exported with .global in this file.
// dsps_dotprod_f32_aes3 jumps here after executing its own "entry a1, 16"
// when its length/alignment check fails, so no second entry is executed.
.dsps_dotprod_f32_ae32_body:
|
||||
// Array increment for floating point data should be 4
|
||||
movi.n a8, 4
|
||||
// Clear initial state of the result register
// a9 = 0 serves two purposes: it is the bit pattern of 0.0f written to f1,
// and it is the starting byte offset handed to the macro as step1.
|
||||
movi.n a9, 0
|
||||
wfr f1, a9
|
||||
// a2 - input1
|
||||
// a3 - input2
|
||||
// a5 - length
|
||||
// a8 - 4, step in arrays
// a9 - running byte offset into both arrays, starts at 0; the macro adds a8
//      to it on every iteration, so a9 is not preserved.
|
||||
dotprod_f32_ae32 a2, a3, a5, a9, a8;
|
||||
|
||||
ssi f1, a4, 0 // Store result from f1 to memory at a4
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dotprod_f32_ae32_enabled
|
||||
@@ -0,0 +1,85 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod_platform.h"
|
||||
#if (dsps_dotprod_f32_aes3_enabled == 1)
|
||||
|
||||
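// This is the dot product function for the ESP32-S3 processor. It falls back to the ESP32 (ae32) implementation when the input is not suitably sized or aligned.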
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_dotprod_f32_aes3
|
||||
.global .dsps_dotprod_f32_ae32_body
|
||||
.type dsps_dotprod_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_dotprod_f32_aes3(const float* src1, const float* src2, float* dest, int len)
|
||||
//{
|
||||
// float acc = 0;
|
||||
// for (int i=0 ; i< len ; i++)
|
||||
// {
|
||||
// acc += src1[i]*src2[i];
|
||||
// }
|
||||
// *dest = acc;
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
dsps_dotprod_f32_aes3:
|
||||
// Dot product of two contiguous float arrays, processing four floats per
// loop iteration. The sum is stored at dest and a2 is set to 0 (ESP_OK).
// src1 - a2
|
||||
// src2 - a3
|
||||
// dest - a4
|
||||
// len - a5
|
||||
|
||||
entry a1, 16
|
||||
// Check length and align
// a10 = len & 3            -> non-zero when len is not a multiple of 4
// a11 = (src1 | src2) & 15 -> non-zero when either pointer is not 16-byte aligned
// The fast path below is taken only when both are zero.
|
||||
movi.n a10, 3
|
||||
and a10, a10, a5
|
||||
movi.n a9, 15
|
||||
or a11, a3, a2
|
||||
and a11, a9, a11
|
||||
or a10, a10, a11
|
||||
beqz a10, .dsps_dotprod_f32_aes3_body
|
||||
// Call Esp32 function
// This is a jump, not a call: control continues after the other function's
// "entry a1, 16" (the same frame size as the entry executed above), and that
// code stores the result and returns to our caller.
|
||||
J .dsps_dotprod_f32_ae32_body
|
||||
|
||||
.dsps_dotprod_f32_aes3_body:
|
||||
// Clear initial state of the result register
// Four independent accumulators f0..f3, all set to 0.0f.
|
||||
movi.n a9, 0
|
||||
wfr f0, a9
|
||||
wfr f1, a9
|
||||
wfr f2, a9
|
||||
wfr f3, a9
|
||||
// a2 - input1
|
||||
// a3 - input2
|
||||
// a5 - length
|
||||
|
||||
srli a6, a5, 2 // N count
// a6 = len / 4 = number of loop iterations; loopnez skips the body when it is 0.
|
||||
// lsx f0, a2, a9
|
||||
loopnez a6, .loop_mac_end_m_ae32
|
||||
// Each load fills four FP registers from the address in a2 / a3 and then
// advances that pointer by 16 bytes.
// NOTE(review): which register receives the lowest address is not visible
// here; the pairing below is correct as long as both loads use the same order.
EE.LDF.128.IP f11, f10, f9, f8, a2, 16
|
||||
EE.LDF.128.IP f7, f6, f5, f4, a3, 16
|
||||
madd.s f0, f4, f8 // f0 += f4 * f8 (src2 lane * matching src1 lane)
|
||||
madd.s f1, f5, f9 // f1 += f5 * f9
|
||||
madd.s f2, f6, f10 // f2 += f6 * f10
|
||||
madd.s f3, f7, f11 // f3 += f7 * f11
|
||||
.loop_mac_end_m_ae32:
|
||||
|
||||
// Reduce the four partial sums into f0. Because the products are summed in
// four separate chains, rounding can differ from the sequential C loop.
add.s f0, f0, f1
|
||||
add.s f0, f0, f2
|
||||
add.s f0, f0, f3
|
||||
|
||||
ssi f0, a4, 0 // Store result from f0 to memory at a4
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_dotprod_f32_aes3_enabled
|
||||
@@ -0,0 +1,25 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod.h"
|
||||
|
||||
esp_err_t dsps_dotprod_f32_ansi(const float *src1, const float *src2, float *dest, int len)
|
||||
{
|
||||
float acc = 0;
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
acc += src1[i] * src2[i];
|
||||
}
|
||||
*dest = acc;
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod_platform.h"
|
||||
#if (dsps_dotprod_f32_arp4_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_dotprod_f32_arp4
|
||||
.type dsps_dotprod_f32_arp4,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_dotprod_f32(const float* src1, const float* src2, float* dest, int len)
|
||||
//{
|
||||
// float acc = 0;
|
||||
// for (int i=0 ; i< len ; i++)
|
||||
// {
|
||||
// acc += src1[i]*src2[i];
|
||||
// }
|
||||
// *dest = acc;
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
dsps_dotprod_f32_arp4:
// Dot product of two contiguous float arrays.
// src1 - a0
// src2 - a1
// dest - a2
// len  - a3
// Result: the sum is stored at dest, a0 = 0 (ESP_OK).
// Clobbers: a0, a1, a3, a4, fa0, fa1, fa2.
//
// Both loops are software-pipelined: the pair for the next iteration is
// loaded right after the current multiply-add. As a consequence one element
// past the last used one is read from each array (and then discarded).
    add     sp, sp, -16             // frame is reserved; nothing is stored in it

    fmv.w.x fa2, zero               // fa2 = 0.0f, running sum

    // len <= 0: store 0.0f and return without reading the inputs.
    // Without this guard the len <= 2 loop below would decrement a3 past zero
    // and keep iterating (bnez only stops at exactly 0) while reading far
    // out of bounds.
    blez    a3, .dsps_dotprod_f32_arp4_store

    // Preload the first pair and advance both pointers by one float.
    flw     fa0, 0(a0)
    flw     fa1, 0(a1)
    add     a0, a0, 4
    add     a1, a1, 4
    li      a4, 2
    ble     a3, a4, .loop_less_2

// Loop when len > 2
// NOTE(review): esp.lp.setup presumably repeats the instructions up to and
// including the one labelled .dotprod_loop a3 times - confirm against the
// ESP32-P4 documentation.
    esp.lp.setup    0, a3, .dotprod_loop
        fmadd.s fa2, fa0, fa1, fa2  // sum += src1[i] * src2[i]
        flw     fa0, 0(a0)          // preload next pair
        flw     fa1, 0(a1)
        add     a0, a0, 4
    .dotprod_loop: add     a1, a1, 4
    fsw     fa2, 0(a2)

    add     sp, sp, 16
    li      a0, 0                   // return status ESP_OK
    ret

// Loop when 0 < len <= 2 (a3 counts down to 0)
.loop_less_2:
    fmadd.s fa2, fa0, fa1, fa2      // sum += src1[i] * src2[i]
    flw     fa0, 0(a0)              // preload next pair
    flw     fa1, 0(a1)
    add     a0, a0, 4
    add     a1, a1, 4
    add     a3, a3, -1
    bnez    a3, .loop_less_2

// Shared exit: also reached directly when len <= 0 (fa2 is still 0.0f).
.dsps_dotprod_f32_arp4_store:
    fsw     fa2, 0(a2)
    add     sp, sp, 16
    li      a0, 0                   // return status ESP_OK
    ret
|
||||
|
||||
#endif // dsps_dotprod_f32_arp4_enabled
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
.macro dotprod_f32_ae32 x1 x2 count step1 step2
|
||||
// This macro calculates floating point dot product for count float samples
|
||||
// x1, x2 - A registers with the base addresses of the input arrays (not modified)
|
||||
// count - A register with the amount of samples; the loop is skipped when it is 0
|
||||
// step1 - A register with the starting byte offset applied to BOTH arrays; it is advanced by step2 on every iteration, so it is clobbered
|
||||
// step2 - A register with the byte offset increment per sample (the caller in dsps_dotprod_f32_ae32 passes 4, the size of a float)
|
||||
// f1 - contains initial value
|
||||
//
|
||||
// result in f1
|
||||
//
|
||||
// Macros body:
|
||||
// f1 += x1[off]*x2[off]; off = step1 + i*step2 (byte offsets); i: 0..count-1
|
||||
// affected: f0, f1, f2 and the step1 register
|
||||
// Example: dotprod_f32_ae32 a2, a3, a5, a9, a8
|
||||
// a9 == 0, start offset; a8 == 4, step is 4 bytes
|
||||
// a5 == 32, length of array is 32
|
||||
//
// Notes:
// - x2 is read one sample ahead of x1, so one x2 sample past the last used
//   one is loaded (and, when count is 0, the sample at the start offset).
// - The loop end label is not made unique per expansion, so expanding this
//   macro twice in one assembly unit would define the label twice.
|
||||
// mov \step1, \step2
|
||||
lsx f0, \x2, \step1
|
||||
// sub \x1, \x1, \step1 // To compensate first increment
|
||||
loopnez \count, .loop_mac_end_m_ae32
|
||||
lsx f2, \x1, \step1
|
||||
madd.s f1, f2, f0
|
||||
add.n \step1, \step1, \step2
|
||||
lsx f0, \x2, \step1
|
||||
.loop_mac_end_m_ae32:
|
||||
.endm
|
||||
@@ -0,0 +1,64 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod_platform.h"
|
||||
#if (dotprode_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprode_f32_m_ae32.S"
|
||||
|
||||
// This is dot product function for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_dotprode_f32_ae32
|
||||
.type dsps_dotprode_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_dotprode_f32_ae32(const float* src1, const float* src2, float* dest, int len, int step1, int step2)
|
||||
//{
|
||||
// float acc = 0;
|
||||
// for (int i=0 ; i< len ; i++)
|
||||
// {
|
||||
// acc += src1[i*step1]*src2[i*step2];
|
||||
// }
|
||||
// *dest = acc;
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
dsps_dotprode_f32_ae32:
|
||||
// Strided dot product of two float arrays: the sum is stored at dest and
// a2 is set to 0 (ESP_OK) before returning.
// src1 - a2
|
||||
// src2 - a3
|
||||
// dest - a4
|
||||
// len - a5
|
||||
// step1- a6
|
||||
// step2- a7
|
||||
|
||||
// Windowed-ABI prologue with a 16-byte frame (matched by retw.n below).
entry a1, 16
|
||||
// Array increment for floating point data should be 4
// The steps arrive as element counts; shifting left by 2 converts them to
// byte steps (4 bytes per float).
|
||||
|
||||
slli a6,a6, 2
|
||||
slli a7,a7, 2
|
||||
// Clear initial state of the result register
|
||||
movi.n a9, 0
|
||||
wfr f1, a9
|
||||
// a2 - input1
|
||||
// a3 - input2
|
||||
// a5 - length
|
||||
// a6,a7, step in arrays
// The macro advances a2 by a6 and a3 by a7 on every iteration, so neither
// pointer is preserved.
|
||||
dotprode_f32_ae32 a2, a3, a5, a6, a7;
|
||||
|
||||
ssi f1, a4, 0 // Store result from f1 to memory at a4
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif //dotprode_f32_ae32_enabled
|
||||
@@ -0,0 +1,25 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod.h"
|
||||
|
||||
esp_err_t dsps_dotprode_f32_ansi(const float *src1, const float *src2, float *dest, int len, int step1, int step2)
|
||||
{
|
||||
float acc = 0;
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
acc += src1[i * step1] * src2[i * step2];
|
||||
}
|
||||
*dest = acc;
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod_platform.h"
|
||||
#if (dsps_dotprod_f32_arp4_enabled == 1)
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_dotprode_f32_arp4
|
||||
.type dsps_dotprode_f32_arp4,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_dotprode_f32(const float *src1, const float *src2, float *dest, int len, int step1, int step2)
|
||||
//{
|
||||
// float acc = 0;
|
||||
// for (int i = 0 ; i < len ; i++) {
|
||||
// acc += src1[i * step1] * src2[i * step2];
|
||||
// }
|
||||
// *dest = acc;
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
dsps_dotprode_f32_arp4:
// Strided dot product of two float arrays.
// src1  - a0
// src2  - a1
// dest  - a2
// len   - a3
// step1 - a4 (in elements; converted to bytes below)
// step2 - a5 (in elements; converted to bytes below)
// Result: the sum is stored at dest, a0 = 0 (ESP_OK).
// Clobbers: a0, a1, a3, a4, a5, a6, fa0, fa1, fa2.
//
// Both loops are software-pipelined: the pair for the next iteration is
// loaded right after the current multiply-add. As a consequence one sample
// past the last used one is read from each array (and then discarded).
    add     sp, sp, -16             // frame is reserved; nothing is stored in it

    fmv.w.x fa2, zero               // fa2 = 0.0f, running sum

    // len <= 0: store 0.0f and return without reading the inputs.
    // Without this guard the len <= 2 loop below would decrement a3 past zero
    // and keep iterating (bnez only stops at exactly 0) while reading far
    // out of bounds.
    blez    a3, .dsps_dotprode_f32_arp4_store

    slli    a4, a4, 2               // step address increment by 4
    slli    a5, a5, 2               // step address increment by 4

    // Preload the first pair and advance both pointers by one step.
    flw     fa0, 0(a0)
    flw     fa1, 0(a1)
    add     a0, a0, a4
    add     a1, a1, a5
    li      a6, 2
    ble     a3, a6, .loop_less_2

// Loop when len > 2
// NOTE(review): esp.lp.setup presumably repeats the instructions up to and
// including the one labelled .dotprod_loop a3 times - confirm against the
// ESP32-P4 documentation.
    esp.lp.setup    0, a3, .dotprod_loop
        fmadd.s fa2, fa0, fa1, fa2  // sum += src1[i*step1] * src2[i*step2]
        flw     fa0, 0(a0)          // preload next pair
        flw     fa1, 0(a1)
        add     a0, a0, a4
    .dotprod_loop: add     a1, a1, a5
    fsw     fa2, 0(a2)

    add     sp, sp, 16
    li      a0, 0                   // return status ESP_OK
    ret

// Loop when 0 < len <= 2 (a3 counts down to 0)
.loop_less_2:
    fmadd.s fa2, fa0, fa1, fa2      // sum += src1[i*step1] * src2[i*step2]
    flw     fa0, 0(a0)              // preload next pair
    flw     fa1, 0(a1)
    add     a0, a0, a4
    add     a1, a1, a5
    add     a3, a3, -1
    bnez    a3, .loop_less_2

// Shared exit: also reached directly when len <= 0 (fa2 is still 0.0f).
.dsps_dotprode_f32_arp4_store:
    fsw     fa2, 0(a2)
    add     sp, sp, 16
    li      a0, 0                   // return status ESP_OK
    ret
|
||||
|
||||
#endif // dsps_dotprod_f32_arp4_enabled
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
.macro dotprode_f32_ae32 x1 x2 count step1 step2
|
||||
// This macro calculates floating point dot product for count float samples, with a separate step for each array
|
||||
// x1, x2 - A registers with the addresses of the input arrays; both are advanced by the macro
|
||||
// count - A register with the amount of samples; the loop is skipped when it is 0
|
||||
// step1,step2 - A registers with the byte step for x1 and x2 (the caller in dsps_dotprode_f32_ae32 passes the element step shifted left by 2)
|
||||
// f1 - contains initial value
|
||||
//
|
||||
// result in f1
|
||||
//
|
||||
// Macros body:
|
||||
// f1 += x1[i*step1]*x2[i*step2]; i: 0..count-1 (steps are byte offsets)
|
||||
// affected: f0, f1, f2 and the x1, x2 registers
|
||||
// Example: dotprode_f32_ae32 a2, a3, a5, a6, a7
|
||||
// a6 == a7 == 4, step is 4 bytes (contiguous floats)
|
||||
// a5 == 32, length of array is 32
|
||||
//
// Notes:
// - x2 is read one sample ahead of x1, so one x2 sample past the last used
//   one is loaded (and, when count is 0, the first x2 sample).
// - On exit x1 is step1 bytes short of where a plain post-increment would
//   leave it, because of the compensating sub before the loop.
|
||||
lsi f0, \x2, 0
|
||||
sub \x1, \x1, \step1 // To compensate first increment
|
||||
loopnez \count, .loop_mace_end_m_ae32
|
||||
add.n \x1, \x1, \step1
|
||||
lsi f2, \x1, 0
|
||||
madd.s f1, f2, f0
|
||||
add.n \x2, \x2, \step2
|
||||
lsi f0, \x2, 0
|
||||
.loop_mace_end_m_ae32:
|
||||
.endm
|
||||
Reference in New Issue
Block a user