add some code
This commit is contained in:
@@ -0,0 +1,184 @@
|
||||
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspi_conv.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
esp_err_t dspi_conv_f32_ansi(const image2d_t *in_image, const image2d_t *filter, image2d_t *out_image)
|
||||
{
|
||||
out_image->size_x = in_image->size_x;
|
||||
out_image->size_y = in_image->size_y;
|
||||
float *i_data = (float *)in_image->data;
|
||||
float *out_data = (float *)out_image->data;
|
||||
|
||||
int rest_x = (filter->size_x - 1) >> 1;
|
||||
int rest_y = (filter->size_y - 1) >> 1;
|
||||
|
||||
int i_pos = 0;
|
||||
int i_step = in_image->stride_x * in_image->step_y;
|
||||
int f_step = filter->stride_x * filter->step_y;
|
||||
|
||||
// Up side of image
|
||||
for (int y = 0 ; y < rest_y; y++ ) {
|
||||
int i_pos_y = i_pos;
|
||||
for (int x = 0 ; x < rest_x; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = rest_y - y ; m < filter->size_y ; m++) {
|
||||
for (int n = rest_x - x ; n < filter->size_x ; n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
for (int x = rest_x ; x < in_image->size_x - filter->size_x / 2; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = rest_y - y ; m < filter->size_y ; m++) {
|
||||
for (int n = 0 ; n < filter->size_x ; n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
for (int x = in_image->size_x - filter->size_x / 2 - 1; x < in_image->size_x; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = rest_y - y ; m < filter->size_y ; m++) {
|
||||
for (int n = 0 ; n < filter->size_x - (x - in_image->size_x + filter->size_x / 2 + 1); n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
i_pos += in_image->stride_x * in_image->step_y;
|
||||
}
|
||||
// Middle side of image
|
||||
i_pos = 0;
|
||||
for (int y = rest_y ; y < in_image->size_y - filter->size_y / 2; y++ ) {
|
||||
int i_pos_y = i_pos;
|
||||
for (int x = 0 ; x < rest_x; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = 0 ; m < filter->size_y ; m++) {
|
||||
for (int n = rest_x - x ; n < filter->size_x ; n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
for (int x = in_image->size_x - filter->size_x / 2 - 1; x < in_image->size_x; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = 0 ; m < filter->size_y ; m++) {
|
||||
for (int n = 0 ; n < filter->size_x - (x - in_image->size_x + filter->size_x / 2 + 1); n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
|
||||
i_pos += in_image->stride_x * in_image->step_y;
|
||||
}
|
||||
// Down side of image
|
||||
i_pos = 0;
|
||||
for (int y = in_image->size_y - filter->size_y / 2 ; y < in_image->size_y; y++ ) {
|
||||
int i_pos_y = i_pos;
|
||||
for (int x = 0 ; x < rest_x; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = 0 ; m < filter->size_y - (y - in_image->size_y + filter->size_y / 2 + 1); m++) {
|
||||
for (int n = rest_x - x ; n < filter->size_x ; n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
for (int x = rest_x ; x < in_image->size_x - filter->size_x / 2; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = 0 ; m < filter->size_y - (y - in_image->size_y + filter->size_y / 2 + 1); m++) {
|
||||
for (int n = 0 ; n < filter->size_x ; n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
for (int x = in_image->size_x - filter->size_x / 2 ; x < in_image->size_x; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = 0 ; m < filter->size_y - (y - in_image->size_y + filter->size_y / 2 + 1); m++) {
|
||||
for (int n = 0 ; n < filter->size_x - (x - in_image->size_x + filter->size_x / 2 + 1); n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
|
||||
i_pos += in_image->stride_x * in_image->step_y;
|
||||
}
|
||||
// Main image block
|
||||
i_pos = 0;
|
||||
for (int y = rest_y ; y < in_image->size_y - filter->size_y / 2; y++ ) {
|
||||
int i_pos_y = i_pos;
|
||||
for (int x = rest_x ; x < in_image->size_x - filter->size_x / 2; x++) {
|
||||
int i_pos_x = i_pos_y;
|
||||
float acc = 0;
|
||||
float *f_data = (float *)filter->data;
|
||||
for (int m = 0 ; m < filter->size_y ; m++) {
|
||||
for (int n = 0 ; n < filter->size_x ; n++) {
|
||||
acc += i_data[i_pos_x + n * in_image->step_x] * f_data[filter->step_x * n];
|
||||
}
|
||||
f_data += f_step;
|
||||
i_pos_x += i_step;
|
||||
}
|
||||
i_pos_y += in_image->step_x;
|
||||
out_data[x * out_image->step_x + y * out_image->stride_x * out_image->step_y] = acc;
|
||||
}
|
||||
i_pos += in_image->stride_x * in_image->step_y;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,144 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_conv_platform.h"
|
||||
#if (dsps_ccorr_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_conv_f32_m_ae32.S"
|
||||
|
||||
// This is the ccorr function (dsps_ccorr_f32) for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_ccorr_f32_ae32
|
||||
.type dsps_ccorr_f32_ae32,@function
|
||||
// The function implements the C code from dsps_ccorr_f32_ansi:
|
||||
//esp_err_t dsps_ccorr_f32_ansi(const float *Signal, const int siglen, const float *Kernel, const int kernlen, float *corrout);
|
||||
//
|
||||
dsps_ccorr_f32_ae32:
// Arguments:
//   a2 - Signal
//   a3 - siglen
//   a4 - Kernel
//   a5 - kernlen
//   a6 - corrout
//
// Working registers:
//   a7  - +4, step handed to the conv_f32_ae32 macro for both arrays
//   a8  - -4 (loaded, but not used by this routine)
//   a9  - phase 1: number of products (n + 1); phases 2 and 3: pointer to sig[kmin]
//   a10 - kernel pointer passed to the macro
//   a11 - outer loop counter (in phase 3 also the number of products)
//   a12 - signal pointer passed to the macro
//   a13 - phase 1: pointer to kern[kmin]; phase 2: number of products (lkern)
//   a14 - constant 0, used to clear the accumulator f1
//
// Three phases store lkern, lsig - lkern and lkern - 1 results respectively,
// i.e. lsig + lkern - 1 floats in total.
// The arguments are not validated here; both lengths are assumed to be > 0.

    entry   a1, 16

    // The longer input is treated as the signal: if siglen < kernlen,
    // swap (Signal, siglen) with (Kernel, kernlen) using a10 as scratch.
    sub     a10, a3, a5
    bgez    a10, dsps_ccorr_positive
    addi    a10, a2, 0
    addi    a2, a4, 0
    addi    a4, a10, 0

    addi    a10, a3, 0
    addi    a3, a5, 0
    addi    a5, a10, 0

dsps_ccorr_positive:
    movi.n  a8, 4
    addi    a11, a5, 0          // a11 - phase 1 loop counter = lkern
    movi.n  a14, 0              // a14 - constant 0
    addi    a9, a14, 1          // a9 - product count for n = 0, i.e. n + 1

    movi.n  a7, 4
    movi.n  a8, -4

    mull    a13, a5, a7         // a13 - kernlen*4
    add     a13, a13, a4        // a13 - &Kernel[kernlen]
    addi    a13, a13, -4        // a13 - &Kernel[kernlen - 1]

// Phase 1: n = 0 .. lkern-1, corrout[n] = sum(sig[k] * kern[kmin + k]), k = 0..n
ccorr_loop1:
    addi    a10, a13, 0         // a10 - &kern[kmin], kmin = lkern - 1 - n
    addi    a12, a2, 0          // a12 - &sig[0]
    wfr     f1, a14             // clear accumulator: corrout[n] = 0

    // a12 - sig[0]
    // a10 - kern[kmin]
    // a9  - n + 1
    // a7  - 4, step for both arrays
    conv_f32_ae32 a12, a10, a9, a7, a7, loop1

    addi    a9, a9, 1           // (n + 1)++
    addi    a13, a13, -4        // kmin-- : a13 moves one float towards kern[0]

    ssi     f1, a6, 0           // store result from f1 to memory at a6
    addi    a6, a6, 4           // corrout++ - increment output pointer

    addi    a11, a11, -1
    bnez    a11, ccorr_loop1

    addi    a9, a2, 4           // a9 - &sig[1], first sig[kmin] of phase 2
    addi    a13, a5, 0          // a13 - product count for phase 2 = lkern

    // Phase 2 runs lsig - lkern times; skip it if that is 0
    sub     a11, a3, a5         // a11 - loop counter = siglen - kernlen
    beqz    a11, skip_ccorr_loop2

// Phase 2: n = lkern .. lsig-1, full overlap of lkern products
ccorr_loop2:
    addi    a12, a9, 0          // a12 - &sig[kmin]
    addi    a10, a4, 0          // a10 - &kern[0]
    wfr     f1, a14             // clear accumulator: corrout[n] = 0

    // a12 - sig[kmin]
    // a10 - kern[0]
    // a13 - kernlen
    // a7  - 4, step for both arrays
    conv_f32_ae32 a12, a10, a13, a7, a7, loop2

    addi    a9, a9, 4           // kmin++

    ssi     f1, a6, 0           // store result from f1 to memory at a6
    addi    a6, a6, 4           // corrout++ - increment output pointer

    addi    a11, a11, -1
    bnez    a11, ccorr_loop2

skip_ccorr_loop2:

    // a9 - the same (keeps pointing at sig[kmin])
    addi    a11, a5, -1         // a11 - phase 3 counter and product count = lkern - 1
    addi    a13, a5, -1
    // A one-element kernel has no phase 3 results. Without this guard the
    // body would run once with a11 == 0, store one float past the
    // lsig + lkern - 1 results, and then decrement a11 below zero so that
    // bnez keeps looping.
    beqz    a11, skip_ccorr_loop3

// Phase 3: n = lsig .. lsig+lkern-2, the overlap shrinks by one product per step
ccorr_loop3:
    addi    a12, a9, 0          // a12 - &sig[kmin]
    addi    a10, a4, 0          // a10 - &kern[0]
    wfr     f1, a14             // clear accumulator: corrout[n] = 0

    // a12 - sig[kmin]
    // a10 - kern[0]
    // a11 - number of products
    // a7  - 4, step for both arrays
    conv_f32_ae32 a12, a10, a11, a7, a7, loop3

    addi    a9, a9, 4           // kmin++

    ssi     f1, a6, 0           // store result from f1 to memory at a6
    addi    a6, a6, 4           // corrout++ - increment output pointer

    addi    a11, a11, -1
    bnez    a11, ccorr_loop3
skip_ccorr_loop3:

    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_ccorr_f32_ae32_enabled
|
||||
@@ -0,0 +1,81 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_conv.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
static const char *TAG = "dsps_conv";
|
||||
|
||||
esp_err_t dsps_ccorr_f32_ansi(const float *Signal, const int siglen, const float *Kernel, const int kernlen, float *corrvout)
|
||||
{
|
||||
if (NULL == Signal) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == Kernel) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == corrvout) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
float *sig = (float *)Signal;
|
||||
float *kern = (float *)Kernel;
|
||||
int lsig = siglen;
|
||||
int lkern = kernlen;
|
||||
|
||||
if (siglen < kernlen) {
|
||||
sig = (float *)Kernel;
|
||||
kern = (float *)Signal;
|
||||
lsig = kernlen;
|
||||
lkern = siglen;
|
||||
}
|
||||
|
||||
for (int n = 0; n < lkern; n++) {
|
||||
int k;
|
||||
int kmin = lkern - 1 - n;
|
||||
corrvout[n] = 0;
|
||||
|
||||
for (k = 0; k <= n; k++) {
|
||||
corrvout[n] += sig[k] * kern[kmin + k];
|
||||
}
|
||||
ESP_LOGV(TAG, "L1 k = %i, n = %i , kmin= %i, kmax= %i", 0, n, kmin, kmin + n);
|
||||
}
|
||||
for (int n = lkern; n < lsig; n++) {
|
||||
int kmin, kmax, k;
|
||||
|
||||
corrvout[n] = 0;
|
||||
|
||||
kmin = n - lkern + 1;
|
||||
kmax = n;
|
||||
for (k = kmin; k <= kmax; k++) {
|
||||
corrvout[n] += sig[k] * kern[k - kmin];
|
||||
}
|
||||
ESP_LOGV(TAG, "L2 n=%i, kmin = %i, kmax = %i , k-kmin = %i", n, kmin, kmax, 0);
|
||||
}
|
||||
|
||||
for (int n = lsig; n < lsig + lkern - 1; n++) {
|
||||
int kmin, kmax, k;
|
||||
|
||||
corrvout[n] = 0;
|
||||
|
||||
kmin = n - lkern + 1;
|
||||
kmax = lsig - 1;
|
||||
|
||||
for (k = kmin; k <= kmax; k++) {
|
||||
corrvout[n] += sig[k] * kern[k - kmin];
|
||||
}
|
||||
ESP_LOGV(TAG, "L3 n=%i, kmin = %i, kmax = %i , k - kmin = %i", n, kmin, kmax, kmax - kmin);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_conv_platform.h"
|
||||
#if (dsps_conv_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_conv_f32_m_ae32.S"
|
||||
|
||||
// This is the convolution function (dsps_conv_f32) for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_conv_f32_ae32
|
||||
.type dsps_conv_f32_ae32,@function
|
||||
// The function implements the C code from dsps_conv_f32_ansi:
|
||||
//esp_err_t dsps_conv_f32_ansi(const float *Signal, const int siglen, const float *Kernel, const int kernlen, float *convout);
|
||||
//
|
||||
dsps_conv_f32_ae32:
// Arguments:
//   a2 - Signal
//   a3 - siglen
//   a4 - Kernel
//   a5 - kernlen
//   a6 - convout
//
// Working registers:
//   a7  - +4, step handed to the conv_f32_ae32 macro for the signal
//   a8  - -4, step handed to the macro for the kernel (walked backwards)
//   a9  - phase 1: number of products (n + 1); phases 2 and 3: pointer one
//         float below sig[kmin]
//   a10 - kernel pointer passed to the macro
//   a11 - outer loop counter
//   a12 - signal pointer passed to the macro
//   a13 - number of products in phases 2 and 3
//   a14 - constant 0, used to clear the accumulator f1
//
// Three phases store lkern, lsig - lkern and lkern - 1 results respectively,
// i.e. lsig + lkern - 1 floats in total.
// The arguments are not validated here; both lengths are assumed to be > 0.

    entry   a1, 16

    // The longer input is treated as the signal: if siglen < kernlen,
    // swap (Signal, siglen) with (Kernel, kernlen) using a10 as scratch.
    sub     a10, a3, a5
    bgez    a10, dsps_conv_positive
    addi    a10, a2, 0
    addi    a2, a4, 0
    addi    a4, a10, 0

    addi    a10, a3, 0
    addi    a3, a5, 0
    addi    a5, a10, 0

dsps_conv_positive:
    movi.n  a8, 4
    addi    a11, a5, 0          // a11 - phase 1 loop counter = lkern
    movi.n  a14, 0              // a14 - constant 0
    addi    a9, a14, 1          // a9 - product count for n = 0, i.e. n + 1

    movi.n  a7, 4
    movi.n  a8, -4

// Phase 1: n = 0 .. lkern-1, convout[n] = sum(sig[k] * kern[n - k]), k = 0..n
conv_loop1:
    addi    a10, a4, 0          // a10 - &kern[n]
    addi    a12, a2, 0          // a12 - &sig[0]
    wfr     f1, a14             // clear accumulator: convout[n] = 0

    // a12 - sig[0]
    // a10 - kern[n]
    // a9  - n + 1
    // a7  - 4, signal step
    // a8  - -4, kernel step
    conv_f32_ae32 a12, a10, a9, a7, a8, loop1

    addi    a9, a9, 1           // (n + 1)++
    addi    a4, a4, 4           // kern[n] - a4++

    ssi     f1, a6, 0           // store result from f1 to memory at a6
    addi    a6, a6, 4           // convout++ - increment output pointer

    addi    a11, a11, -1
    bnez    a11, conv_loop1

    // After phase 1 a4 points at kern[lkern], so a4 - 4 is &kern[lkern - 1]
    addi    a9, a2, 0           // a9 - &sig[0]; the loops below load from a9 + 4
    addi    a13, a5, 0          // a13 - product count for phase 2 = lkern

    // Phase 2 runs lsig - lkern times; skip it if that is 0
    sub     a11, a3, a5         // a11 - loop counter = siglen - kernlen
    beqz    a11, skip_conv_loop2

// Phase 2: n = lkern .. lsig-1, full overlap of lkern products
conv_loop2:
    addi    a12, a9, 4          // a12 - &sig[kmin]
    addi    a10, a4, -4         // a10 - &kern[lkern - 1] = &kern[n - kmin]
    wfr     f1, a14             // clear accumulator: convout[n] = 0

    // a12 - sig[kmin]
    // a10 - kern[n - kmin]
    // a13 - number of products
    // a7  - 4, signal step
    // a8  - -4, kernel step
    conv_f32_ae32 a12, a10, a13, a7, a8, loop2

    addi    a9, a9, 4           // kmin++

    ssi     f1, a6, 0           // store result from f1 to memory at a6
    addi    a6, a6, 4           // convout++ - increment output pointer

    addi    a11, a11, -1
    bnez    a11, conv_loop2

skip_conv_loop2:

    // a9 - the same (keeps tracking sig[kmin] - 1)
    addi    a11, a5, -1         // a11 - phase 3 loop counter = lkern - 1
    addi    a13, a5, -1         // a13 - product count for the first phase 3 result
    // A one-element kernel has no phase 3 results. Without this guard the
    // body would run once with a11 == 0, store one float past the
    // lsig + lkern - 1 results, and then decrement a11 below zero so that
    // bnez keeps looping.
    beqz    a11, skip_conv_loop3

// Phase 3: n = lsig .. lsig+lkern-2, the overlap shrinks by one product per step
conv_loop3:
    addi    a12, a9, 4          // a12 - &sig[kmin]
    addi    a10, a4, -4         // a10 - &kern[lkern - 1] = &kern[n - kmin]
    wfr     f1, a14             // clear accumulator: convout[n] = 0

    // a12 - sig[kmin]
    // a10 - kern[n - kmin]
    // a13 - number of products
    // a7  - 4, signal step
    // a8  - -4, kernel step
    conv_f32_ae32 a12, a10, a13, a7, a8, loop3

    addi    a9, a9, 4           // kmin++

    ssi     f1, a6, 0           // store result from f1 to memory at a6
    addi    a6, a6, 4           // convout++ - increment output pointer

    addi    a13, a13, -1        // one product fewer for the next output

    addi    a11, a11, -1
    bnez    a11, conv_loop3
skip_conv_loop3:

    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif // dsps_conv_f32_ae32_enabled
|
||||
@@ -0,0 +1,81 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_conv.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
static const char *TAG = "dsps_conv";
|
||||
|
||||
esp_err_t dsps_conv_f32_ansi(const float *Signal, const int siglen, const float *Kernel, const int kernlen, float *convout)
|
||||
{
|
||||
if (NULL == Signal) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == Kernel) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == convout) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
float *sig = (float *)Signal;
|
||||
float *kern = (float *)Kernel;
|
||||
int lsig = siglen;
|
||||
int lkern = kernlen;
|
||||
|
||||
if (siglen < kernlen) {
|
||||
sig = (float *)Kernel;
|
||||
kern = (float *)Signal;
|
||||
lsig = kernlen;
|
||||
lkern = siglen;
|
||||
}
|
||||
|
||||
for (int n = 0; n < lkern; n++) {
|
||||
size_t k;
|
||||
|
||||
convout[n] = 0;
|
||||
|
||||
for (k = 0; k <= n; k++) {
|
||||
convout[n] += sig[k] * kern[n - k];
|
||||
}
|
||||
ESP_LOGV(TAG, "L1 kmin = %i, kmax = %i , n-kmin = %i", 0, n, n);
|
||||
}
|
||||
for (int n = lkern; n < lsig; n++) {
|
||||
int kmin, kmax, k;
|
||||
|
||||
convout[n] = 0;
|
||||
|
||||
kmin = n - lkern + 1;
|
||||
kmax = n;
|
||||
ESP_LOGV(TAG, "L2 n=%i, kmin = %i, kmax = %i , n-kmin = %i", n, kmin, kmax, n - kmin);
|
||||
for (k = kmin; k <= kmax; k++) {
|
||||
convout[n] += sig[k] * kern[n - k];
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = lsig; n < lsig + lkern - 1; n++) {
|
||||
int kmin, kmax, k;
|
||||
|
||||
convout[n] = 0;
|
||||
|
||||
kmin = n - lkern + 1;
|
||||
kmax = lsig - 1;
|
||||
|
||||
for (k = kmin; k <= kmax; k++) {
|
||||
convout[n] += sig[k] * kern[n - k];
|
||||
}
|
||||
ESP_LOGV(TAG, "L3 n=%i, kmin = %i, kmax = %i , n-kmin = %i", n, kmin, kmax, n - kmin);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
.macro conv_f32_ae32 x1 x2 count step1 step2 name
|
||||
// This macro accumulates a floating point dot product over count float samples
|
||||
// x1, x2 - address registers pointing at the two input arrays
|
||||
// count - register holding the number of samples
|
||||
// step1 - register holding the address step used for every load from x1
|
||||
// step2 - register holding the address step used for every load from x2
// NOTE(review): call sites pass +4 / -4, so the steps look like byte offsets for float data - confirm against the lsxp definition
// name - suffix appended to the loop-end label so that each expansion gets its own label
|
||||
// f1 - contains the initial value on entry
|
||||
//
|
||||
// result in f1
|
||||
//
|
||||
// Macro body:
|
||||
// f1 += x1[]*x2[]; i: 0..count-1
|
||||
// affected: f0, f1, f2 (x1 and x2 are presumably advanced by the lsxp loads - TODO confirm)
|
||||
// Example: conv_f32_ae32 a2, a3, a5, a8, a8, loop1
|
||||
// a8 == 4, step is 4 bytes
|
||||
// a5 == 32, length of array is 32
|
||||
//
|
||||
// The first x2 value is loaded before the loop; each iteration then loads the next one after the multiply-add
lsxp f0, \x2, \step2
|
||||
// loopnez does not execute the loop body when count is 0
loopnez \count, loop_mac_end_m_ae32\name
|
||||
lsxp f2, \x1, \step1
|
||||
madd.s f1, f2, f0
|
||||
lsxp f0, \x2, \step2
|
||||
loop_mac_end_m_ae32\name:
|
||||
.endm
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_conv_platform.h"
|
||||
#if (dsps_corr_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is the correlation function (dsps_corr_f32) for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_corr_f32_ae32
|
||||
.type dsps_corr_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_corr_f32_ansi(const float *Signal, const int siglen, const float *Pattern, const int patlen, float *dest)
|
||||
//{
|
||||
// for (size_t n = 0; n <= (siglen - patlen); n++) {
|
||||
// float k_corr = 0;
|
||||
// for (size_t m = 0; m < patlen; m++) {
|
||||
// k_corr += Signal[n + m] * Pattern[m];
|
||||
// }
|
||||
// dest[n] = k_corr;
|
||||
// }
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
dsps_corr_f32_ae32:
|
||||
// Signal - a2
|
||||
// siglen - a3
|
||||
// Pattern - a4
|
||||
// patlen - a5
|
||||
// dest - a6
|
||||
// a11 - loop length (number of results still to be produced)
// The arguments are not validated here: the loop below assumes siglen >= patlen
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
movi.n a8, 4
|
||||
// a13 is not referenced again in this routine
movi.n a13, 4
|
||||
sub a11, a3, a5 // a11 = siglen - patlen
|
||||
addi a11, a11, 1 // a11 = loop length = siglen - patlen + 1
|
||||
addi a12, a2, 0 // move input pointer to the a12
|
||||
movi.n a9, 0
|
||||
// a14 is not referenced again in this routine
movi.n a14, 0
|
||||
|
||||
// One iteration per output sample: dot product of the pattern with the signal window starting at a12
corr_loop:
|
||||
// Clear initial state of the result register
|
||||
addi a10, a4, 0 // a10 - pattern
|
||||
movi.n a9, 0 // clear a9
|
||||
wfr f1, a9 // clear f1
|
||||
// a12 - input1
|
||||
// a10 - input2
|
||||
// a5 - length
|
||||
// a8 - 4, step in arrays
|
||||
// a9 - 0
|
||||
// dotprod_f32_ae32 comes from dsps_dotprod_f32_m_ae32.S (not shown here); the result is expected in f1
dotprod_f32_ae32 a12, a10, a5, a9, a8;
|
||||
|
||||
ssi f1, a6, 0 // Store result from f1 to memory at a6
|
||||
addi a6, a6, 4 // y++ - increment output pointer
|
||||
addi a12, a12, 4 // Signal++
|
||||
addi a11, a11, -1
|
||||
bnez a11, corr_loop
|
||||
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_corr_f32_ae32_enabled
|
||||
@@ -0,0 +1,40 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_corr.h"
|
||||
|
||||
esp_err_t dsps_corr_f32_ansi(const float *Signal, const int siglen, const float *Pattern, const int patlen, float *dest)
|
||||
{
|
||||
if (NULL == Signal) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == Pattern) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == dest) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (siglen < patlen) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (size_t n = 0; n <= (siglen - patlen); n++) {
|
||||
float k_corr = 0;
|
||||
for (size_t m = 0; m < patlen; m++) {
|
||||
k_corr += Signal[n + m] * Pattern[m];
|
||||
}
|
||||
dest[n] = k_corr;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
Reference in New Issue
Block a user