add some code
This commit is contained in:
@@ -0,0 +1,174 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_s16_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_s16_m_ae32.S"
|
||||
#include "dspm_mult_s16_m_ae32_vector.S"
|
||||
//esp_err_t dspm_mult_s16_ae32(const int16_t* A, const int16_t* B, int16_t* C, int m, int n, int k, int shift);
|
||||
|
||||
// This is the matrix multiplication function for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_s16_ae32
|
||||
.global .dspm_mult_s16_ae32_body
|
||||
.type dspm_mult_s16_ae32,@function
|
||||
|
||||
dspm_mult_s16_ae32:
// esp_err_t dspm_mult_s16_ae32(const int16_t *A, const int16_t *B, int16_t *C,
//                              int m, int n, int k, int shift);
//
// Fixed-point matrix product C(m,k) = A(m,n) * B(n,k) using the MAC16 unit.
// Every output element starts from the rounding term (0x7fff >> shift) and the
// 40-bit accumulator is shifted right by (15 - shift) before the low 16 bits
// are stored to C.
//
// Arguments (windowed ABI):
//   a2 - A
//   a3 - B
//   a4 - C
//   a5 - m (rows of A / C)
//   a6 - n (columns of A, rows of B)
//   a7 - k (columns of B / C)
//   shift - on the stack, read from a1 + 80 after `entry`
//
// Working registers in the k > 1 paths:
//   a8, a9 - temporaries
//   a10    - byte offset of the current B/C column (0, 2, ... 2*k)
//   a12    - pointer into the current row of A
//   a13    - pointer into the current column of B
//   a14    - n * 2, byte size of one row of A
//   a15    - rounding value, 0x7fff >> shift
//
// Returns ESP_OK (0) in a2. Non-positive m or k produce no output and also
// return ESP_OK.
//
// NOTE(review): the even-n path reads A with `ldinc` (32-bit loads), so A is
// presumably expected to be 4-byte aligned - confirm against callers.

    entry   a1, 80

// Shared entry point: dspm_mult_s16_aes3 jumps here (after its own identical
// `entry a1, 80`) when k is not a multiple of 8.
.dspm_mult_s16_ae32_body:

    // The row and column loops below test their counters only after the first
    // pass (bnez / bne), so a non-positive m or k would run away. Return early.
    blti    a5, 1, .dspm_mult_s16_ae32_done     // m <= 0: nothing to compute
    blti    a7, 1, .dspm_mult_s16_ae32_done     // k <= 0: nothing to compute

    l32i.n  a8, a1, 80              // a8 = shift

    // Rounding value: a15 = 0x7fff >> shift
    ssr     a8
    movi    a15, 0x7fff
    srl     a15, a15

    // SAR = 15 - shift, used by every final `src` below
    neg     a8, a8
    addi    a8, a8, 15
    ssr     a8
    movi    a8, 0                   // a8 = 0 (the vector path writes it to ACCHI)

    slli    a14, a6, 1              // a14 = n * 2, byte stride between rows of A
    movi.n  a10, 2                  // increment = 2
    movi.n  a9, 0                   // row counter used by the vector path

    // ====== k == 1: matrix * vector is handled by the vector macro ======
    movi    a12, 1
    beq     a7, a12, vector_mult

    // ====== general path, k > 1 ======
    bbsi    a6, 0, odd_N_samples    // bit 0 of n set -> n is odd

    // ---------------- n is even: two samples of A per loop iteration
    srli    a6, a6, 1               // a6 = n / 2, loop count (done only once)
    slli    a7, a7, 1               // a7 = k * 2, byte stride between rows of B

// loop over the m rows of A
m_loop_mmult:
    movi    a10, 0                  // restart at column 0
    mov     a13, a3                 // a13 = &B[0][0]

// loop over the k columns of B
k_loop_mmult:
    addi    a12, a2, -4             // ldinc pre-increments by 4, so start one word early

    movi    a8, 0
    wsr     a8, acchi
    wsr     a15, acclo              // accumulator = rounding value

    loopnez a6, .loop_end_mmult     // n / 2 iterations
.loop_mmult:
    ldinc   m3, a12                 // m3 = next two samples of the A row
    l16si   a8, a13, 0              // a8 = B[s][j]
    add     a13, a13, a7            // next row of B
    mula.ad.ll a8, m3               // acc += a8.low * m3.low
    l16si   a8, a13, 0              // a8 = B[s+1][j]
    add     a13, a13, a7            // next row of B
    mula.ad.lh a8, m3               // acc += a8.low * m3.high
.loop_end_mmult:

    rsr     a8, acchi
    rsr     a9, acclo
    src     a8, a8, a9              // (acchi:acclo) >> (15 - shift)
    s16i    a8, a4, 0               // store C[i][j]
    addi    a4, a4, 2

    // next column
    addi    a10, a10, 2
    add     a13, a3, a10            // a13 = &B[0][j + 1]
    bne     a10, a7, k_loop_mmult

    // next row
    add     a2, a2, a14             // A += n samples
    addi    a5, a5, -1
    bnez.n  a5, m_loop_mmult

.dspm_mult_s16_ae32_done:
    movi.n  a2, 0                   // return status ESP_OK
    retw.n

odd_N_samples:
    // ---------------- n is odd: one sample of A per loop iteration
    slli    a7, a7, 1               // a7 = k * 2, byte stride between rows of B

// loop over the m rows of A
m_loop_mmult_odd:
    movi    a10, 0                  // restart at column 0
    mov     a13, a3                 // a13 = &B[0][0]

// loop over the k columns of B
k_loop_mmult_odd:
    mov     a12, a2                 // a12 = start of the current A row

    movi    a8, 0
    wsr     a8, acchi
    wsr     a15, acclo              // accumulator = rounding value

    loopnez a6, .loop_end_mmult_odd // n iterations
.loop_mmult_odd:
    l16si   a9, a12, 0              // a9 = A[i][s]
    l16si   a8, a13, 0              // a8 = B[s][j]
    addi    a12, a12, 2
    add     a13, a13, a7            // next row of B
    mula.aa.ll a8, a9               // acc += a8.low * a9.low
.loop_end_mmult_odd:

    rsr     a8, acchi
    rsr     a9, acclo
    src     a8, a8, a9              // (acchi:acclo) >> (15 - shift)
    s16i    a8, a4, 0               // store C[i][j]
    addi    a4, a4, 2

    // next column
    addi    a10, a10, 2
    add     a13, a3, a10            // a13 = &B[0][j + 1]
    bne     a10, a7, k_loop_mmult_odd

    // next row
    add     a2, a2, a14             // A += n samples
    addi    a5, a5, -1
    bnez.n  a5, m_loop_mmult_odd

    movi.n  a2, 0                   // return status ESP_OK
    retw.n

// The path where k == 1 (B is a column vector)
vector_mult:
    dspm_mult_s16_m_ae32_vector;
|
||||
|
||||
|
||||
#endif // dspm_mult_s16_ae32_enabled
|
||||
@@ -0,0 +1,142 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_s16_aes3_enabled == 1)
|
||||
#include "dsps_dotprod_s16_m_ae32.S"
|
||||
#include "dspm_mult_s16_m_ae32_vector.S"
|
||||
|
||||
//esp_err_t dspm_mult_s16_aes3(const int16_t* A, const int16_t* B, int16_t* C, int m, int n, int k, int shift);
|
||||
|
||||
// This is the matrix multiplication function for the ESP32-S3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.literal_position
|
||||
.literal .LC0_1_38, 32767
|
||||
.literal .LC1_1_39, 16383
|
||||
|
||||
.global dspm_mult_s16_aes3
|
||||
.global .dspm_mult_s16_ae32_body
|
||||
.type dspm_mult_s16_aes3,@function
|
||||
|
||||
dspm_mult_s16_aes3:
// ESP32-S3 variant of the s16 matrix multiplication.
//
// Arguments (windowed ABI): a2 = A, a3 = B, a4 = C, a5 = m, a6 = n, a7 = k,
// shift is read from the stack at a1 + 80 after `entry`.
//
// When k is not a multiple of 8 the work is handed to the ESP32 code at
// .dspm_mult_s16_ae32_body; otherwise C is produced eight columns at a time
// with the ee.* vector instructions. Returns ESP_OK (0) in a2.
//
// Stack frame usage (offsets from a1):
//    0..15 - eight 16-bit copies of the rounding value
//   32     - saved B pointer
//   36     - n * 2 (byte size of one row of A)
//   40     - k / 8 (number of 8-column blocks)
//   44     - row counter
//
// NOTE(review): the body appears to be compiler output; descriptions of what
// the ee.* instructions do are inferred from their mnemonics - confirm against
// the ESP32-S3 TRM before relying on them.

    entry   a1, 80

    movi.n  a10, 7
    and     a10, a10, a7                        // a10 = k & 7
    beqz    a10, .dspm_mult_s16_aes3_body       // k is a multiple of 8
    // Otherwise fall back to the ESP32 implementation
    j       .dspm_mult_s16_ae32_body

.dspm_mult_s16_aes3_body:
    mov.n   a10, a4                             // a10 = C (output pointer)
    mov.n   a11, a5                             // a11 = m
    l32i    a5, a1, 80                          // a5  = shift
    s32i.n  a3, a1, 32                          // spill B

    bltz    a5, .Lt_0_6146                      // negative shift: other rounding constant

    l32r    a9, .LC0_1_38                       // a9 = 32767
    ssr     a5
    sra     a9, a9                              // a9 = 32767 >> shift

.LBB23_dspm_mult_s16_aes3:
    // Replicate the rounding value into eight 16-bit slots at a1[0..15]
    s16i    a9, a1, 0
    s16i    a9, a1, 2
    s16i    a9, a1, 4
    s16i    a9, a1, 6
    s16i    a9, a1, 8
    s16i    a9, a1, 10
    s16i    a9, a1, 12
    s16i    a9, a1, 14

    blti    a11, 1, .Lt_0_7426                  // m < 1: nothing to do

    mov.n   a13, a2                             // a13 = start of the current A row
    slli    a4, a7, 1                           // a4  = k * 2, byte stride between rows of B
    mov.n   a12, a1                             // a12 = address of the rounding block
    l32i.n  a14, a1, 32                         // a14 = B
    movi.n  a15, 15
    movi.n  a8, 0
    slli    a9, a6, 1                           // a9 = n * 2
    s32i.n  a9, a1, 36                          // save the A row stride
    s32i.n  a8, a1, 44                          // row counter = 0
    sub     a15, a15, a5                        // a15 = 15 - shift
    addi.n  a8, a7, 7
    movgez  a8, a7, a7                          // a8 = (k >= 0) ? k : k + 7
    srai    a8, a8, 3                           // a8 = k / 8
    s32i.n  a8, a1, 40                          // save the number of column blocks
    slli    a8, a8, 4                           // 16 bytes per column block
    add.n   a14, a14, a8                        // a14 = B + 16 * (k / 8), end marker

// ---- loop over the m rows of A ----
.Lt_0_7938:
    l32i.n  a8, a1, 40
    beqz.n  a8, .Lt_0_8194                      // no column blocks

    l32i.n  a7, a1, 32                          // a7 = B, start of the first column block
    mov.n   a2, a13                             // a2 = start of the current A row

// ---- loop over the 8-column blocks of B ----
.Lt_0_8706:
    ee.ldqa.u16.128.ip      a12, 0              // presumably preloads QACC with the rounding block
    ee.vldbc.16.ip          q1, a2, 2           // q1 = broadcast A sample, a2 += 2
    mov.n   a3, a7                              // a3 walks down the B column block
    ee.vld.128.xp           q0, a3, a4          // q0 = 8 samples of B, a3 += k * 2
    addi    a7, a7, 16                          // advance to the next column block
    blti    a6, 1, .Lt_0_8962                   // n < 1: no products

    srai    a5, a6, 1                           // a5 = n / 2
    bbci    a6, 0, .LBB68_dspm_mult_s16_aes3    // n even: skip the single step

    // n odd: one multiply-accumulate ahead of the unrolled loop
    ee.vmulas.s16.qacc.ldbc.incp    q1, a2, q0, q1
    ee.vld.128.xp           q0, a3, a4

.LBB68_dspm_mult_s16_aes3:
    loopgtz a5, .LBB74_dspm_mult_s16_aes3       // n / 2 iterations, two MACs each

.LBB64_dspm_mult_s16_aes3:
    ee.vld.128.xp           q2, a3, a4
    ee.vmulas.s16.qacc.ldbc.incp    q1, a2, q0, q1
    ee.vld.128.xp           q0, a3, a4
    ee.vmulas.s16.qacc.ldbc.incp    q1, a2, q2, q1

.LBB74_dspm_mult_s16_aes3:

.Lt_0_8962:
    mov.n   a2, a13                             // rewind to the start of the A row
    ee.srcmb.s16.qacc       q0, a15, 1          // presumably QACC >> (15 - shift), packed to s16
    ee.vst.128.ip           q0, a10, 16         // store 8 results to C, C += 16 bytes
    bne     a7, a14, .Lt_0_8706

.Lt_0_8194:
    l32i.n  a8, a1, 36                          // a8 = n * 2
    l32i.n  a9, a1, 44                          // a9 = row counter
    add.n   a13, a13, a8                        // next row of A
    addi.n  a9, a9, 1
    s32i.n  a9, a1, 44
    bne     a11, a9, .Lt_0_7938

.Lt_0_7426:
    movi.n  a2, 0                               // return status ESP_OK
    retw.n

// Negative shift: the rounding constant is 16383 instead of 32767
.Lt_0_6146:
    l32r    a9, .LC1_1_39                       // a9 = 16383
    ssr     a5
    sra     a9, a9
    j       .LBB23_dspm_mult_s16_aes3
|
||||
|
||||
|
||||
#endif // dspm_mult_s16_aes3_enabled
|
||||
@@ -0,0 +1,40 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod.h"
|
||||
#include "dspm_mult.h"
|
||||
|
||||
// Matrix A(m,n): m - number of rows, n - number of columns
|
||||
// C(m,k) = A(m,n)*B(n,k)
|
||||
// c(i,j) = sum(a(i,s)*b(s,j)) , s=1..n
|
||||
esp_err_t dspm_mult_s16_ansi(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift)
|
||||
{
|
||||
int final_shift = shift - 15;
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
// This code also could be used
|
||||
//dsps_dotprode_f32_ae32(&A[i*n],&B[j],&C[i*k + j],n,1,n);
|
||||
long long acc = 0x7fff >> shift;
|
||||
for (int s = 0; s < n ; s++) {
|
||||
acc += (int32_t)A[i * n + s] * (int32_t)B[s * k + j];
|
||||
}
|
||||
if (final_shift > 0) {
|
||||
C[i * k + j] = (acc << final_shift);
|
||||
} else {
|
||||
C[i * k + j] = (acc >> (-final_shift));
|
||||
}
|
||||
}
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
// Copyright 2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_s16_arp4_enabled == 1)
|
||||
|
||||
// This is the matrix multiplication function for the RISC-V processor core.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_s16_arp4
|
||||
.global dspm_mult_s16_ansi
|
||||
.global .dspm_mult_s16_arp4_body
|
||||
.type dspm_mult_s16_arp4,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_s16_ansi(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift)
|
||||
// {
|
||||
// int final_shift = shift - 15;
|
||||
// for (int i = 0 ; i < m ; i++) {
|
||||
// for (int j = 0 ; j < k ; j++) {
|
||||
// // This code also could be used
|
||||
// //dsps_dotprode_f32_ae32(&A[i*n],&B[j],&C[i*k + j],n,1,n);
|
||||
// long long acc = 0x7fff >> shift;
|
||||
// for (int s = 0; s < n ; s++) {
|
||||
// acc += (int32_t)A[i * n + s] * (int32_t)B[s * k + j];
|
||||
// }
|
||||
// if (final_shift > 0) {
|
||||
// C[i * k + j] = (acc << final_shift);
|
||||
// } else {
|
||||
// C[i * k + j] = (acc >> (-final_shift));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_s16_arp4:
// esp_err_t dspm_mult_s16_arp4(const int16_t *A, const int16_t *B, int16_t *C,
//                              int m, int n, int k, int shift);
//
// Arguments:
//   a0 - A
//   a1 - B
//   a2 - C
//   a3 - m
//   a4 - n
//   a5 - k
//   a6 - shift
//
// Working registers:
//   a7       - counter loop1: 0..m
//   t1       - counter loop2: 0..k, step 8
//   t0       - hardware loop count (n)
//   x24 (s8) - pointer to the current B row inside the dot product
//   x25 (s9) - k * 2, byte stride between rows of B
//   x26 (s10)- final_shift = shift - 15
//   x29 (t4) - pointer to the top of the current 8-column block of B
//   x30 (t5) - pointer into the current row of A
//   x31 (t6) - 2, increment for A
//
// The vector path is taken only when m, n and k are all multiples of 8; any
// other size tail-calls dspm_mult_s16_ansi. Returns ESP_OK (0) in a0.
//
// NOTE(review): the accumulator is cleared with esp.zero.qacc, so unlike the
// C reference no (0x7fff >> shift) rounding term is added - confirm intended.
// NOTE(review): both loops test their counters after the first pass, so m, n
// and k are presumably expected to be > 0 here - confirm against callers.

    or      t0, a3, a4
    or      t0, t0, a5
    andi    t0, t0, 0x7                     // low three bits of (m | n | k)
    beqz    t0, .dspm_mult_s16_arp4_body    // all multiples of 8: vector path
    j       dspm_mult_s16_ansi              // tail-call the generic implementation

.dspm_mult_s16_arp4_body:
    // s8..s10 are used as scratch below, so save them first
    add     sp, sp, -16
    sw      s8, 4(sp)
    sw      s9, 8(sp)
    sw      s10, 12(sp)

    mv      t0, a4                          // hardware loop count = n
    li      a7, 0                           // counter loop1
    slli    x25, a5, 1                      // B row stride in bytes = k * 2
    li      x31, 2                          // A increment in bytes
    add     x26, a6, -15                    // final_shift = shift - 15

.dpf_loop1:                                 // loop over the m rows of A
    li      t1, 0                           // reset counter for loop2
    mv      x29, a1                         // back to the first column block of B

.dpf_loop2:                                 // loop over k, 8 columns per pass
    mv      x30, a0                         // start of the current A row
    mv      x24, x29                        // top of the current column block

    // Calculating dotproduct...
    esp.zero.qacc                           // qacc = 0
    esp.vldbc.16.xp q0, x30, x31            // q0 = broadcast A sample, x30 += 2
    esp.vld.128.xp  q1, x24, x25            // q1 = b[x0..x7], x24 += stride
    esp.lp.setup    0, t0, .matrix_mul_loop
    esp.vmulas.s16.qacc.ldbc.incp q0, x30, q0, q1
.matrix_mul_loop:   esp.vld.128.xp q1, x24, x25

    esp.srcmb.s16.qacc q2, x26, 0           // q2 = qacc >> shift
    esp.vst.128.ip  q2, a2, 16              // save k0..k7, C += 16 bytes
    add     x29, x29, 16                    // next 8-column block of B

    // check loop 2
    addi    t1, t1, 8                       // increment loop2 counter
    blt     t1, a5, .dpf_loop2

    // x30 was advanced by (n + 1) samples; step back one so that A points at
    // the next row
    add     x30, x30, -2
    mv      a0, x30

    // check loop 1
    add     a7, a7, 1                       // increment loop1 counter
    blt     a7, a3, .dpf_loop1

    // Exit
    li      a0, 0                           // return status ESP_OK (was `mv a0, a6`, which returned shift)
    lw      s10, 12(sp)
    lw      s9, 8(sp)
    lw      s8, 4(sp)
    add     sp, sp, 16
    ret
|
||||
|
||||
#endif //dspm_mult_s16_arp4_enabled
|
||||
@@ -0,0 +1,58 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
.macro dspm_mult_s16_ae32_MxNxN
// Expands inside a windowed-ABI function whose arguments are:
//   A - a2
//   B - a3
//   C - a4
//   m - a5
//   n - a6
//   k - a7
//   shift - stack (a8)
//
// Both exits return ESP_OK (0) in a2 with `retw.n`.
//
// NOTE(review): the labels below are not macro-local, so the macro can be
// expanded at most once per assembly unit.
// NOTE(review): for n >= 4 the macro returns without writing to C, and the
// n < 4 path uses n itself (`ssr a6`) as the shift amount. This looks like
// unfinished code - confirm before using it.

    movi    a10, 4                  // load 4 as a constant
    // n < 4 takes the dot-product path; n >= 4 falls through
    blt     a6, a10, do_dotproduct
    // Here we make operations one by one...

    movi.n  a2, 0                   // return status ESP_OK
    retw.n

do_dotproduct:

    mov     a12, a2                 // a12 = A
    mov     a13, a3                 // a13 = B

    srli    a9, a6, 2               // a9 = n / 4 - 1
    addi    a9, a9, -1

    movi.n  a10, 0                  // a10 = 0, increment for the second array
    dotprod_s16_ae32_full a12, a13, a9, a10, a6

    /* Get accumulator */
    ssr     a6
    rsr     a2, acchi
    rsr     a3, acclo
    src     a2, a2, a3              // (acchi:acclo) >> SAR

    s16i    a2, a4, 0               // store the 16-bit result to C[0]

    movi.n  a2, 0                   // return status ESP_OK
    retw.n

.endm // dspm_mult_s16_ae32_MxNxN
|
||||
@@ -0,0 +1,105 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
.macro dspm_mult_s16_m_ae32_vector
// Matrix * column vector (k == 1) tail of dspm_mult_s16_ae32.
//
// Register state expected from the expansion site:
//   a2  - A (advanced by one row per output element)
//   a3  - B (the vector, re-read for every row)
//   a4  - C (output pointer)
//   a5  - m - any > 0
//   a6  - n - 1,2,3, any
//   a7  - k - overwritten here and used as scratch
//   a8  - 0, written to ACCHI
//   a9  - row counter, starts at 0
//   a14 - n * 2, byte size of one row of A
//   a15 - 0x7fff >> shift, written to ACCLO
//   SAR - shift amount applied by the final `src`
//
// Both paths end with `retw.n`, returning ESP_OK (0) in a2.
//
// NOTE(review): `ldinc` performs 32-bit loads; with an odd n the rows of A
// start on alternating 2-byte boundaries - confirm this is handled as intended.

    // n < 4 is processed by the short path
    movi    a7, 4
    blt     a6, a7, small_process_loop

    srli    a7, a6, 2
    addi    a7, a7, -1              // a7 = n / 4 - 1

// ---- n >= 4: one dot product per row of A ----
mmultv_loop1:
    wsr     a8, acchi
    wsr     a15, acclo              // accumulator = rounding value

    mov     a12, a2                 // a12 = current row of A
    mov     a13, a3                 // a13 = B

    dotprod_s16_ae32_full a12, a13, a7, a6

    /* Get accumulator */
    rsr     a12, acchi
    rsr     a13, acclo
    src     a12, a12, a13           // (acchi:acclo) >> SAR

    s16i    a12, a4, 0              // store C[i]
    addi    a4, a4, 2

    add.n   a2, a2, a14             // A = &A[(i + 1) * n]
    addi    a9, a9, 1               // next row
    blt     a9, a5, mmultv_loop1

    movi.n  a2, 0                   // return status ESP_OK
    retw.n

// ---- n < 4: bits 1 and 0 of n select the multiply steps ----
small_process_loop:

    wsr     a8, acchi
    wsr     a15, acclo              // accumulator = rounding value

    mov     a12, a2                 // a12 = current row of A
    mov     a13, a3                 // a13 = B

    addi    a12, a12, -4            // ldinc pre-increments by 4
    addi    a13, a13, -4            // ldinc pre-increments by 4

    bbci    a6, 1, .mod2chk_short   // bit 1 of n clear: skip the pair
    ldinc   m0, a12
    ldinc   m2, a13
    mula.dd.hh m0, m2               // acc += m0.high * m2.high
    mula.dd.ll m0, m2               // acc += m0.low  * m2.low
.mod2chk_short:
    bbci    a6, 0, .mod1chk_short   // bit 0 of n clear: skip the single
    ldinc   m0, a12
    ldinc   m2, a13
    mula.dd.ll m0, m2               // acc += m0.low  * m2.low
.mod1chk_short:

    /* Get accumulator */
    rsr     a12, acchi
    rsr     a13, acclo
    src     a12, a12, a13           // (acchi:acclo) >> SAR

    s16i    a12, a4, 0              // store C[i]
    addi    a4, a4, 2

    add.n   a2, a2, a14             // A = &A[(i + 1) * n]
    addi    a9, a9, 1               // next row
    blt     a9, a5, small_process_loop

    movi.n  a2, 0                   // return status ESP_OK
    retw.n

.endm // dspm_mult_s16_m_ae32_vector
|
||||
@@ -0,0 +1,75 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_3x3x1_f32_ae32_enabled == 1)
|
||||
|
||||
// This is the matrix multiplication function for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_3x3x1_f32_ae32
|
||||
.type dspm_mult_3x3x1_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_3x3x1_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_3x3x1_f32_ae32:
// 3x3 float matrix times 3-element column vector: C[i] = sum(A[i][s] * B[s]).
//
//   a2 - A (3x3, row-major), advanced by one row per iteration
//   a3 - B (3 floats)
//   a4 - C (3 floats), advanced by one element per iteration
//   a5 - integer 0, moved to f0 to clear the accumulator
//   a6 - 3, row count for the zero-overhead loop
//
// The m, n, k arguments of the C prototype are not read: a5 and a6 are
// overwritten on entry. Returns ESP_OK (0) in a2.

    entry   a1, 16

    movi    a5, 0
    movi    a6, 3

    // Keep the whole vector in registers for all three rows
    lsi     f13, a3, 0              // B[0]
    lsi     f14, a3, 4              // B[1]
    lsi     f15, a3, 8              // B[2]

    loopnez a6, loop_mac_3x3x1_end_m_ae32
    wfr     f0, a5                  // f0 = 0.0
    lsi     f2, a2, 0               // A[i][0]
    madd.s  f0, f2, f13
    lsi     f3, a2, 4               // A[i][1]
    madd.s  f0, f3, f14
    lsi     f4, a2, 8               // A[i][2]
    madd.s  f0, f4, f15

    addi    a2, a2, 12              // next row of A (3 floats)
    ssi     f0, a4, 0               // C[i] = f0
    addi    a4, a4, 4

loop_mac_3x3x1_end_m_ae32:

    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,85 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_3x3x3_f32_ae32_enabled == 1)
|
||||
|
||||
// This is the matrix multiplication function for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_3x3x3_f32_ae32
|
||||
.type dspm_mult_3x3x3_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_3x3x3_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_3x3x3_f32_ae32:
// 3x3 float matrix product C = A * B, computed one column of B / C per pass
// of the outer loop.
//
//   a2  - A (3x3, row-major)
//   a3  - B (3x3, row-major), advanced by one column per outer pass
//   a4  - C (3x3, row-major), advanced by one column per outer pass
//   a5  - integer 0, moved to f0 to clear the accumulator
//   a6  - 3, inner zero-overhead loop count (rows of A)
//   a7  - 3, outer loop count (columns of B)
//   a12 - pointer to the current row of A
//   a14 - pointer to the current output element
//
// The m, n, k arguments of the C prototype are not read: a5, a6 and a7 are
// overwritten on entry. Returns ESP_OK (0) in a2.

    entry   a1, 16

    movi    a5, 0
    movi    a6, 3
    movi    a7, 3                   // outer loop count

m_loop_3x3x3:
    mov     a12, a2                 // back to the first row of A
    mov     a14, a4                 // top of the current column of C

    // Current column of B; the rows are 12 bytes apart
    lsi     f12, a3, 0              // B[0][j]
    lsi     f13, a3, 12             // B[1][j]
    lsi     f14, a3, 24             // B[2][j]

    loopnez a6, loop_mac_3x3x3_end_m_ae32
    wfr     f0, a5                  // f0 = 0.0

    lsi     f2, a12, 0              // A[i][0]
    madd.s  f0, f2, f12
    lsi     f3, a12, 4              // A[i][1]
    madd.s  f0, f3, f13
    lsi     f4, a12, 8              // A[i][2]
    madd.s  f0, f4, f14

    addi    a12, a12, 12            // next row of A
    ssi     f0, a14, 0              // C[i][j] = f0
    addi    a14, a14, 12            // next row of C, same column
loop_mac_3x3x3_end_m_ae32:

    addi    a3, a3, 4               // next column of B
    addi    a4, a4, 4               // next column of C
    addi    a7, a7, -1
    bnez    a7, m_loop_3x3x3

    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_4x4x1_f32_ae32_enabled == 1)
|
||||
|
||||
// This is the matrix multiplication function for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_4x4x1_f32_ae32
|
||||
.type dspm_mult_4x4x1_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_4x4x1_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_4x4x1_f32_ae32:
// 4x4 float matrix times 4-element column vector: C[i] = sum(A[i][s] * B[s]).
//
//   a2 - A (4x4, row-major), advanced by one row per iteration
//   a3 - B (4 floats)
//   a4 - C (4 floats), advanced by one element per iteration
//   a5 - integer 0, moved to f0 to clear the accumulator
//   a6 - 4, row count for the zero-overhead loop
//
// The m, n, k arguments of the C prototype are not read: a5 and a6 are
// overwritten on entry. Returns ESP_OK (0) in a2.

    entry   a1, 16

    movi    a5, 0
    movi    a6, 4

    // Keep the whole vector in registers for all four rows
    lsi     f12, a3, 0              // B[0]
    lsi     f13, a3, 4              // B[1]
    lsi     f14, a3, 8              // B[2]
    lsi     f15, a3, 12             // B[3]

    loopnez a6, loop_mac_4x4x1_end_m_ae32
    wfr     f0, a5                  // f0 = 0.0
    lsi     f2, a2, 0               // A[i][0]
    madd.s  f0, f2, f12
    lsi     f3, a2, 4               // A[i][1]
    madd.s  f0, f3, f13
    lsi     f4, a2, 8               // A[i][2]
    madd.s  f0, f4, f14
    lsi     f5, a2, 12              // A[i][3]
    madd.s  f0, f5, f15

    addi    a2, a2, 16              // next row of A (4 floats)
    ssi     f0, a4, 0               // C[i] = f0
    addi    a4, a4, 4

loop_mac_4x4x1_end_m_ae32:

    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_4x4x4_f32_ae32_enabled == 1)
|
||||
|
||||
// This is the matrix multiplication function for the ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_4x4x4_f32_ae32
|
||||
.type dspm_mult_4x4x4_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_4x4x4_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_4x4x4_f32_ae32:
// 4x4 float matrix product C = A * B, computed one column of B / C per pass
// of the outer loop.
//
//   a2  - A (4x4, row-major)
//   a3  - B (4x4, row-major), advanced by one column per outer pass
//   a4  - C (4x4, row-major), advanced by one column per outer pass
//   a5  - integer 0, moved to f0 to clear the accumulator
//   a6  - 4, inner zero-overhead loop count (rows of A)
//   a7  - 4, outer loop count (columns of B)
//   a12 - pointer to the current row of A
//   a14 - pointer to the current output element
//
// The m, n, k arguments of the C prototype are not read: a5, a6 and a7 are
// overwritten on entry. Returns ESP_OK (0) in a2.

    entry   a1, 16

    movi    a5, 0
    movi    a6, 4
    movi    a7, 4                   // outer loop count

m_loop_4x4x4:
    mov     a12, a2                 // back to the first row of A
    mov     a14, a4                 // top of the current column of C

    // Current column of B; the rows are 16 bytes apart
    lsi     f12, a3, 0              // B[0][j]
    lsi     f13, a3, 16             // B[1][j]
    lsi     f14, a3, 32             // B[2][j]
    lsi     f15, a3, 48             // B[3][j]

    loopnez a6, loop_mac_4x4x4_end_m_ae32
    wfr     f0, a5                  // f0 = 0.0

    lsi     f2, a12, 0              // A[i][0]
    madd.s  f0, f2, f12
    lsi     f3, a12, 4              // A[i][1]
    madd.s  f0, f3, f13
    lsi     f4, a12, 8              // A[i][2]
    madd.s  f0, f4, f14
    lsi     f5, a12, 12             // A[i][3]
    madd.s  f0, f5, f15

    addi    a12, a12, 16            // next row of A
    ssi     f0, a14, 0              // C[i][j] = f0
    addi    a14, a14, 16            // next row of C, same column
loop_mac_4x4x4_end_m_ae32:

    addi    a3, a3, 4               // next column of B
    addi    a4, a4, 4               // next column of C
    addi    a7, a7, -1
    bnez    a7, m_loop_4x4x4

    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprode_f32_m_ae32.S"
|
||||
|
||||
// This is matrix multiplication function for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_ex_f32_ae32
|
||||
.global .dspm_mult_ex_f32_ae32_body
|
||||
.type dspm_mult_ex_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dspm_mult_ex_f32_ae32(const float *A, const float *B, float *C, int m, int n, int k, int A_padd, int B_padd, int C_padd);
|
||||
|
||||
dspm_mult_ex_f32_ae32:
// Float matrix product on sub-matrices whose rows carry extra padding.
//
// Register arguments:
//   a2 - A
//   a3 - B
//   a4 - C
//   a5 - m
//   a6 - n
//   a7 - k
// Stack arguments (read after `entry`):
//   a1 + 16 - A_padding -> a14
//   a1 + 20 - B_padding -> a15
//   a1 + 24 - C_padding -> a8
//
// Working registers:
//   a9  - counter loop1: 0..m
//   a10 - 4, byte step through A
//   a11 - counter loop2: 0..k
//   a12 - pointer into the current row of A
//   a13 - pointer to the top of the current column of B
//   a14 - A_step = A_padding + n, in elements
//   a15 - B_step = (B_padding + k) * 4, in bytes
//   f1  - dot-product accumulator, f3 - constant 0.0
//
// Returns ESP_OK (0) in a2.

    entry   a1, 16

// Global label so that other code can enter after its own `entry`.
.dspm_mult_ex_f32_ae32_body:

    l32i.n  a14, a1, 16             // A_padding
    l32i.n  a15, a1, 20             // B_padding
    l32i.n  a8, a1, 24              // C_padding

    add     a14, a14, a6            // A_step = A_padding + n
    add     a15, a15, a7            // B_step = B_padding + k
    slli    a15, a15, 2             // B_step in bytes

    movi.n  a10, 4                  // floats are 4 bytes apart
    movi.n  a9, 0                   // row counter
    const.s f3, 0                   // f3 = 0, initial accumulator state

.mult_ex_loop1:                     // loop over the m rows
    movi.n  a11, 0                  // restart the column counter

.mult_ex_loop2:                     // loop over the k columns
    mov     a12, a2                 // a12 = current row of A
    addx4   a13, a11, a3            // a13 = B + j * 4
    mov.s   f1, f3                  // clear the accumulator

    // dotprode_f32_ae32( x1 x2 count step1 step2)
    dotprode_f32_ae32 a12, a13, a6, a10, a15;

    addi.n  a11, a11, 1             // next column
    ssip    f1, a4, 4               // store the result from f1 at a4, a4 += 4

    blt     a11, a7, .mult_ex_loop2

    // next row
    addx4   a2, a14, a2             // A += A_step * 4
    addx4   a4, a8, a4              // C += C_padding * 4 (skip the padding)
    addi.n  a9, a9, 1
    blt     a9, a5, .mult_ex_loop1

    movi.n  a2, 0                   // return status ESP_OK
    retw.n
|
||||
|
||||
#endif //dspm_mult_f32_ae32_enabled
|
||||
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
|
||||
#if (dspm_mult_f32_aes3_enabled == 1)
|
||||
|
||||
// This is matrix multiplication function for ESP32S3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_ex_f32_aes3
|
||||
.global .dspm_mult_ex_f32_ae32_body
|
||||
.type dspm_mult_ex_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dspm_mult_ex_f32_ansi(const float* A, const float* B, float* C, int A_rows, int A_cols, int B_cols, int A_padding, int B_padding, int C_padding)
|
||||
//{
|
||||
// const int A_step = A_cols + A_padding;
|
||||
// const int B_step = B_cols + B_padding;
|
||||
// const int C_step = B_cols + C_padding;
|
||||
//
|
||||
// for (int i = 0; i < A_rows; i++) {
|
||||
// for (int j = 0; j < B_cols; j++) {
|
||||
// C[i * C_step + j] = A[i * A_step] * B[j];
|
||||
// for (int s = 1; s < A_cols; s++) {
|
||||
// C[i * C_step + j] += A[i * A_step + s] * B[s * B_step + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
//}
|
||||
|
||||
// A - a2
|
||||
// B - a3
|
||||
// C - a4
|
||||
// m - a5
|
||||
// n - a6
|
||||
// k - a7
|
||||
// A_padd = a8
|
||||
// B_padd = a9
|
||||
// C_padd = a15
|
||||
|
||||
dspm_mult_ex_f32_aes3:
// Matrix-subset multiplication C = A * B for floats, ESP32-S3 variant.
// The 128-bit load/store path below is used only when m, n, k and all three
// paddings are multiples of 4 and the A, B, C pointers are 16-byte aligned;
// otherwise control jumps to .dspm_mult_ex_f32_ae32_body (generic ESP32 code).
// The fast path produces a 1x4 strip of C per pass of .loop_y_mult_ex and
// walks all m rows before moving on to the next group of 4 columns.
// Returns 0 (ESP_OK) in a2.
|
||||
|
||||
entry a1, 16
|
||||
l32i.n a8, a1, 16 // A_padding
|
||||
l32i.n a9, a1, 20 // B_padding
|
||||
l32i.n a15, a1, 24 // C_padding
|
||||
|
||||
// Check if we can use S3 memory model
|
||||
// Check matrix dimensions and paddings: all of them must be divisible by 4
|
||||
or a12, a5, a6 // a12 = m OR n
|
||||
or a14, a8, a9 // a14 = A_padd OR B_padd
|
||||
or a12, a12, a7 // a12 = m OR n OR k
|
||||
or a14, a14, a15 // a14 = A_padd OR B_padd OR C_padd
|
||||
or a12, a12, a14 // a12 = m OR n OR k OR A_padd OR B_padd OR C_padd
|
||||
movi.n a11, 3 // a11 = mask of the two low bits (value modulo 4)
|
||||
and a12, a12, a11 // a12 != 0 if any dimension/padding is not a multiple of 4
|
||||
|
||||
// Check alignment of A B C matrices data pointers
|
||||
movi.n a11, 15 // a11 = mask of the four low address bits (16-byte alignment)
|
||||
or a10, a3, a2 // a10 = A pointer OR B pointer
|
||||
or a10, a10, a4 // a10 = A pointer OR B pointer OR C pointer
|
||||
and a10, a10, a11 // a10 != 0 if any pointer is not 16-byte aligned
|
||||
or a12, a12, a10 // a12 = dimension check OR alignment check
|
||||
beqz a12, .s3_mmult_ex // all checks passed: use the S3 fast path
|
||||
// Otherwise fall back to the generic ESP32 implementation (label is defined outside this function)
|
||||
J .dspm_mult_ex_f32_ae32_body
|
||||
|
||||
.s3_mmult_ex:
|
||||
// f0, f1, f2, f3 - multiplication result
|
||||
// f4, f5, f6, f7 - input for matrix B
|
||||
// f8, f9, f10,f11- input for matrix A
|
||||
movi.n a14, 0 // a14 = byte offset of the current 4-column group inside a row of B
|
||||
|
||||
add a15, a15, a7 // a15 = k + C_padding
|
||||
slli a10, a15, 2 // a10 = (k + C_padding) * 4 - byte step between rows of C
|
||||
|
||||
mov a15, a9 // a15 = B_padd
|
||||
slli a15, a15, 2 // a15 = B_padd * 4 - start value of the column-group counter (compared with a12)
|
||||
|
||||
add a7, a7, a9 // a7 = k + B_padding
|
||||
slli a12, a7, 2 // a12 = (k + B_padding) * 4 - byte step between rows of B, also the loop-x limit
|
||||
srli a11, a6, 2 // a11 = n / 4
|
||||
addi.n a11, a11, -1 // a11 = n / 4 - 1: repeat count of the hardware loop (first 4 columns of A are handled before it)
|
||||
|
||||
slli a6, a8, 2 // a6 = A_padding * 4 - bytes skipped at the end of each row of A
|
||||
mov a13, a3 // copy of the B pointer (a13 is recomputed at the top of .loop_y_mult_ex)
|
||||
mov a7, a4 // a7 = C pointer of the current 4-column group
|
||||
|
||||
.loop_x_mult_ex:
|
||||
movi.n a9, 0 // reset row counter
|
||||
mov a8, a2 // restart A from its first row
|
||||
.loop_y_mult_ex:
|
||||
|
||||
add a13, a3, a14 // a13 = B + offset of the current column group (first row of B)
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a8, 16 // Load A values: X11, X12, X13, X14
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y11, Y12, Y13, Y14
|
||||
mul.s f0, f4, f8 // f0 = X11*Y11
|
||||
mul.s f1, f5, f8 // f1 = X11*Y12
|
||||
mul.s f2, f6, f8 // f2 = X11*Y13
|
||||
mul.s f3, f7, f8 // f3 = X11*Y14
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y21, Y22, Y23, Y24
|
||||
madd.s f0, f4, f9 // f0 = X11*Y11 + X12*Y21
|
||||
madd.s f1, f5, f9 // f1 = X11*Y12 + X12*Y22
|
||||
madd.s f2, f6, f9 // f2 = X11*Y13 + X12*Y23
|
||||
madd.s f3, f7, f9 // f3 = X11*Y14 + X12*Y24
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y31, Y32, Y33, Y34
|
||||
madd.s f0, f4, f10 // f0 = X11*Y11 + X12*Y21 + X13*Y31
|
||||
madd.s f1, f5, f10 // f1 = X11*Y12 + X12*Y22 + X13*Y32
|
||||
madd.s f2, f6, f10 // f2 = X11*Y13 + X12*Y23 + X13*Y33
|
||||
madd.s f3, f7, f10 // f3 = X11*Y14 + X12*Y24 + X13*Y34
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y41, Y42, Y43, Y44
|
||||
madd.s f0, f4, f11 // f0 = X11*Y11 + X12*Y21 + X13*Y31 + X14*Y41
|
||||
madd.s f1, f5, f11 // f1 = X11*Y12 + X12*Y22 + X13*Y32 + X14*Y42
|
||||
madd.s f2, f6, f11 // f2 = X11*Y13 + X12*Y23 + X13*Y33 + X14*Y43
|
||||
madd.s f3, f7, f11 // f3 = X11*Y14 + X12*Y24 + X13*Y34 + X14*Y44
|
||||
|
||||
loopnez a11, .iner_loop_mult_ex // zero-overhead loop: body runs a11 times, skipped entirely when a11 == 0
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a8, 16 // Load A values: X15, X16, X17, X18
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y51, Y52, Y53, Y54
|
||||
madd.s f0, f4, f8 // f0 += X15*Y51
|
||||
madd.s f1, f5, f8 // f1 += X15*Y52
|
||||
madd.s f2, f6, f8 // f2 += X15*Y53
|
||||
madd.s f3, f7, f8 // f3 += X15*Y54
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y61, Y62, Y63, Y64
|
||||
madd.s f0, f4, f9 // f0 += X16*Y61
|
||||
madd.s f1, f5, f9 // f1 += X16*Y62
|
||||
madd.s f2, f6, f9 // f2 += X16*Y63
|
||||
madd.s f3, f7, f9 // f3 += X16*Y64
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y71, Y72, Y73, Y74
|
||||
madd.s f0, f4, f10 // f0 += X17*Y71
|
||||
madd.s f1, f5, f10 // f1 += X17*Y72
|
||||
madd.s f2, f6, f10 // f2 += X17*Y73
|
||||
madd.s f3, f7, f10 // f3 += X17*Y74
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y81, Y82, Y83, Y84
|
||||
madd.s f0, f4, f11 // f0 += X18*Y81
|
||||
madd.s f1, f5, f11 // f1 += X18*Y82
|
||||
madd.s f2, f6, f11 // f2 += X18*Y83
|
||||
madd.s f3, f7, f11 // f3 += X18*Y84
|
||||
.iner_loop_mult_ex:
|
||||
EE.STF.128.XP f3, f2, f1, f0, a4, a10 // Store 4 results; a4 advances by a10 (C row step)
|
||||
|
||||
addi.n a9, a9, 1 // Increment row counter
|
||||
add a8, a8, a6 // skip the A padding (A_padding * 4 bytes) to reach the next row of A
|
||||
blt a9, a5, .loop_y_mult_ex // repeat for all m rows
|
||||
|
||||
addi.n a7, a7, 16 // Move the C base pointer 4 columns (16 bytes) to the right
|
||||
mov a4, a7 // a4 = first row of the next column group of C
|
||||
addi.n a14, a14, 16 // Move the B column offset 4 columns (16 bytes) to the right
|
||||
addi.n a15, a15, 16 // Advance the column-group counter by 16 bytes
|
||||
|
||||
blt a15, a12, .loop_x_mult_ex // a15 started at B_padd*4, so this runs k/4 times
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif //dspm_mult_f32_aes3_enabled
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dspm_mult.h"
|
||||
|
||||
// Matrix A(m,n), m - amount of rows, n - amount of columns
|
||||
// C(m,k) = A(m,n)*B(n,k)
|
||||
// c(i * c_step,j) = sum(a(i * a_step,s)*b(s * b_step,j)) , s=1..n
|
||||
esp_err_t dspm_mult_ex_f32_ansi(const float *A, const float *B, float *C, int A_rows, int A_cols, int B_cols, int A_padding, int B_padding, int C_padding)
|
||||
{
|
||||
if (NULL == A) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == B) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (NULL == C) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (A_rows <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (A_cols <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (B_cols <= 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (A_padding < 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (B_padding < 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
if (C_padding < 0) {
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
const int A_step = A_cols + A_padding;
|
||||
const int B_step = B_cols + B_padding;
|
||||
const int C_step = B_cols + C_padding;
|
||||
|
||||
for (int i = 0; i < A_rows; i++) {
|
||||
for (int j = 0; j < B_cols; j++) {
|
||||
C[i * C_step + j] = A[i * A_step] * B[j];
|
||||
for (int s = 1; s < A_cols; s++) {
|
||||
C[i * C_step + j] += A[i * A_step + s] * B[s * B_step + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_f32_arp4_enabled == 1)
|
||||
|
||||
// This is matrix multiplication function for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_ex_f32_arp4
|
||||
.global .dspm_mult_ex_f32_arp4_body
|
||||
.type dspm_mult_ex_f32_arp4,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_f32_ansi(const float *A, const float *B, float *C, int m, int n, int k, int A_padd, int B_padd, int C_padd)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_ex_f32_arp4:
// Matrix-subset multiplication C = A * B for single-precision floats (RISC-V).
//
// Arguments on entry:
//   a0 - A, a1 - B, a2 - C, a3 - m, a4 - n, a5 - k,
//   a6 - A_padding, a7 - B_padding, 0(sp) - C_padding
// Working registers (after the setup below):
//   t4 - A_step = (A_padding + n) * 4, bytes between rows of A
//   t5 - B_step = (B_padding + k) * 4, bytes between rows of B
//   a6 - C_padding * 4, bytes skipped after each row of C
//   a7 - outer loop counter, 0..m
//   t1 - inner loop counter, 0..k
//   t2 - running A pointer, t3 - running B pointer
//   fa0, fa1 - operands, fa2 - accumulator
// Both loops test their counter at the bottom, so each body runs at least once.
// Returns 0 (ESP_OK) in a0.

    addi    sp, sp, -16

// Secondary entry point (declared .global); expects sp already lowered by 16.
.dspm_mult_ex_f32_arp4_body:
    add     t4, a6, a4          // A_step = A_padding + n (a6 is reused below)
    add     t5, a7, a5          // B_step = B_padding + k (a7 is reused below)
    lw      a6, 16(sp)          // C_padding, found above the 16 bytes reserved at entry
    li      a7, 0               // outer (row) counter

    slli    t4, t4, 2           // A_step in bytes
    slli    t5, t5, 2           // B_step in bytes
    slli    a6, a6, 2           // C_padding in bytes

.dpf_loop1:
    li      t1, 0               // inner (column) counter

.dpf_loop2:
    slli    t3, t1, 2           // column index -> byte offset
    mv      t2, a0              // t2 = start of the current row of A
    add     t3, a1, t3          // t3 = &B[column]
    fmv.w.x fa2, zero           // clear the accumulator

    // Dot product over n elements; presumably a hardware loop whose last
    // instruction is the one carrying the .matrix_mul_loop label.
    esp.lp.setup 0, a4, .matrix_mul_loop
    flw     fa0, 0(t2)
    add     t2, t2, 4
    flw     fa1, 0(t3)
    fmadd.s fa2, fa1, fa0, fa2
.matrix_mul_loop: add t3, t3, t5

    fsw     fa2, 0(a2)          // store the finished element of C
    addi    t1, t1, 1           // next column
    addi    a2, a2, 4           // advance the output pointer
    blt     t1, a5, .dpf_loop2

    addi    a7, a7, 1           // next row
    add     a2, a2, a6          // C += C_padding * 4
    add     a0, a0, t4          // A += A_step * 4
    blt     a7, a3, .dpf_loop1

    li      a0, 0               // return status ESP_OK
    addi    sp, sp, 16
    ret
|
||||
|
||||
#endif //dspm_mult_ex_f32_arp4_enabled
|
||||
@@ -0,0 +1,104 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprode_f32_m_ae32.S"
|
||||
|
||||
// This is matrix multiplication function for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_f32_ae32
|
||||
.global .dspm_mult_f32_ae32_body
|
||||
.type dspm_mult_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_f32_ae32:
// Matrix multiplication C[m][k] = A[m][n] * B[n][k] for floats (Xtensa, ESP32).
//
// Register arguments (windowed call):
//   a2 - A, a3 - B, a4 - C, a5 - m, a6 - n, a7 - k
// Working registers:
//   a8  - n * 4, bytes in one row of A
//   a9  - outer loop counter, 0..m
//   a10 - byte step through a row of A (4)
//   a11 - inner loop counter, 0..k
//   a12 - A pointer handed to the dot-product macro
//   a13 - B pointer handed to the dot-product macro
//   a14 - integer 0, written to f1 to clear it
//   a15 - k * 4, bytes in one row of B
//   f1  - result register stored to C
// Both loops test their counter at the bottom, so each body runs at least once.
// Returns 0 (ESP_OK) in a2.

    entry   a1, 16

// Secondary entry point (declared .global): every working register is
// initialised below this label.
.dspm_mult_f32_ae32_body:
    movi.n  a9, 0               // outer (row) counter
    movi.n  a10, 4              // one float = 4 bytes
    movi.n  a14, 0              // start value of the accumulator
    slli    a15, a7, 2          // step between rows of B
    slli    a8, a6, 2           // step between rows of A

.dpf_loop1:
    movi.n  a11, 0              // inner (column) counter

.dpf_loop2:
    addx4   a13, a11, a3        // a13 = B + column * 4
    mov     a12, a2             // a12 = start of the current row of A
    wfr     f1, a14             // clear the accumulator

    // dotprode_f32_ae32( x1 x2 count step1 step2 )
    dotprode_f32_ae32 a12, a13, a6, a10, a15;

    addi    a11, a11, 1         // next column
    ssip    f1, a4, 4           // store the result at a4, then a4 += 4
    blt     a11, a7, .dpf_loop2

    addi    a9, a9, 1           // next row
    add.n   a2, a2, a8          // A += n * 4
    blt     a9, a5, .dpf_loop1

    movi.n  a2, 0               // return status ESP_OK
    retw.n
|
||||
|
||||
#endif //dspm_mult_f32_ae32_enabled
|
||||
@@ -0,0 +1,150 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
|
||||
#if (dspm_mult_f32_aes3_enabled == 1)
|
||||
|
||||
|
||||
// This is matrix multiplication function for ESP32S3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_f32_aes3
|
||||
.global .dspm_mult_f32_ae32_body
|
||||
.type dspm_mult_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_f32_aes3:
// Matrix multiplication C[m][k] = A[m][n] * B[n][k] for floats, ESP32-S3 variant.
// The 128-bit load/store path below is used only when m, n and k are multiples
// of 4 and the A, B, C pointers are 16-byte aligned; otherwise control jumps to
// .dspm_mult_f32_ae32_body (generic ESP32 code, defined outside this function).
// The fast path produces a 1x4 strip of C per pass of .loop_y_aes3 and walks
// all m rows before moving on to the next group of 4 columns.
// Returns 0 (ESP_OK) in a2.
|
||||
entry a1, 16
|
||||
// A - a2
|
||||
// B - a3
|
||||
// C - a4
|
||||
// m - a5
|
||||
// n - a6
|
||||
// k - a7
|
||||
|
||||
// Check if we can use S3 memory model:
|
||||
or a12, a5, a6 // a12 = m OR n
|
||||
or a12, a7, a12 // a12 = m OR n OR k
|
||||
movi.n a11, 3 // a11 = mask of the two low bits (value modulo 4)
|
||||
and a12, a12, a11 // a12 != 0 if any dimension is not a multiple of 4
|
||||
movi.n a11, 15 // a11 = mask of the four low address bits (16-byte alignment)
|
||||
or a10, a3, a2 // a10 = A pointer OR B pointer
|
||||
or a10, a10, a4 // a10 = A pointer OR B pointer OR C pointer
|
||||
and a10, a10, a11 // a10 != 0 if any pointer is not 16-byte aligned
|
||||
or a12, a12, a10 // a12 = dimension check OR alignment check
|
||||
beqz a12, .s3_mmult // all checks passed: use the S3 fast path
|
||||
// Otherwise fall back to the generic ESP32 implementation
|
||||
J .dspm_mult_f32_ae32_body
|
||||
|
||||
.s3_mmult:
|
||||
// f0, f1, f2, f3 - multiplication result
|
||||
// f4, f5, f6, f7 - input for matrix B
|
||||
// f8, f9, f10,f11- input for matrix A
|
||||
movi.n a14, 0 // a14 = byte offset of the current 4-column group inside a row of B
|
||||
|
||||
slli a12, a7, 2 // a12 = K*4 - byte step between rows of B, also the loop-x limit
|
||||
slli a10, a7, 2 // a10 = K*4 - byte step between rows of C
|
||||
srli a11, a6, 2 // a11 = n / 4
|
||||
addi.n a11, a11, -1 // a11 = n / 4 - 1: repeat count of the hardware loop (first 4 columns of A are handled before it)
|
||||
|
||||
movi.n a15, 0 // a15 = column-group counter in bytes (compared with a12)
|
||||
mov a13, a3 // copy of the B pointer (a13 is recomputed at the top of .loop_y_aes3)
|
||||
mov a7, a4 // a7 = C pointer of the current 4-column group
|
||||
|
||||
.loop_x_aes3:
|
||||
movi.n a9, 0 // reset row counter
|
||||
mov a8, a2 // restart A from its first row
|
||||
.loop_y_aes3:
|
||||
add a13, a3, a14 // a13 = B + offset of the current column group (first row of B)
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a8, 16 // Load A values: X11, X12, X13, X14
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y11, Y12, Y13, Y14
|
||||
mul.s f0, f4, f8 // f0 = X11*Y11
|
||||
mul.s f1, f5, f8 // f1 = X11*Y12
|
||||
mul.s f2, f6, f8 // f2 = X11*Y13
|
||||
mul.s f3, f7, f8 // f3 = X11*Y14
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y21, Y22, Y23, Y24
|
||||
madd.s f0, f4, f9 // f0 = X11*Y11 + X12*Y21
|
||||
madd.s f1, f5, f9 // f1 = X11*Y12 + X12*Y22
|
||||
madd.s f2, f6, f9 // f2 = X11*Y13 + X12*Y23
|
||||
madd.s f3, f7, f9 // f3 = X11*Y14 + X12*Y24
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y31, Y32, Y33, Y34
|
||||
madd.s f0, f4, f10 // f0 = X11*Y11 + X12*Y21 + X13*Y31
|
||||
madd.s f1, f5, f10 // f1 = X11*Y12 + X12*Y22 + X13*Y32
|
||||
madd.s f2, f6, f10 // f2 = X11*Y13 + X12*Y23 + X13*Y33
|
||||
madd.s f3, f7, f10 // f3 = X11*Y14 + X12*Y24 + X13*Y34
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y41, Y42, Y43, Y44
|
||||
madd.s f0, f4, f11 // f0 = X11*Y11 + X12*Y21 + X13*Y31 + X14*Y41
|
||||
madd.s f1, f5, f11 // f1 = X11*Y12 + X12*Y22 + X13*Y32 + X14*Y42
|
||||
madd.s f2, f6, f11 // f2 = X11*Y13 + X12*Y23 + X13*Y33 + X14*Y43
|
||||
madd.s f3, f7, f11 // f3 = X11*Y14 + X12*Y24 + X13*Y34 + X14*Y44
|
||||
|
||||
loopnez a11, .loop_end_m_aes3 // zero-overhead loop: body runs a11 times, skipped entirely when a11 == 0
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a8, 16 // Load A values: X15, X16, X17, X18
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y51, Y52, Y53, Y54
|
||||
madd.s f0, f4, f8 // f0 += X15*Y51
|
||||
madd.s f1, f5, f8 // f1 += X15*Y52
|
||||
madd.s f2, f6, f8 // f2 += X15*Y53
|
||||
madd.s f3, f7, f8 // f3 += X15*Y54
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y61, Y62, Y63, Y64
|
||||
madd.s f0, f4, f9 // f0 += X16*Y61
|
||||
madd.s f1, f5, f9 // f1 += X16*Y62
|
||||
madd.s f2, f6, f9 // f2 += X16*Y63
|
||||
madd.s f3, f7, f9 // f3 += X16*Y64
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y71, Y72, Y73, Y74
|
||||
madd.s f0, f4, f10 // f0 += X17*Y71
|
||||
madd.s f1, f5, f10 // f1 += X17*Y72
|
||||
madd.s f2, f6, f10 // f2 += X17*Y73
|
||||
madd.s f3, f7, f10 // f3 += X17*Y74
|
||||
|
||||
EE.LDF.128.XP f7, f6, f5, f4, a13, a12 // Load B value: Y81, Y82, Y83, Y84
|
||||
madd.s f0, f4, f11 // f0 += X18*Y81
|
||||
madd.s f1, f5, f11 // f1 += X18*Y82
|
||||
madd.s f2, f6, f11 // f2 += X18*Y83
|
||||
madd.s f3, f7, f11 // f3 += X18*Y84
|
||||
.loop_end_m_aes3:
|
||||
EE.STF.128.XP f3, f2, f1, f0, a4, a10 // Store 4 results; a4 advances by a10 (C row step)
|
||||
|
||||
addi a9, a9, 1 // Increment row counter
|
||||
blt a9, a5, .loop_y_aes3 // repeat for all m rows
|
||||
addi.n a7, a7, 16 // Move the C base pointer 4 columns (16 bytes) to the right
|
||||
mov a4, a7 // a4 = first row of the next column group of C
|
||||
addi.n a14, a14, 16 // Move the B column offset 4 columns (16 bytes) to the right
|
||||
addi a15, a15, 16 // Advance the column-group counter by 16 bytes
|
||||
blt a15, a12, .loop_x_aes3 // runs until a15 reaches K*4, i.e. k/4 times
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif //dspm_mult_f32_aes3_enabled
|
||||
@@ -0,0 +1,33 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "dsps_dotprod.h"
|
||||
#include "dspm_mult.h"
|
||||
|
||||
// Matrix A(m,n), m - amount of rows, n - amount of columns
|
||||
// C(m,k) = A(m,n)*B(n,k)
|
||||
// c(i,j) = sum(a(i,s)*b(s,j)) , s=1..n
|
||||
esp_err_t dspm_mult_f32_ansi(const float *A, const float *B, float *C, int m, int n, int k)
|
||||
{
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
C[i * k + j] = A[i * n] * B[j];
|
||||
for (int s = 1; s < n ; s++) {
|
||||
C[i * k + j] += A[i * n + s] * B[s * k + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dspm_mult_platform.h"
|
||||
#if (dspm_mult_f32_arp4_enabled == 1)
|
||||
|
||||
// This is matrix multiplication function for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dspm_mult_f32_arp4
|
||||
.global .dspm_mult_f32_arp4_body
|
||||
.type dspm_mult_f32_arp4,@function
|
||||
// The function implements the following C code:
|
||||
// esp_err_t dspm_mult_f32_ansi(const float* A, const float* B, float* C, int m, int n, int k)
|
||||
// {
|
||||
// for (int i=0 ; i< m ; i++)
|
||||
// {
|
||||
// for (int j=0 ; j< k ; j++)
|
||||
// {
|
||||
// C[i*k + j] = A[i*n]*B[j];
|
||||
// for (int s=1; s< n ; s++)
|
||||
// {
|
||||
// C[i*k + j] += A[i*n + s]*B[s*k + j];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return ESP_OK;
|
||||
// }
|
||||
|
||||
dspm_mult_f32_arp4:
// Matrix multiplication C[m][k] = A[m][n] * B[n][k] for floats (RISC-V).
//
// Arguments on entry:
//   a0 - A, a1 - B, a2 - C, a3 - m, a4 - n, a5 - k
// Working registers:
//   a6 - n * 4, bytes in one row of A
//   t5 - k * 4, bytes in one row of B
//   t0 - 4, byte step through a row of A
//   a7 - outer loop counter, 0..m
//   t1 - inner loop counter, 0..k
//   t2 - running A pointer, t3 - running B pointer
//   fa0, fa1 - operands, fa2 - accumulator
// Both loops test their counter at the bottom, so each body runs at least once.
// Returns 0 (ESP_OK) in a0.

    add     sp,sp,-16

// Secondary entry point (declared .global); expects sp already lowered by 16.
.dspm_mult_f32_arp4_body:
    slli    a6, a4, 2           // Pointer increment for A (n * 4)
    slli    t5, a5, 2           // Pointer increment for B (k * 4)

    li      t0, 4               // Increment = 4
    li      a7, 0               // counter loop1

.dpf_loop1:
    li      t1, 0               // reset counter for loop2

.dpf_loop2:
    mv      t2, a0              // t2 = start of the current row of A

    slli    t3, t1, 2           // loop count to byte offset
    add     t3, a1, t3          // t3 = &B[column]

    fmv.w.x fa2,zero            // reset fa2

    // Dot product over n elements; presumably a hardware loop whose last
    // instruction is the one carrying the .matrix_mul_loop label.
    esp.lp.setup 0, a4, .matrix_mul_loop
    flw     fa0, 0(t2)
    add     t2, t2, t0
    flw     fa1, 0(t3)
    fmadd.s fa2, fa1, fa0, fa2
.matrix_mul_loop: add t3, t3, t5

    fsw     fa2, 0(a2)          // store the finished element of C
    addi    a2, a2, 4           // increment a2 for next time

    // check loop 2
    addi    t1, t1, 1           // Increment loop2 counter
    blt     t1, a5, .dpf_loop2

    // check loop 1
    add     a0, a0, a6          // Increment A, A = A[i*n]

    add     a7, a7, 1           // Increment loop1 counter
    blt     a7, a3, .dpf_loop1

    // Exit. a6 still holds n * 4 here, so it must not be used as the return
    // value: load the status explicitly.
    li      a0, 0               // return status ESP_OK
    add     sp,sp,16
    ret
|
||||
|
||||
#endif //dspm_mult_f32_arp4_enabled
|
||||
@@ -0,0 +1,232 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef _dspm_mult_H_
|
||||
#define _dspm_mult_H_
|
||||
|
||||
#include "dsp_err.h"
|
||||
#include "dspm_mult_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Matrix multiplication
|
||||
*
|
||||
* Matrix multiplication for two floating point matrices: C[m][k] = A[m][n] * B[n][k]
|
||||
* The extension (_ansi) use ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[m][n]
|
||||
* @param[in] B input matrix B[n][k]
|
||||
* @param C result matrix C[m][k]
|
||||
* @param[in] m matrix dimension
|
||||
* @param[in] n matrix dimension
|
||||
* @param[in] k matrix dimension
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_mult_f32_ansi(const float *A, const float *B, float *C, int m, int n, int k);
|
||||
esp_err_t dspm_mult_f32_ae32(const float *A, const float *B, float *C, int m, int n, int k);
|
||||
esp_err_t dspm_mult_f32_aes3(const float *A, const float *B, float *C, int m, int n, int k);
|
||||
esp_err_t dspm_mult_f32_arp4(const float *A, const float *B, float *C, int m, int n, int k);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Matrix multiplication A[3x3]xB[3x1]
|
||||
*
|
||||
* Matrix multiplication for two floating point matrices 3x3 and 3x1: C[3][1] = A[3][3] * B[3][1]
|
||||
* The implementation is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[3][3]
|
||||
* @param[in] B input matrix/vector B[3][1]
|
||||
* @param C result matrix/vector C[3][1]
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_mult_3x3x1_f32_ae32(const float *A, const float *B, float *C);
|
||||
|
||||
/**
|
||||
* @brief Matrix multiplication A[3x3]xB[3x3]
|
||||
*
|
||||
* Matrix multiplication for two square 3x3 floating point matrices: C[3][3] = A[3][3] * B[3][3]
|
||||
* The implementation is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[3][3]
|
||||
* @param[in] B input matrix B[3][3]
|
||||
* @param C result matrix C[3][3]
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_mult_3x3x3_f32_ae32(const float *A, const float *B, float *C);
|
||||
|
||||
/**
|
||||
* @brief Matrix multiplication A[4x4]xB[4x1]
|
||||
*
|
||||
* Matrix multiplication for two floating point matrices 4x4 and 4x1: C[4][1] = A[4][4] * B[4][1]
|
||||
* The implementation is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[4][4]
|
||||
* @param[in] B input matrix/vector B[4][1]
|
||||
* @param C result matrix/vector C[4][1]
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
|
||||
esp_err_t dspm_mult_4x4x1_f32_ae32(const float *A, const float *B, float *C);
|
||||
|
||||
/**
|
||||
* @brief Matrix multiplication A[4x4]xB[4x4]
|
||||
*
|
||||
* Matrix multiplication for two square 4x4 floating point matrices: C[4][4] = A[4][4] * B[4][4]
|
||||
* The implementation is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[4][4]
|
||||
* @param[in] B input matrix B[4][4]
|
||||
* @param C result matrix C[4][4]
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_mult_4x4x4_f32_ae32(const float *A, const float *B, float *C);
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Matrix multiplication 16 bit signed int
|
||||
*
|
||||
* Matrix multiplication for two signed 16 bit fixed point matrices: C[m][k] = (A[m][n] * B[n][k]) >> (15- shift)
|
||||
* The extension (_ansi) use ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[m][n]
|
||||
* @param[in] B input matrix B[n][k]
|
||||
* @param C result matrix C[m][k]
|
||||
* @param[in] m matrix dimension
|
||||
* @param[in] n matrix dimension
|
||||
* @param[in] k matrix dimension
|
||||
* @param[in] shift every result will be shifted and stored as 16 bit signed value.
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_mult_s16_ansi(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
|
||||
esp_err_t dspm_mult_s16_ae32(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
|
||||
esp_err_t dspm_mult_s16_aes3(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
|
||||
esp_err_t dspm_mult_s16_arp4(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Matrix subset multiplication
|
||||
*
|
||||
* One or all of the matrices are matrix subsets, described with pointers and strides
|
||||
* Matrix multiplication for two floating point matrices: C[m][k] = A[m][n] * B[n][k]
|
||||
* The extension (_ansi) use ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param[in] A input matrix A[m][n]
|
||||
* @param[in] B input matrix B[n][k]
|
||||
* @param[out] C result matrix C[m][k]
|
||||
* @param[in] m matrix dimension
|
||||
* @param[in] n matrix dimension
|
||||
* @param[in] k matrix dimension
|
||||
* @param[in] A_padd input matrix A padding
|
||||
* @param[in] B_padd input matrix B padding
|
||||
* @param[in] C_padd result matrix C padding
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dspm_mult_ex_f32_ansi(const float *A, const float *B, float *C, int m, int n, int k, int A_padd, int B_padd, int C_padd);
|
||||
esp_err_t dspm_mult_ex_f32_ae32(const float *A, const float *B, float *C, int m, int n, int k, int A_padd, int B_padd, int C_padd);
|
||||
esp_err_t dspm_mult_ex_f32_aes3(const float *A, const float *B, float *C, int m, int n, int k, int A_padd, int B_padd, int C_padd);
|
||||
esp_err_t dspm_mult_ex_f32_arp4(const float *A, const float *B, float *C, int m, int n, int k, int A_padd, int B_padd, int C_padd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
|
||||
#if (dspm_mult_s16_aes3_enabled == 1)
|
||||
#define dspm_mult_s16 dspm_mult_s16_aes3
|
||||
#elif (dspm_mult_s16_ae32_enabled == 1)
|
||||
#define dspm_mult_s16 dspm_mult_s16_ae32
|
||||
#elif (dspm_mult_s16_arp4_enabled == 1)
|
||||
#define dspm_mult_s16 dspm_mult_s16_arp4
|
||||
#else
|
||||
#define dspm_mult_s16 dspm_mult_s16_ansi
|
||||
#endif
|
||||
|
||||
#if (dspm_mult_f32_aes3_enabled == 1)
|
||||
#define dspm_mult_f32 dspm_mult_f32_aes3
|
||||
#define dspm_mult_ex_f32 dspm_mult_ex_f32_aes3
|
||||
#elif (dspm_mult_f32_ae32_enabled == 1)
|
||||
#define dspm_mult_f32 dspm_mult_f32_ae32
|
||||
#define dspm_mult_ex_f32 dspm_mult_ex_f32_ae32
|
||||
#elif (dspm_mult_f32_arp4_enabled == 1)
|
||||
#define dspm_mult_f32 dspm_mult_f32_arp4
|
||||
#define dspm_mult_ex_f32 dspm_mult_ex_f32_arp4
|
||||
#else
|
||||
#define dspm_mult_f32 dspm_mult_f32_ansi
|
||||
#define dspm_mult_ex_f32 dspm_mult_ex_f32_ansi
|
||||
#endif
|
||||
|
||||
#if (dspm_mult_3x3x1_f32_ae32_enabled == 1)
|
||||
#define dspm_mult_3x3x1_f32 dspm_mult_3x3x1_f32_ae32
|
||||
#else
|
||||
#define dspm_mult_3x3x1_f32(A,B,C) dspm_mult_f32(A,B,C, 3, 3, 1)
|
||||
#endif
|
||||
#if (dspm_mult_3x3x3_f32_ae32_enabled == 1)
|
||||
#define dspm_mult_3x3x3_f32(A,B,C) dspm_mult_3x3x3_f32_ae32(A,B,C)
|
||||
#else
|
||||
// Fallback to the generic multiplication. No trailing semicolon: the macro has to
// expand to a plain expression so it can be used like the _ae32 variant.
#define dspm_mult_3x3x3_f32(A,B,C) dspm_mult_f32(A,B,C,3,3,3)
|
||||
#endif
|
||||
#if (dspm_mult_4x4x1_f32_ae32_enabled == 1)
|
||||
#define dspm_mult_4x4x1_f32(A,B,C) dspm_mult_4x4x1_f32_ae32(A,B,C)
|
||||
#else
|
||||
#define dspm_mult_4x4x1_f32(A,B,C) dspm_mult_f32(A,B,C, 4, 4, 1)
|
||||
#endif
|
||||
|
||||
#if (dspm_mult_f32_aes3_enabled == 1)
|
||||
#define dspm_mult_4x4x4_f32(A,B,C) dspm_mult_f32_aes3(A,B,C, 4, 4, 4)
|
||||
#elif (dspm_mult_4x4x4_f32_ae32_enabled == 1)
|
||||
#define dspm_mult_4x4x4_f32 dspm_mult_4x4x4_f32_ae32
|
||||
#else
|
||||
#define dspm_mult_4x4x4_f32(A,B,C) dspm_mult_f32(A,B,C, 4, 4, 4)
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define dspm_mult_s16 dspm_mult_s16_ansi
|
||||
#define dspm_mult_f32 dspm_mult_f32_ansi
|
||||
#define dspm_mult_3x3x1_f32(A,B,C) dspm_mult_f32(A,B,C, 3, 3, 1)
|
||||
#define dsps_sub_f32 dsps_sub_f32_ansi
|
||||
#define dsps_add_f32 dsps_add_f32_ansi
|
||||
#define dspm_mult_4x4x4_f32(A,B,C) dspm_mult_f32(A,B,C, 4, 4, 4)
|
||||
#define dspm_mult_ex_f32 dspm_mult_ex_f32_ansi
|
||||
// No trailing semicolon: the macro has to expand to a plain expression, like the
// neighbouring dspm_mult_NxNxN_f32 fallbacks.
#define dspm_mult_3x3x3_f32(A,B,C) dspm_mult_f32(A,B,C,3,3,3)
|
||||
#define dspm_mult_4x4x1_f32(A,B,C) dspm_mult_f32(A,B,C, 4, 4, 1)
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
|
||||
#endif // _dspm_mult_H_
|
||||
@@ -0,0 +1,44 @@
|
||||
#ifndef _dspm_mult_platform_H_
|
||||
#define _dspm_mult_platform_H_
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#ifdef __XTENSA__
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#include <xtensa/config/core-matmap.h>
|
||||
|
||||
|
||||
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
|
||||
|
||||
#define dspm_mult_f32_ae32_enabled 1
|
||||
#define dspm_mult_3x3x1_f32_ae32_enabled 1
|
||||
#define dspm_mult_3x3x3_f32_ae32_enabled 1
|
||||
#define dspm_mult_4x4x1_f32_ae32_enabled 1
|
||||
#define dspm_mult_4x4x4_f32_ae32_enabled 1
|
||||
|
||||
#endif
|
||||
|
||||
#if ((XCHAL_HAVE_LOOPS == 1) && (XCHAL_HAVE_MAC16 == 1))
|
||||
|
||||
#define dspm_mult_s16_ae32_enabled 1
|
||||
|
||||
#endif
|
||||
#endif // __XTENSA__
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32S3
|
||||
#define dspm_mult_f32_aes3_enabled 1
|
||||
#define dspm_mult_s16_aes3_enabled 1
|
||||
#endif
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32P4
|
||||
#ifdef CONFIG_DSP_OPTIMIZED
|
||||
#define dspm_mult_f32_arp4_enabled 1
|
||||
#define dspm_mult_s16_arp4_enabled 1
|
||||
#else
|
||||
#define dspm_mult_f32_arp4_enabled 0
|
||||
#define dspm_mult_s16_arp4_enabled 0
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _dspm_mult_platform_H_
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef _test_mat_common_H_
|
||||
#define _test_mat_common_H_
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "dsp_err.h"
|
||||
#include "dspm_mult_platform.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief data type for testing operations with sub-matrices
|
||||
*
|
||||
* test evaluation in the test app for matrices check
|
||||
* compare 2 matrices
|
||||
*/
|
||||
typedef struct m_test_data_s {
|
||||
int var;
|
||||
int A_start_row;
|
||||
int A_start_col;
|
||||
int B_start_row;
|
||||
int B_start_col;
|
||||
int C_start_row;
|
||||
int C_start_col;
|
||||
int m;
|
||||
int n;
|
||||
int k;
|
||||
} m_test_data_t;
|
||||
|
||||
/**
|
||||
* @brief check whether 2 matrices are equal
|
||||
*
|
||||
* test evaluation in the test app for matrices check
|
||||
* compare 2 matrices
|
||||
*
|
||||
* @param[in] m_expected: reference matrix
|
||||
* @param[in] m_actual: matrix to be evaluated
|
||||
* @param[in] message: message for test app, in case the test fails
|
||||
*
|
||||
*/
|
||||
void test_assert_equal_mat_mat(dspm::Mat &m_expected, dspm::Mat &m_actual, const char *message);
|
||||
|
||||
/**
|
||||
* @brief check whether a matrix is set to a constant
|
||||
*
|
||||
* test evaluation in the test app for matrices check
|
||||
* compare matrix with constant
|
||||
*
|
||||
* @param[in] m_actual: matrix to be evaluated
|
||||
* @param[in] num: reference constant
|
||||
* @param[in] message: message for test app, if a test fails
|
||||
*
|
||||
*/
|
||||
void test_assert_equal_mat_const(dspm::Mat &m_actual, float num, const char *message);
|
||||
|
||||
/**
|
||||
* @brief check if an area around a sub-matrix is unaffected
|
||||
*
|
||||
* test evaluation in the test app for matrices check
|
||||
*
|
||||
* @param[in] m_origin: original matrix
|
||||
* @param[in] m_modified: sub-matrix, which is created from m_origin
|
||||
* @param[in] start_row: sub-matrix start row
|
||||
* @param[in] start_col: sub-matrix start col
|
||||
* @param[in] message: message for test app, in case the test fails
|
||||
*
|
||||
*/
|
||||
void test_assert_check_area_mat_mat(dspm::Mat &m_origin, dspm::Mat &m_modified, int start_row, int start_col, const char *message);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _test_mat_common_H_
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "test_mat_common.h"
|
||||
|
||||
// Compares two matrices element by element over the dimensions of m_expected
// and raises a Unity float assertion (with the given message) on the first mismatch.
void test_assert_equal_mat_mat(dspm::Mat &m_expected, dspm::Mat &m_actual, const char *message)
{
    int r = 0;
    while (r < m_expected.rows) {
        int c = 0;
        while (c < m_expected.cols) {
            TEST_ASSERT_EQUAL_FLOAT_MESSAGE(m_expected(r, c), m_actual(r, c), message);
            ++c;
        }
        ++r;
    }
}
|
||||
|
||||
// Checks that every element of m_actual equals the constant num; raises a Unity
// float assertion (with the given message) on the first element that differs.
void test_assert_equal_mat_const(dspm::Mat &m_actual, float num, const char *message)
{
    int r = 0;
    while (r < m_actual.rows) {
        int c = 0;
        while (c < m_actual.cols) {
            TEST_ASSERT_EQUAL_FLOAT_MESSAGE(num, m_actual(r, c), message);
            ++c;
        }
        ++r;
    }
}
|
||||
|
||||
// Checks that the memory surrounding a sub-matrix matches the reference matrix.
// Three regions are compared element by element against m_origin: the elements
// before the sub-matrix, the padding between its rows, and the elements after it.
// The elements that belong to the sub-matrix itself are skipped.
void test_assert_check_area_mat_mat(dspm::Mat &m_origin, dspm::Mat &m_modified, int start_row, int start_col, const char *message)
{
    // Number of elements in front of the first sub-matrix element
    const int lead_count = (start_row * m_origin.cols) + start_col;
    // Number of elements left to compare behind the last sub-matrix row
    const int tail_count = m_origin.length - m_modified.length - lead_count - ((m_modified.rows - 1) * m_modified.padding);

    const float *reference = m_origin.data;
    // Rewind the sub-matrix pointer by the lead, so both buffers share one offset
    const float *checked = m_modified.data - lead_count;
    int offset = 0;

    // area before the sub-matrix
    for (int n = 0; n < lead_count; n++, offset++) {
        TEST_ASSERT_EQUAL_FLOAT_MESSAGE(reference[offset], checked[offset], message);
    }

    // rows of the sub-matrix and the padding between them
    const int last_row = m_modified.rows - 1;
    for (int row = 0; row < m_modified.rows; row++) {
        // jump over the accessed sub-matrix elements of this row
        if (m_modified.cols > 0) {
            offset += m_modified.cols;
        }

        // no padding is checked after the last row
        if (row == last_row) {
            continue;
        }

        for (int n = 0; n < m_modified.padding; n++, offset++) {
            TEST_ASSERT_EQUAL_FLOAT_MESSAGE(reference[offset], checked[offset], message);
        }
    }

    // area after the sub-matrix
    for (int n = 0; n < tail_count; n++, offset++) {
        TEST_ASSERT_EQUAL_FLOAT_MESSAGE(reference[offset], checked[offset], message);
    }
}
|
||||
@@ -0,0 +1,270 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "mat.h"
|
||||
|
||||
static const char *TAG = "dspm_Mat";
|
||||
|
||||
// Smoke test: constructs a 3x3 matrix and prints its dimensions and content.
TEST_CASE("Mat class ", "[dspm]")
{
    int rows = 3;
    int cols = 3;
    dspm::Mat mat(rows, cols);

    std::cout << "Test matrix: rows: " << mat.rows;
    std::cout << ", columns: " << mat.cols << std::endl;
    std::cout << mat;
}
|
||||
|
||||
// Solves the same 3x3 linear system with Mat::solve() and Mat::roots(), prints
// both results and fails if any element of the two solutions differs by more than 1e-6.
TEST_CASE("Mat class check solve ", "[dspm]")
{
    int m = 3;
    int n = 3;
    float data_a[9] = {3, 2, 1, 2, 3, 1, 2, 1, 3};
    // The right-hand side is an m x 1 column vector, so three values are enough
    float data_b[3] = {5, -1, 4};
    dspm::Mat A(data_a, m, n);
    dspm::Mat b(data_b, m, 1);

    dspm::Mat x1 = dspm::Mat::solve(A, b);
    std::cout << "Solve result matrix: rows: " << x1.rows << ", columns: " << x1.cols << std::endl;
    std::cout << (x1 * 12).t();

    dspm::Mat x2 = dspm::Mat::roots(A, b);
    std::cout << "Roots result matrix: rows: " << x2.rows << ", columns: " << x2.cols << std::endl;
    std::cout << (x2 * 12).t();

    dspm::Mat diff_b = x1 - x2;
    std::cout << "Difference between solve() and roots(): " << diff_b.t();

    // Separate index names keep the matrix dimensions m and n visible here
    for (int row = 0 ; row < diff_b.rows; row++) {
        for (int col = 0 ; col < diff_b.cols ; col++) {
            if (fabs(diff_b(row, col)) > 0.000001) {
                TEST_ASSERT_MESSAGE (false, "Calculation is incorrect! Error more then expected!");
            }
        }
    }
}
|
||||
|
||||
// Builds a 4x4 system A * x = b from a known x, solves it with Mat::solve() and
// Mat::roots(), logs all intermediate matrices and checks that A * solve(A, b)
// reproduces b within 1e-4.
TEST_CASE("Mat class basic operations", "[dspm]")
{
    int M = 4;
    int N = 4;

    dspm::Mat A(M, N);
    dspm::Mat x(N, 1);
    for (int r = 0 ; r < M ; ++r) {
        x(r, 0) = r + 2;
        for (int c = 0 ; c < N ; ++c) {
            A(r, c) = N * (r + 1) + (c + 1);
        }
    }

    // Overwrite the first two entries of the first row
    A(0, 0) = 10;
    A(0, 1) = 11;

    dspm::Mat b = A * x;
    dspm::Mat x1_ = dspm::Mat::solve(A, b);
    dspm::Mat x2_ = dspm::Mat::roots(A, b);

    ESP_LOGI(TAG, "Matrix A:");
    std::cout << A;

    ESP_LOGI(TAG, "Matrix x.t():");
    std::cout << x.t();

    ESP_LOGI(TAG, "Matrix b.t():");
    std::cout << b.t();

    ESP_LOGI(TAG, "Solve result:");
    std::cout << x1_.t();

    ESP_LOGI(TAG, "Roots result:");
    std::cout << x2_.t();

    dspm::Mat check_b = A * x1_;
    ESP_LOGI(TAG, "Result b.t():");
    std::cout << check_b.t();

    dspm::Mat diff_b = check_b - b;
    ESP_LOGI(TAG, "Difference:");
    std::cout << diff_b.t();

    for (int r = 0 ; r < diff_b.rows; ++r) {
        for (int c = 0 ; c < diff_b.cols ; ++c) {
            float error = fabs(diff_b(r, c));
            if (!(error > 0.0001)) {
                continue;
            }
            ESP_LOGE(TAG, "Solve calculation error: %f", error);
            TEST_ASSERT_MESSAGE (false, "Calculation is incorrect! Error more then expected!");
        }
    }
}
|
||||
|
||||
// Exercises the arithmetic operators of dspm::Mat (+, -, *, *=, ==, scalar
// * + - /), prints a block(), and checks normalize(), inverse() and pinv()
// against known results.
TEST_CASE("Mat class operators", "[dspm]")
{
    int M = 4;
    int N = 4;

    dspm::Mat test1(M, N);
    dspm::Mat test2(M, N);
    dspm::Mat result(M, N);
    // test1 holds twice the linear index, test2 the linear index itself
    for (int m = 0 ; m < M ; m++) {
        for (int n = 0 ; n < N ; n++) {
            test1(m, n) = (m * N + n) * 2;
            test2(m, n) = m * N + n;
            result(m, n) = 0;
        }
    }

    // Check + operator: expected 3 * index
    result = test1 + test2;
    for (int m = 0 ; m < M ; m++) {
        for (int n = 0 ; n < N ; n++) {
            if ((result(m, n) != (test1(m, n) + test2(m, n))) ||
                    (result(m, n) != 3 * (m * N + n)) ||
                    (result.data[m * N + n] != 3 * (m * N + n))) {
                TEST_ASSERT_MESSAGE (false, "Error in + operator!");
            }
        }
    }

    // Check - operator: expected 1 * index
    result = test1 - test2;
    for (int m = 0 ; m < M ; m++) {
        for (int n = 0 ; n < N ; n++) {
            if ((result(m, n) != (test1(m, n) - test2(m, n))) ||
                    (result(m, n) != (m * N + n)) ||
                    (result.data[m * N + n] != (m * N + n))) {
                TEST_ASSERT_MESSAGE (false, "Error in - operator!");
            }
        }
    }

    // Check * operator (result = A*B;)
    // result = I*test2
    // result == test2
    test1 = test1.eye(test1.rows);
    result = test1 * test2;
    dspm::Mat result2 = test1;
    result2 *= test2;

    for (int m = 0 ; m < M ; m++) {
        for (int n = 0 ; n < N ; n++) {
            if ((result(m, n) != test2(m, n)) ||
                    (result(m, n) != (m * N + n)) ||
                    (result.data[m * N + n] != (m * N + n))) {
                std::cout << "Error: " << result(m, n) << "!=" << test2(m, n) << " , "
                          << result(m, n) << "!=" << (m * N + n) << " , "
                          << result.data[m * N + n] << "!=" << (m * N + n) << std::endl;
                TEST_ASSERT_MESSAGE (false, "Error in * operator!");
            }
        }
    }

    // operator* and operator*= must give the same matrix
    if (!(result == result2)) {
        std::cout << "result matrix: " << std::endl << result << std::endl;
        std::cout << "result2 matrix: " << std::endl << result2 << std::endl;
        TEST_ASSERT_MESSAGE (false, "Error in *= or in == operator!");
    }

    // Check * and + operator (result = A*const1 + const2;)
    // The scaling and offset are undone again, so result must equal test2
    test1 = test2;
    float const1 = 2;
    float const2 = 10;
    result = test1 * const1 + const2;
    result = (result - const2) / const1;
    for (int m = 0 ; m < M ; m++) {
        for (int n = 0 ; n < N ; n++) {
            if ((result(m, n) != test2(m, n)) ||
                    (result(m, n) != (m * N + n)) ||
                    (result.data[m * N + n] != (m * N + n))
               ) {
                TEST_ASSERT_MESSAGE (false, "Error in + * const operator!");
            }
        }
    }

    // Test block(...): printed only, no assertion
    int count = 0;
    for (int m = 0 ; m < M ; m++) {
        for (int n = 0 ; n < N ; n++) {
            result(m, n) = count++;
        }
    }
    std::cout << "Original matrix: " << std::endl;
    std::cout << result << std::endl;
    std::cout << "block: " << std::endl;
    std::cout << result.block(1, 1, M - 1, N - 1) << std::endl;

    // Test normalize(): a 2x2 matrix of ones is expected to become all 0.5
    result = dspm::Mat(2, 2);
    for (int m = 0 ; m < result.rows ; m++) {
        for (int n = 0 ; n < result.cols ; n++) {
            result(m, n) = 1;
        }
    }
    std::cout << "Befor normalize: " << std::endl;
    std::cout << result << std::endl;
    result.normalize();
    std::cout << "normalize: " << std::endl;
    std::cout << result << std::endl;

    for (int m = 0 ; m < result.rows ; m++) {
        for (int n = 0 ; n < result.cols ; n++) {
            if (std::abs(result(m, n) - 0.5) > dspm::Mat::abs_tol) {
                ESP_LOGE(TAG, "Error bigger then expected: %f", std::abs(result(m, n) - 0.5));
                TEST_ASSERT_MESSAGE (false, "Error in normalize() operation! ");
            }
        }
    }

    // Test inverse(): m_result is the expected inverse of m_data
    float m_data[] = {2, 5, 7,
                      6, 3, 4,
                      5, -2, -3
                     };
    float m_result[] = {  1.0000,  -1.0000,   1.0000,
                          -38.0000,  41.0000, -34.0000,
                          27.0000, -29.0000,  24.0000
                       };
    result = dspm::Mat(m_data, 3, 3);
    result = result.inverse();
    std::cout << "inverse: " << std::endl;
    std::cout << result << std::endl;
    for (int i = 0 ; i < 3 * 3 ; i++) {
        if (std::abs(result.data[i] - m_result[i]) > 1e-4) {
            printf("Error at[%i] = %f, expected= %f, calculated = %f \n", i, std::abs(result.data[i] - m_result[i]), m_result[i], result.data[i]);
            TEST_ASSERT_MESSAGE (false, "Error in inverse() operation!\n");
        }
    }

    // Test pinv(): compared against the same expected inverse, with a looser tolerance
    result = dspm::Mat(m_data, 3, 3);
    result = result.pinv();
    std::cout << "pinv: " << std::endl;
    std::cout << result << std::endl;
    for (int i = 0 ; i < 3 * 3 ; i++) {
        if (std::abs(result.data[i] - m_result[i]) > 1e-2) {
            printf("Error at[%i] = %f, expected= %f, calculated = %f \n", i, std::abs(result.data[i] - m_result[i]), m_result[i], result.data[i]);
            TEST_ASSERT_MESSAGE (false, "Error in pinv() operation!\n");
        }
    }
}
|
||||
|
||||
// Checks that the determinant of an identity matrix is 1 for sizes 3..9.
TEST_CASE("mat.cpp functionality", "[dsps]")
{
    int max_size = 10;
    for (int i = 3 ; i < max_size ; i++) {
        dspm::Mat A = dspm::Mat::eye(i);
        float det = A.det(i);
        printf("Det[%i] = %f\n", i, det);
        // Compare as float, expected value first: the integer assertion would
        // truncate the determinant before comparing it
        TEST_ASSERT_EQUAL_FLOAT(1, det);
    }
}
|
||||
@@ -0,0 +1,917 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "mat.h"
|
||||
#include "test_mat_common.h"
|
||||
|
||||
static const char *TAG = "[dspm]";
|
||||
|
||||
#define MAT_ROW 6 // test_matrix rows
|
||||
#define MAT_COL 6 // test_matrix cols
|
||||
#define ROI_ROW 4 // sub_matrix rows
|
||||
#define ROI_COL 4 // sub_matrix cols
|
||||
#define START_ROI 1 // start row/col dimension to create sub matrix from test matrix
|
||||
|
||||
dspm::Mat::Rect roi_rect(START_ROI, START_ROI, ROI_ROW, ROI_COL);
|
||||
|
||||
// matrix subset
|
||||
// Creates sub-matrices (ROIs) of a 5x5 matrix in three ways - by start/size, by
// rectangle and by start/size with an explicit stride - and compares each shallow
// ROI with the corresponding deep copy returned by Get().
TEST_CASE("Mat class matrix subset", TAG)
{
    float data[25] = {0, 1, 2, 3, 4,
                      5, 6, 7, 8, 9,
                      0, 1, 2, 3, 4,
                      5, 6, 7, 8, 9,
                      0, 1, 2, 3, 4
                     };

    // Test matrix dimensions
    const int m = 5;
    const int n = 5;

    dspm::Mat mat(data, m, n);
    std::cout << "Test matrix: rows: " << mat.rows << ", columns: " << mat.cols << std::endl;
    std::cout << mat << std::endl;

    // Sub matrix method 1 - sub-matrix dimensions
    int start_row = 1;
    int start_col = 1;
    int roi_rows = 4;
    int roi_cols = 3;

    // Create matrix subset as a shallow copy of mat matrix (no matrix data are copied)
    dspm::Mat mat_subset1 = mat.getROI(start_row, start_col, roi_rows, roi_cols);

    // Create matrix subset as a deep copy of mat matrix (matrix data are copied)
    dspm::Mat mat_subset1_check = mat.Get(start_row, roi_rows, start_col, roi_cols);

    std::cout << "Matrix subset, method 1: rows: " << mat_subset1.rows << ", columns: " << mat_subset1.cols << std::endl;
    std::cout << mat_subset1 << std::endl;

    // Compare the deep and the shallow copies
    test_assert_equal_mat_mat(mat_subset1_check, mat_subset1, "matrix subset 1");

    // Sub matrix method 2 - sub-matrix dimensions as a matrix rectangle
    int x = 1;
    int y = 1;
    int width = 4;
    int height = 3;

    // Create matrix ROI as a rectangle area
    dspm::Mat::Rect subset_rect(x, y, width, height);
    dspm::Mat mat_subset2 = mat.getROI(subset_rect);
    std::cout << "Matrix subset method 2: rows: " << mat_subset2.rows << ", columns: " << mat_subset2.cols << std::endl;
    std::cout << mat_subset2 << std::endl;
    dspm::Mat mat_subset2_check = mat.Get(subset_rect);

    test_assert_equal_mat_mat(mat_subset2_check, mat_subset2, "matrix subset 2");

    // Sub matrix method 3 - sub-matrix dimensions with specified stride
    start_row = 0;
    start_col = 1;
    roi_rows = 3;
    roi_cols = 3;
    int stride = 10;

    dspm::Mat mat_subset3 = mat.getROI(start_row, start_col, roi_rows, roi_cols, stride);
    std::cout << "Matrix subset method 3: rows: " << mat_subset3.rows << ", columns: " << mat_subset3.cols << std::endl;
    std::cout << mat_subset3 << std::endl;
    dspm::Mat mat_subset3_check = mat.Get(start_row, 5, start_col, roi_cols);

    // The deep copy holds all 5 rows; the test compares its even rows with
    // rows 0..2 of the strided ROI
    for (int row = 0; row < mat_subset3_check.rows; row++) {
        if (row % 2) {
            continue;
        }
        for (int col = 0; col < mat_subset3_check.cols; col++) {
            TEST_ASSERT_EQUAL_FLOAT(mat_subset3_check(row, col), mat_subset3(row / 2, col));
        }
    }
}
|
||||
|
||||
static void test_mat_subset_operator_eq()
|
||||
{
|
||||
dspm::Mat mat(2, 2);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = 1;
|
||||
}
|
||||
|
||||
dspm::Mat mat1(2, 2);
|
||||
for (int i = 0; i < mat1.length; i++) {
|
||||
mat1.data[i] = i + 1;
|
||||
}
|
||||
|
||||
// matrices, dimensions are equal
|
||||
// mat(2, 2), mat1(2, 2)
|
||||
mat = mat1;
|
||||
TEST_ASSERT_EQUAL_INT(2, mat.rows);
|
||||
TEST_ASSERT_EQUAL_INT(mat1.rows, mat.rows);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat.cols);
|
||||
TEST_ASSERT_EQUAL_INT(mat1.cols, mat.cols);
|
||||
test_assert_equal_mat_mat(mat1, mat, "=operator, mat = mat (equal dim)");
|
||||
|
||||
dspm::Mat mat2(3, 3);
|
||||
for (int i = 0; i < mat2.length; i++) {
|
||||
mat2.data[i] = (i + 1) * 2;
|
||||
}
|
||||
|
||||
// matrices, dimensions are not equal
|
||||
// mat1(2, 2), mat2(3, 3)
|
||||
mat1 = mat2;
|
||||
TEST_ASSERT_EQUAL_INT(3, mat1.rows);
|
||||
TEST_ASSERT_EQUAL_INT(mat2.rows, mat1.rows);
|
||||
TEST_ASSERT_EQUAL_INT(3, mat1.cols);
|
||||
TEST_ASSERT_EQUAL_INT(mat2.cols, mat1.cols);
|
||||
test_assert_equal_mat_mat(mat2, mat1, "=operator, mat = mat (not equal dim)");
|
||||
|
||||
dspm::Mat mat3(4, 4);
|
||||
dspm::Mat mat4(4, 4);
|
||||
dspm::Mat mat4_compare(4, 4);
|
||||
for (int i = 0; i < mat3.length; i++) {
|
||||
mat3.data[i] = (i + 1) * 3;
|
||||
mat4.data[i] = (i + 1) * 4;
|
||||
mat4_compare.data[i] = (i + 1) * 4;
|
||||
}
|
||||
dspm::Mat mat3_sub_3x3 = mat3.getROI(1, 1, 3, 3);
|
||||
dspm::Mat mat3_sub_2x2 = mat3.getROI(1, 1, 2, 2);
|
||||
dspm::Mat mat3_mat_2x2 = mat3.Get(1, 2, 1, 2);
|
||||
|
||||
// matrix and sub-matrix, dimensions are equal
|
||||
// mat1(3, 3), mat3_sub_3x3(3, 3)
|
||||
mat1 = mat3_sub_3x3;
|
||||
TEST_ASSERT_FALSE(mat1.sub_matrix);
|
||||
TEST_ASSERT_EQUAL_INT(3, mat1.rows);
|
||||
TEST_ASSERT_EQUAL_INT(mat3_sub_3x3.rows, mat1.rows);
|
||||
TEST_ASSERT_EQUAL_INT(3, mat1.cols);
|
||||
TEST_ASSERT_EQUAL_INT(mat3_sub_3x3.cols, mat1.cols);
|
||||
test_assert_equal_mat_mat(mat3_sub_3x3, mat1, "=operator, mat = sub_mat (equal dim)");
|
||||
|
||||
dspm::Mat mat4_sub_2x2 = mat4.getROI(1, 1, 2, 2);
|
||||
dspm::Mat mat4_mat_2x2 = mat4.Get(1, 2, 1, 2);
|
||||
|
||||
// matrix and sub-matrix, dimensions are not equal
|
||||
// mat1(3, 3), mat4_sub_2x2(2, 2)
|
||||
mat1 = mat4_sub_2x2;
|
||||
TEST_ASSERT_FALSE(mat1.sub_matrix);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat1.rows);
|
||||
TEST_ASSERT_EQUAL_INT(mat4_sub_2x2.rows, mat1.rows);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat1.cols);
|
||||
TEST_ASSERT_EQUAL_INT(mat4_sub_2x2.cols, mat1.cols);
|
||||
test_assert_equal_mat_mat(mat4_sub_2x2, mat1, "=operator, mat = sub_mat (not equal dim)");
|
||||
|
||||
// sub-matrix and sub-matrix, dimensions are not equal
|
||||
// mat4_sub_2x2(2, 2), mat3_sub_3x3(3, 3)
|
||||
ESP_LOGI("=operator test", "following is an expected error message about matrices not having equal dimensions");
|
||||
mat4_sub_2x2 = mat3_sub_3x3;
|
||||
TEST_ASSERT_TRUE(mat4_sub_2x2.sub_matrix);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.rows);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.cols);
|
||||
test_assert_equal_mat_mat(mat4_mat_2x2, mat4_sub_2x2, "=operator, sub_mat = sub_mat (not equal dim)");
|
||||
test_assert_check_area_mat_mat(mat4_compare, mat4_sub_2x2, 1, 1, "=operator area, sub_mat = sub_mat (not equal dim)");
|
||||
|
||||
// sub-matrix and sub-matrix, dimensions are equal
|
||||
// mat4_sub_2x2(2, 2), mat3_sub_2x2(2, 2)
|
||||
mat4_sub_2x2 = mat3_sub_2x2;
|
||||
TEST_ASSERT_TRUE(mat4_sub_2x2.sub_matrix);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.rows);
|
||||
TEST_ASSERT_EQUAL_INT(mat3_sub_2x2.rows, mat4_sub_2x2.rows);
|
||||
TEST_ASSERT_EQUAL_INT(2, mat4_sub_2x2.cols);
|
||||
TEST_ASSERT_EQUAL_INT(mat3_sub_2x2.cols, mat4_sub_2x2.cols);
|
||||
test_assert_equal_mat_mat(mat3_mat_2x2, mat4_sub_2x2, "=operator, sub_mat = sub_mat (equal dim)");
|
||||
test_assert_check_area_mat_mat(mat4_compare, mat4_sub_2x2, 1, 1, "=operator area, sub_mat = sub_mat (equal dim)");
|
||||
}
|
||||
|
||||
// operator==
|
||||
static void test_mat_subset_operator_eq_eq(void)
|
||||
{
|
||||
dspm::Mat A(MAT_ROW, MAT_COL);
|
||||
dspm::Mat B(MAT_ROW, MAT_COL);
|
||||
|
||||
for (int i = 0; i < A.length; i++) {
|
||||
A.data[i] = i;
|
||||
B.data[i] = i * 2;
|
||||
}
|
||||
|
||||
dspm::Mat A_sub = A.getROI(roi_rect);
|
||||
dspm::Mat A_mat = A.Get(roi_rect);
|
||||
|
||||
dspm::Mat B_sub = B.getROI(roi_rect);
|
||||
|
||||
for (int row = 0; row < B_sub.rows; row++) {
|
||||
for (int col = 0; col < B_sub.cols; col++) {
|
||||
B_sub(row, col) = B_sub(row, col) / 2;
|
||||
}
|
||||
}
|
||||
dspm::Mat B_mat = B.Get(roi_rect);
|
||||
dspm::Mat B_mat_neq_cont = B_mat * 3;
|
||||
dspm::Mat B_mat_neq_dim(3, 3);
|
||||
|
||||
TEST_ASSERT_TRUE(A_mat == B_mat);
|
||||
TEST_ASSERT_TRUE(A_sub == B_sub);
|
||||
TEST_ASSERT_TRUE(A_sub == B_mat);
|
||||
TEST_ASSERT_TRUE(A_mat == B_sub);
|
||||
ESP_LOGI("==operator test", "following is an expected error message about matrices not having equal content");
|
||||
TEST_ASSERT_FALSE(A_sub == B_mat_neq_cont);
|
||||
TEST_ASSERT_FALSE(A_sub == B_mat_neq_dim);
|
||||
}
|
||||
|
||||
// operator/
|
||||
static void test_mat_subset_operator_mat_div_mat(void)
|
||||
{
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C = mat;
|
||||
dspm::Mat C_compare_area = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat B_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat B_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare(ROI_ROW, ROI_COL);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] = A_mat.data[i] / B_mat.data[i];
|
||||
}
|
||||
|
||||
C_mat = A_mat / B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = mat / mat");
|
||||
|
||||
C_mat = A_sub / B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = sub_mat / sub_mat");
|
||||
|
||||
C_mat = A_sub / B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = sub_mat / mat");
|
||||
|
||||
C_mat = A_mat / B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = mat / sub_mat");
|
||||
|
||||
C_sub = A_sub / B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat = sub_mat / sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "/ operator, area check, sub_mat = sub_mat / sub_mat");
|
||||
|
||||
C_sub = A_mat / B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat = mat / sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "/ operator, area check, sub_mat = sub_mat / sub_mat");
|
||||
|
||||
C = mat;
|
||||
C_mat = C.Get(roi_rect); // C_mat must be refreshed
|
||||
C_mat /= A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat /= mat");
|
||||
|
||||
C = mat;
|
||||
C_mat = C.Get(roi_rect); // C_mat must be refreshed
|
||||
C_mat /= A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat /= sub_mat");
|
||||
|
||||
C = mat; // C must be refreshed, to refresh the C_sub
|
||||
C_sub /= A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat /= mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "/ operator, area check, sub_mat /= mat");
|
||||
|
||||
C = mat; // C must be refreshed, to refresh the C_sub
|
||||
C_sub /= A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat /= sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "/ operator, area check, sub_mat /= sub_mat");
|
||||
}
|
||||
|
||||
// operator^
|
||||
static void test_mat_subset_operator_xor(void)
|
||||
{
|
||||
dspm::Mat mat(5, 5);
|
||||
dspm::Mat mat_area_check(5, 5);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
mat_area_check.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat::Rect roi_rect(1, 1, 3, 3);
|
||||
dspm::Mat mat_mat = mat.Get(roi_rect);
|
||||
dspm::Mat mat_sub = mat.getROI(roi_rect);
|
||||
|
||||
// XOR 0
|
||||
dspm::Mat res_mat = mat_mat ^ 0;
|
||||
dspm::Mat res_sub = mat_sub ^ 0;
|
||||
test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 0");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 0");
|
||||
|
||||
// XOR 1
|
||||
res_mat = mat_mat ^ 1;
|
||||
res_sub = mat_sub ^ 1;
|
||||
test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 1");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 1");
|
||||
|
||||
// XOR even
|
||||
res_mat = mat_mat ^ 2;
|
||||
res_sub = mat_sub ^ 2;
|
||||
test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 2");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 2");
|
||||
|
||||
// XOR odd
|
||||
res_mat = mat_mat ^ 3;
|
||||
res_sub = mat_sub ^ 3;
|
||||
test_assert_equal_mat_mat(res_mat, res_sub, "sub-matrix operator^ 3");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "sub-matrix area check operator^ 3");
|
||||
}
|
||||
|
||||
// operator/
|
||||
static void test_mat_subset_operator_mat_div_const(void)
|
||||
{
|
||||
const float div_const = 2;
|
||||
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C_compare_area = mat;
|
||||
dspm::Mat C = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare = mat.Get(roi_rect);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] /= div_const;
|
||||
}
|
||||
|
||||
C_mat = A_mat / div_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = mat / const");
|
||||
|
||||
C_mat = A_sub / div_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat = sub_mat / const");
|
||||
C_mat = C.Get(roi_rect);
|
||||
|
||||
C_mat /= div_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "/ operator, mat /= const");
|
||||
|
||||
C_sub = A_mat / div_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat = mat / const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "/ operator, area check, sub_mat = mat / const");
|
||||
|
||||
C = mat;
|
||||
C_sub /= div_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "/ operator, sub_mat /= const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "/ operator, area check, sub_mat /= const");
|
||||
}
|
||||
|
||||
// operator-
|
||||
static void test_mat_subset_operator_mat_sub_const(void)
|
||||
{
|
||||
const float sub_const = 2;
|
||||
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C_compare_area = mat;
|
||||
dspm::Mat C = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare = mat.Get(roi_rect);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] -= sub_const;
|
||||
}
|
||||
|
||||
C_mat = A_mat - sub_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = mat - const");
|
||||
|
||||
C_mat = A_sub - sub_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = sub_mat - const");
|
||||
C_mat = C.Get(roi_rect);
|
||||
|
||||
C_mat -= sub_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat -= const");
|
||||
|
||||
C_sub = A_mat - sub_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat = mat - const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "- operator, area check, sub_mat = mat - const");
|
||||
|
||||
C = mat;
|
||||
C_sub -= sub_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat -= const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "- operator, area check, sub_mat -= const");
|
||||
}
|
||||
|
||||
// operator-
|
||||
static void test_mat_subset_operator_mat_sub_mat(void)
|
||||
{
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C = mat;
|
||||
dspm::Mat C_compare_area = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat B_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat B_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare(ROI_ROW, ROI_COL);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] = A_mat.data[i] - B_mat.data[i];
|
||||
}
|
||||
|
||||
C_mat = A_mat - B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = mat - mat");
|
||||
|
||||
C_mat = A_sub - B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = sub_mat - sub_mat");
|
||||
|
||||
C_mat = A_sub - B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = sub_mat - mat");
|
||||
|
||||
C_mat = A_mat - B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat = mat - sub_mat");
|
||||
|
||||
C_sub = A_sub - B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat = sub_mat - sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "- operator, area check, sub_mat = sub_mat - sub_mat");
|
||||
|
||||
C_sub = A_mat - B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat = mat - sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "- operator, area check, sub_mat = sub_mat - sub_mat");
|
||||
|
||||
C = mat;
|
||||
C_mat = C.Get(roi_rect); // C_mat must be refreshed
|
||||
C_mat -= A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat -= mat");
|
||||
|
||||
C = mat;
|
||||
C_mat = C.Get(roi_rect); // C_mat must be refreshed
|
||||
C_mat -= A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "- operator, mat -= sub_mat");
|
||||
|
||||
C = mat; // C must be refreshed, to refresh the C_sub
|
||||
C_sub -= A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat -= mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "- operator, area check, sub_mat -= mat");
|
||||
|
||||
C = mat; // C must be refreshed, to refresh the C_sub
|
||||
C_sub -= A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "- operator, sub_mat -= sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "- operator, area check, sub_mat -= sub_mat");
|
||||
}
|
||||
|
||||
// operator+
|
||||
static void test_mat_subset_operator_mat_add_mat(void)
|
||||
{
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C = mat;
|
||||
dspm::Mat C_compare_area = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat B_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat B_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare(ROI_ROW, ROI_COL);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] = A_mat.data[i] + B_mat.data[i];
|
||||
}
|
||||
|
||||
C_mat = A_mat + B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = mat + mat");
|
||||
|
||||
C_mat = A_sub + B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = sub_mat + sub_mat");
|
||||
|
||||
C_mat = A_sub + B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = sub_mat + mat");
|
||||
|
||||
C_sub = A_sub + B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat = sub_mat + sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "+ operator, area check, sub_mat = sub_mat + sub_mat");
|
||||
|
||||
C_sub = A_mat + B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat = mat + sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "+ operator, area check, sub_mat = sub_mat + sub_mat");
|
||||
|
||||
C = mat;
|
||||
C_mat = C.Get(roi_rect); // C_mat must be refreshed
|
||||
C_mat += A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat += mat");
|
||||
|
||||
C = mat;
|
||||
C_mat = C.Get(roi_rect); // C_mat must be refreshed
|
||||
C_mat += A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat += sub_mat");
|
||||
|
||||
C = mat; // C must be refreshed, to refresh the C_sub
|
||||
C_sub += A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat += mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "+ operator, area check, sub_mat += mat");
|
||||
|
||||
C = mat; // C must be refreshed, to refresh the C_sub
|
||||
C_sub += A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat += sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "+ operator, area check, sub_mat += sub_mat");
|
||||
}
|
||||
|
||||
// operator+
|
||||
static void test_mat_subset_operator_mat_add_const(void)
|
||||
{
|
||||
const float add_const = 2;
|
||||
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C_compare_area = mat;
|
||||
dspm::Mat C = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare = mat.Get(roi_rect);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] += add_const;
|
||||
}
|
||||
|
||||
C_mat = A_sub + add_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat = sub_mat + const");
|
||||
C_mat = C.Get(roi_rect);
|
||||
|
||||
C_mat += add_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "+ operator, mat += const");
|
||||
|
||||
C_sub = A_mat + add_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat = mat + const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "+ operator, area check, sub_mat = mat + const");
|
||||
|
||||
C = mat;
|
||||
C_sub += add_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "+ operator, sub_mat += const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "+ operator, area check, sub_mat += const");
|
||||
}
|
||||
|
||||
// operator*
|
||||
static void test_mat_subset_operator_mat_mul_const(void)
|
||||
{
|
||||
const float mul_const = 2;
|
||||
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C_compare_area = mat;
|
||||
dspm::Mat C = mat;
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect);
|
||||
dspm::Mat C_mat = C.Get(roi_rect);
|
||||
dspm::Mat C_compare = mat.Get(roi_rect);
|
||||
|
||||
for (int i = 0; i < C_compare.length; i++) {
|
||||
C_compare.data[i] *= mul_const;
|
||||
}
|
||||
|
||||
C_mat = A_mat * mul_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = mat * const");
|
||||
|
||||
C_mat = A_sub * mul_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = sub_mat * const");
|
||||
C_mat = C.Get(roi_rect);
|
||||
|
||||
C_mat *= mul_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat *= const");
|
||||
|
||||
C_sub = A_mat * mul_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "* operator, sub_mat = mat * const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "* operator, area check, sub_mat = mat * const");
|
||||
|
||||
C = mat;
|
||||
C_sub *= mul_const;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "* operator, sub_mat *= const");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "* operator, area check, sub_mat *= const");
|
||||
}
|
||||
|
||||
// operator*
|
||||
static void test_mat_subset_operator_mat_mul_mat_2(void)
|
||||
{
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C_compare_area = mat;
|
||||
dspm::Mat C = mat;
|
||||
|
||||
const int m = 4, n = 4, k = 4;
|
||||
dspm::Mat::Rect roi_rect_mul(1, 1, k, m);
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(roi_rect_mul);
|
||||
dspm::Mat A_mat = mat.Get(roi_rect_mul);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(roi_rect_mul);
|
||||
dspm::Mat C_mat = C.Get(roi_rect_mul);
|
||||
dspm::Mat C_compare = dspm::Mat::ones(m, k);
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
C_compare.data[(i * k) + j] = 0;
|
||||
for (int s = 0 ; s < n ; s++) {
|
||||
C_compare.data[(i * k) + j] += A_mat.data[(i * n) + s] * C_mat.data[(s * k) + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
C_mat *= A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "*= operator, mat *= mat");
|
||||
C_mat = C.Get(roi_rect_mul);
|
||||
|
||||
C_mat *= A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "*= operator, mat *= sub_mat");
|
||||
|
||||
C_sub *= A_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "*= operator, sub_mat *= sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "*= operator, area check, sub_mat *= sub_mat");
|
||||
|
||||
C = mat;
|
||||
C_sub *= A_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "*= operator, sub_mat *= sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "*= operator, area check, sub_mat *= sub_mat");
|
||||
}
|
||||
|
||||
// operator*
|
||||
static void test_mat_subset_operator_mat_mul_mat_1(void)
|
||||
{
|
||||
dspm::Mat mat(MAT_ROW, MAT_COL);
|
||||
for (int i = 0; i < mat.length; i++) {
|
||||
mat.data[i] = i;
|
||||
}
|
||||
|
||||
dspm::Mat C = dspm::Mat::ones(6);
|
||||
dspm::Mat C_compare_area = dspm::Mat::ones(6);
|
||||
|
||||
// matrix dimensions
|
||||
const int m = 4, n = 3, k = 4;
|
||||
dspm::Mat::Rect A_roi_rect(2, 1, n, m);
|
||||
dspm::Mat::Rect B_roi_rect(1, 2, k, n);
|
||||
dspm::Mat::Rect C_roi_rect(1, 1, k, m);
|
||||
|
||||
dspm::Mat A_sub = mat.getROI(A_roi_rect);
|
||||
dspm::Mat A_mat = mat.Get(A_roi_rect);
|
||||
|
||||
dspm::Mat B_sub = mat.getROI(B_roi_rect);
|
||||
dspm::Mat B_mat = mat.Get(B_roi_rect);
|
||||
|
||||
dspm::Mat C_sub = C.getROI(C_roi_rect);
|
||||
dspm::Mat C_mat = C.Get(C_roi_rect);
|
||||
dspm::Mat C_compare = dspm::Mat::ones(m, k);
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
C_compare.data[(i * k) + j] = 0;
|
||||
for (int s = 0 ; s < n ; s++) {
|
||||
C_compare.data[(i * k) + j] += A_mat.data[(i * n) + s] * B_mat.data[(s * k) + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
C_mat = A_mat * B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = mat * mat");
|
||||
|
||||
C_mat = A_sub * B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = sub_mat * sub_mat");
|
||||
|
||||
C_mat = A_sub * B_mat;
|
||||
test_assert_equal_mat_mat(C_compare, C_mat, "* operator, mat = sub_mat * mat");
|
||||
|
||||
C_sub = A_sub * B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "* operator, sub_mat = sub_mat * sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "* operator, area check, sub_mat = sub_mat * sub_mat");
|
||||
|
||||
C_sub = A_mat * B_sub;
|
||||
test_assert_equal_mat_mat(C_compare, C_sub, "*operator, sub_mat = mat * sub_mat");
|
||||
test_assert_check_area_mat_mat(C_compare_area, C_sub, START_ROI, START_ROI, "* operator, area check, sub_mat = sub_mat * sub_mat");
|
||||
}
|
||||
|
||||
// Runs every sub-matrix operator test in sequence
TEST_CASE("Matrix subset operators", TAG)
{
    // assignment and comparison
    test_mat_subset_operator_eq();              // mat = mat
    test_mat_subset_operator_eq_eq();           // mat == mat

    // operator^ with an integer operand
    test_mat_subset_operator_xor();             // mat ^ const

    // multiplication
    test_mat_subset_operator_mat_mul_mat_1();   // mat * mat
    test_mat_subset_operator_mat_mul_mat_2();   // mat * mat
    test_mat_subset_operator_mat_mul_const();   // mat * const

    // addition
    test_mat_subset_operator_mat_add_mat();     // mat + mat
    test_mat_subset_operator_mat_add_const();   // mat + const

    // subtraction
    test_mat_subset_operator_mat_sub_mat();     // mat - mat
    test_mat_subset_operator_mat_sub_const();   // mat - const

    // division
    test_mat_subset_operator_mat_div_mat();     // mat / mat
    test_mat_subset_operator_mat_div_const();   // mat / const
}
|
||||
|
||||
static void test_mat_subset_solve(void)
|
||||
{
|
||||
int m = 3;
|
||||
int n = 3;
|
||||
float data_a[9] = {3, 2, 1, 2, 3, 1, 2, 1, 3};
|
||||
float data_b[9] = {5, -1, 4};
|
||||
dspm::Mat A(data_a, m, n);
|
||||
dspm::Mat b(data_b, m, 1);
|
||||
|
||||
dspm::Mat A_origin = dspm::Mat::ones(5);
|
||||
dspm::Mat b_origin = dspm::Mat::ones(5, 3);
|
||||
dspm::Mat A_origin_area_check = dspm::Mat::ones(5);
|
||||
dspm::Mat b_origin_area_check = dspm::Mat::ones(5, 3);
|
||||
|
||||
A_origin.Copy(A, 1, 1);
|
||||
b_origin.Copy(b, 1, 1);
|
||||
|
||||
// create sub-matrices
|
||||
dspm::Mat A_sub = A_origin.getROI(1, 1, m, n);
|
||||
dspm::Mat b_sub = b_origin.getROI(1, 1, m, 1);
|
||||
|
||||
dspm::Mat x1 = dspm::Mat::solve(A_sub, b_sub);
|
||||
test_assert_check_area_mat_mat(A_origin_area_check, A_sub, 1, 1, "check solve, area A");
|
||||
test_assert_check_area_mat_mat(b_origin_area_check, b_sub, 1, 1, "check solve, area b");
|
||||
|
||||
std::cout << "Solve result matrix: rows: " << x1.rows << ", columns: " << x1.cols << std::endl;
|
||||
std::cout << (x1 * 12).t();
|
||||
dspm::Mat x2 = dspm::Mat::roots(A_sub, b_sub);
|
||||
test_assert_check_area_mat_mat(A_origin_area_check, A_sub, 1, 1, "check solve, area A");
|
||||
test_assert_check_area_mat_mat(b_origin_area_check, b_sub, 1, 1, "check solve, area b");
|
||||
|
||||
std::cout << "Roots result matrix: rows: " << x2.rows << ", columns: " << x2.cols << std::endl;
|
||||
std::cout << (x2 * 12).t();
|
||||
dspm::Mat diff_b = x1 - x2;
|
||||
std::cout << "Difference between solve() abd roots(): " << diff_b.t();
|
||||
for (int row = 0; row < diff_b.rows; row++) {
|
||||
for (int col = 0; col < diff_b.cols; col++) {
|
||||
if (fabs(diff_b(row, col)) > 0.000001) {
|
||||
TEST_ASSERT_MESSAGE (false, "Calculation is incorrect! Error more then expected!");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_mat_subset_inverse(void)
|
||||
{
|
||||
// Test inverse()
|
||||
dspm::Mat result;
|
||||
float m_data[] = {2, 5, 7,
|
||||
6, 3, 4,
|
||||
5, -2, -3
|
||||
};
|
||||
float m_result[] = { 1.0000, -1.0000, 1.0000,
|
||||
-38.0000, 41.0000, -34.0000,
|
||||
27.0000, -29.0000, 24.0000
|
||||
};
|
||||
|
||||
result = dspm::Mat(m_data, 3, 3);
|
||||
|
||||
dspm::Mat result_origin = dspm::Mat::ones(5);
|
||||
dspm::Mat result_origin_area_check = dspm::Mat::ones(5);
|
||||
|
||||
result_origin.Copy(result, 1, 1);
|
||||
dspm::Mat result_sub = result_origin.getROI(1, 1, 3, 3);
|
||||
|
||||
result = result_sub.inverse();
|
||||
test_assert_check_area_mat_mat(result_origin_area_check, result_sub, 1, 1, "area check inverse");
|
||||
|
||||
std::cout << "inverse: " << std::endl;
|
||||
std::cout << result << std::endl;
|
||||
for (int i = 0; i < 3 * 3; i++) {
|
||||
if (std::abs(result.data[i] - m_result[i]) > 1e-4) {
|
||||
printf("Error at[%i] = %f, expected= %f, calculated = %f\n", i, std::abs(result.data[i] - m_result[i]), m_result[i], result.data[i]);
|
||||
TEST_ASSERT_MESSAGE (false, "Error in inverse() operation!\n");
|
||||
}
|
||||
}
|
||||
result = dspm::Mat(m_data, 3, 3);
|
||||
result_origin = dspm::Mat::ones(5);
|
||||
result_origin.Copy(result, 1, 1);
|
||||
result_sub = result_origin.getROI(1, 1, 3, 3);
|
||||
|
||||
result = result_sub.pinv();
|
||||
test_assert_check_area_mat_mat(result_origin_area_check, result_sub, 1, 1, "area check pinv");
|
||||
|
||||
std::cout << "pinv: " << std::endl;
|
||||
std::cout << result << std::endl;
|
||||
for (int i = 0; i < 3 * 3; i++) {
|
||||
if (std::abs(result.data[i] - m_result[i]) > 1e-2) {
|
||||
printf("Error at[%i] = %f, expected= %f, calculated = %f \n", i, std::abs(result.data[i] - m_result[i]), m_result[i], result.data[i]);
|
||||
TEST_ASSERT_MESSAGE (false, "Error in pinv() operation!\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_mat_subset_normalize(void)
|
||||
{
|
||||
dspm::Mat result_origin = dspm::Mat::ones(4);
|
||||
dspm::Mat result_area_check = dspm::Mat::ones(4);
|
||||
dspm::Mat result_sub = result_origin.getROI(1, 1, 2, 2);
|
||||
|
||||
std::cout << "Befor normalize: " << std::endl;
|
||||
std::cout << result_sub << std::endl;
|
||||
result_sub.normalize();
|
||||
test_assert_check_area_mat_mat(result_area_check, result_sub, 1, 1, "normalize area check");
|
||||
std::cout << "normalize: " << std::endl;
|
||||
std::cout << result_sub << std::endl;
|
||||
|
||||
for (int row = 0; row < result_sub.rows; row++) {
|
||||
for (int col = 0 ; col < result_sub.cols ; col++) {
|
||||
if (std::abs(result_sub(row, col) - 0.5) > dspm::Mat::abs_tol) {
|
||||
ESP_LOGE(TAG, "Error bigger then expected: %f", std::abs(result_sub(row, col) - 0.5));
|
||||
TEST_ASSERT_MESSAGE (false, "Error in normalize() operation! ");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_mat_subset_swap_trans_dot_clear(void)
|
||||
{
|
||||
dspm::Mat mat(5, 5);
|
||||
dspm::Mat mat_area_check(5, 5);
|
||||
for (int row = 0; row < mat.rows; row++) {
|
||||
for (int col = 0; col < mat.cols; col++) {
|
||||
mat(row, col) = row + 1;
|
||||
mat_area_check(row, col) = row + 1;
|
||||
}
|
||||
}
|
||||
|
||||
dspm::Mat::Rect roi_rect(1, 1, 3, 3);
|
||||
dspm::Mat mat_sub = mat.getROI(roi_rect);
|
||||
dspm::Mat mat_mat = mat.Get(roi_rect);
|
||||
|
||||
// check swap rows
|
||||
mat_sub.swapRows(0, 1);
|
||||
mat_mat.swapRows(0, 1);
|
||||
test_assert_equal_mat_mat(mat_sub, mat_mat, "sub-matrix swapRows");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "area check sub-matrix swapRows");
|
||||
|
||||
// check transpose
|
||||
dspm::Mat mat_sub_res = mat_sub.t();
|
||||
dspm::Mat mat_mat_res = mat_mat.t();
|
||||
test_assert_equal_mat_mat(mat_mat_res, mat_sub_res, "sub-matrix transpose");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "area check sub-matrix transpose");
|
||||
|
||||
// check dot product
|
||||
float dot_mat = dspm::Mat::dotProduct(mat_mat, mat_mat);
|
||||
float dot_sub = dspm::Mat::dotProduct(mat_sub, mat_sub);
|
||||
TEST_ASSERT_EQUAL_FLOAT(dot_mat, dot_sub);
|
||||
|
||||
// check clear
|
||||
mat_sub.clear();
|
||||
mat_mat.clear();
|
||||
test_assert_equal_mat_const(mat_sub, 0, "sub-matrix clear");
|
||||
test_assert_equal_mat_mat(mat_mat, mat_sub, "sub-matrix clear");
|
||||
test_assert_check_area_mat_mat(mat_area_check, mat_sub, 1, 1, "area check sub-matrix clear");
|
||||
}
|
||||
|
||||
|
||||
// Runs every sub-matrix method test in sequence
TEST_CASE("Matrix subset methods check", TAG)
{
    test_mat_subset_solve();                    // solve() and roots()
    test_mat_subset_inverse();                  // inverse() and pinv()
    test_mat_subset_normalize();                // normalize()
    test_mat_subset_swap_trans_dot_clear();     // swapRows(), t(), dotProduct(), clear()
}
|
||||
@@ -0,0 +1,187 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dspm_mult_3x3xX_f32";
|
||||
|
||||
// Test dsps_dotprod_s16_ansi function
|
||||
TEST_CASE("dspm_mult_3x3x1_f32 functionality", "[dspm]")
|
||||
{
|
||||
int m = 3;
|
||||
int n = 3;
|
||||
int k = 1;
|
||||
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
float C_compare[m][k];
|
||||
float *Cc_ptr = (float *)C_compare;
|
||||
|
||||
for (int i = 0; i < m; i++) {
|
||||
for (int j = 0; j < n; j++) {
|
||||
A[i][j] = i;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int j = 0; j < k; j++) {
|
||||
B[i][j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
dspm_mult_3x3x1_f32(A_ptr, B_ptr, C_ptr);
|
||||
dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
|
||||
}
|
||||
}
|
||||
//Compare and check results
|
||||
for (int i = 0; i < m * k; i++) {
|
||||
if (Cc_ptr[i] != C_ptr[i]) {
|
||||
TEST_ASSERT_EQUAL(C_ptr[i], Cc_ptr[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Functional test: dspm_mult_3x3x3_f32 must produce the same result as the
// generic reference dspm_mult_f32_ansi for a 3x3 by 3x3 multiplication.
TEST_CASE("dspm_mult_3x3x3_f32 functionality", "[dspm]")
{
    int m = 3;
    int n = 3;
    int k = 3;

    float A[m][n];
    float *A_ptr = (float *)A;

    float B[n][k];
    float *B_ptr = (float *)B;

    float C[m][k];
    float *C_ptr = (float *)C;
    float C_compare[m][k];
    float *Cc_ptr = (float *)C_compare;

    // Every element of A and B holds its row index
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            A[i][j] = i;
        }
    }
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < k; j++) {
            B[i][j] = i;
        }
    }
    // Clear the output using its own dimensions (m x k) instead of A's (m x n)
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < k; j++) {
            C[i][j] = 0;
        }
    }

    dspm_mult_3x3x3_f32(A_ptr, B_ptr, C_ptr);
    dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);

    for (int i = 0 ; i < m ; i++) {
        for (int j = 0 ; j < k ; j++) {
            ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
        }
    }
    // Compare as floats: TEST_ASSERT_EQUAL compares integer values, so a
    // fractional mismatch between the two results would go unreported.
    for (int i = 0 ; i < m * k ; i++) {
        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
    }
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_3x3x1_f32 benchmark", "[dspm]")
|
||||
{
|
||||
int m = 3;
|
||||
int n = 3;
|
||||
int k = 1;
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_mult_3x3x1_f32(A_ptr, B_ptr, C_ptr);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
ESP_LOGI("dspm_mult_3x3x1_f32", "dspm_mult_3x3x1_f32 - %f per multiplication (ae32 - 134, ansi - 285)", cycles);
|
||||
float min_exec = 60;
|
||||
float max_exec = 200;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
|
||||
// Benchmark: average CPU cycles per dspm_mult_3x3x3_f32 call must fall in the expected range.
TEST_CASE("dspm_mult_3x3x3_f32 benchmark", "[dspm]")
{
    // Dimensions match the 3x3x3 kernel under test (they were declared as 4x4x4)
    int m = 3;
    int n = 3;
    int k = 3;

    float A[m][n];
    float *A_ptr = (float *)A;

    float B[n][k];
    float *B_ptr = (float *)B;

    float C[m][k];
    float *C_ptr = (float *)C;

    // Give the inputs defined values (same pattern as the functionality test);
    // they were previously read uninitialized.
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            A[i][j] = i;
        }
    }
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < k; j++) {
            B[i][j] = i;
        }
    }

    portENTER_CRITICAL(&testnlock);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    int repeat_count = 1024;
    for (int i = 0 ; i < repeat_count ; i++) {
        dspm_mult_3x3x3_f32(A_ptr, B_ptr, C_ptr);
    }
    unsigned int end_b = dsp_get_cpu_cycle_count();
    portEXIT_CRITICAL(&testnlock);

    // Average cycle count per call
    float total_b = end_b - start_b;
    float cycles = total_b / (repeat_count);
    ESP_LOGI("dspm_mult_3x3x3_f32", "dspm_mult_3x3x3_f32 - %f per multiplication", cycles);
    float min_exec = 100;
    float max_exec = 400;
    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
}
|
||||
@@ -0,0 +1,186 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dspm_mult_4x4x1_f32_ae32";
|
||||
|
||||
TEST_CASE("dspm_mult_4x4x1_f32_ae32 functionality", "[dspm]")
|
||||
{
|
||||
int m = 4;
|
||||
int n = 4;
|
||||
int k = 1;
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
float C_compare[m][k];
|
||||
float *Cc_ptr = (float *)C_compare;
|
||||
|
||||
for (int i = 0; i < m; i++) {
|
||||
for (int j = 0; j < n; j++) {
|
||||
A[i][j] = i;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int j = 0; j < k; j++) {
|
||||
B[i][j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
dspm_mult_4x4x1_f32(A_ptr, B_ptr, C_ptr);
|
||||
dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
|
||||
}
|
||||
}
|
||||
//Compare and check results
|
||||
for (int i = 0; i < m * k; i++) {
|
||||
if (Cc_ptr[i] != C_ptr[i]) {
|
||||
TEST_ASSERT_EQUAL(C_ptr[i], Cc_ptr[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Functional test: dspm_mult_4x4x4_f32 must produce the same result as the
// generic reference dspm_mult_f32_ansi for a 4x4 by 4x4 multiplication.
TEST_CASE("dspm_mult_4x4x4_f32_ae32 functionality", "[dspm]")
{
    int m = 4;
    int n = 4;
    int k = 4;

    float A[m][n];
    float *A_ptr = (float *)A;

    float B[n][k];
    float *B_ptr = (float *)B;

    float C[m][k];
    float *C_ptr = (float *)C;
    float C_compare[m][k];
    float *Cc_ptr = (float *)C_compare;

    // Every element of A and B holds its row index
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            A[i][j] = i;
        }
    }
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < k; j++) {
            B[i][j] = i;
        }
    }
    // Clear the output using its own dimensions (m x k) instead of A's (m x n)
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < k; j++) {
            C[i][j] = 0;
        }
    }

    dspm_mult_4x4x4_f32(A_ptr, B_ptr, C_ptr);
    dspm_mult_f32_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k);

    for (int i = 0 ; i < m ; i++) {
        for (int j = 0 ; j < k ; j++) {
            ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
        }
    }
    // Compare as floats: TEST_ASSERT_EQUAL compares integer values, so a
    // fractional mismatch between the two results would go unreported.
    for (int i = 0 ; i < m * k ; i++) {
        TEST_ASSERT_EQUAL_FLOAT(Cc_ptr[i], C_ptr[i]);
    }
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_4x4x1_f32_ae32 benchmark", "[dspm]")
|
||||
{
|
||||
int m = 4;
|
||||
int n = 4;
|
||||
int k = 1;
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_mult_4x4x1_f32(A_ptr, B_ptr, C_ptr);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
ESP_LOGI("dspm_mult_4x4x1_f32_ae32", "dspm_mult_4x4x1_f32_ae32 - %f per multiplication", cycles);
|
||||
float min_exec = 60;
|
||||
float max_exec = 300;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
|
||||
TEST_CASE("dspm_mult_4x4x4_f32_ae32 benchmark", "[dspm]")
{
    int m = 4;
    int n = 4;
    int k = 4;

    // Operand contents are never set here; only the cycle count is evaluated
    float A[m][n];
    float B[n][k];
    float C[m][k];
    float *A_ptr = (float *)A;
    float *B_ptr = (float *)B;
    float *C_ptr = (float *)C;

    // Log the buffer addresses of the three operands
    ESP_LOGI(TAG, "A: %8.8"PRIx32", B: %8.8"PRIx32", C=%8.8"PRIx32"", (uint32_t)A_ptr, (uint32_t)B_ptr, (uint32_t)C_ptr);

    portENTER_CRITICAL(&testnlock);

    unsigned int cycles_begin = dsp_get_cpu_cycle_count();
    int repeat_count = 1024;
    int remaining = repeat_count;
    while (remaining-- > 0) {
        dspm_mult_4x4x4_f32(A_ptr, B_ptr, C_ptr);
    }
    unsigned int cycles_end = dsp_get_cpu_cycle_count();
    portEXIT_CRITICAL(&testnlock);

    // Average cycle count of one call
    float cycles = (float)(cycles_end - cycles_begin) / repeat_count;
    ESP_LOGI("dspm_mult_4x4x4_f32_ae32", "dspm_mult_4x4x4_f32_ae32 - %f per multiplication", cycles);

    float min_exec = 50;
    float max_exec = 750;
    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
}
|
||||
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "test_mat_common.h"
|
||||
|
||||
|
||||
// create ROI rectangles
|
||||
dspm::Mat::Rect A_roi_rect;
|
||||
dspm::Mat::Rect B_roi_rect;
|
||||
dspm::Mat::Rect C_roi_rect;
|
||||
|
||||
static void dspm_mult_ex_f32_aexx_functionality_in_cycle(m_test_data_t *test_d)
|
||||
{
|
||||
char message[120];
|
||||
sprintf(message, "var = %d, A_s_row = %d, A_s_col = %d, B_s_row = %d B_s_col = %d, C_s_row = %d, C_s_col = %d, m = %d, n = %d, k = %d\n", test_d->var,
|
||||
test_d->A_start_row, test_d->A_start_col, test_d->B_start_row, test_d->B_start_col,
|
||||
test_d->C_start_row, test_d->C_start_col, test_d->m, test_d->n, test_d->k);
|
||||
|
||||
// aligned data for A B C matrices
|
||||
float *A_data = (float *)memalign(16, ((test_d->m + (2 * test_d->A_start_row)) * (test_d->n + (2 * test_d->A_start_col))) * sizeof(float));
|
||||
float *B_data = (float *)memalign(16, ((test_d->n + (2 * test_d->B_start_row)) * (test_d->k + (2 * test_d->B_start_col))) * sizeof(float));
|
||||
float *C_data = (float *)memalign(16, ((test_d->m + (2 * test_d->C_start_row)) * (test_d->k + (2 * test_d->C_start_col))) * sizeof(float));
|
||||
|
||||
// create A B C matrices with m n k dimensions + padding
|
||||
// padding is from both sides of the targeted sub-matrix
|
||||
// 1 1 1 1
|
||||
// 1 x x 1
|
||||
// 1 x x 1
|
||||
// 1 1 1 1
|
||||
dspm::Mat A(A_data, test_d->m + (2 * test_d->A_start_row), test_d->n + (2 * test_d->A_start_col));
|
||||
dspm::Mat B(B_data, test_d->n + (2 * test_d->B_start_row), test_d->k + (2 * test_d->B_start_col));
|
||||
dspm::Mat C(C_data, test_d->m + (2 * test_d->C_start_row), test_d->k + (2 * test_d->C_start_col));
|
||||
|
||||
// create ROI rectangles for sub-matrices
|
||||
A_roi_rect.resizeRect(test_d->A_start_col, test_d->A_start_row, test_d->n, test_d->m);
|
||||
B_roi_rect.resizeRect(test_d->B_start_col, test_d->B_start_row, test_d->k, test_d->n);
|
||||
C_roi_rect.resizeRect(test_d->C_start_col, test_d->C_start_row, test_d->k, test_d->m);
|
||||
|
||||
// aligned data for sub-matrices
|
||||
float *A_sub_data = (float *)memalign(16, A_roi_rect.areaRect() * sizeof(float));
|
||||
float *B_sub_data = (float *)memalign(16, B_roi_rect.areaRect() * sizeof(float));
|
||||
float *C_sub_data = (float *)memalign(16, C_roi_rect.areaRect() * sizeof(float));
|
||||
|
||||
// create sub-matrices A, B C matrices with aligned data
|
||||
// matrices are used as sub-matrices with data copying for a matrix operation testing
|
||||
dspm::Mat A_sub(A_sub_data, test_d->m, test_d->n);
|
||||
dspm::Mat B_sub(B_sub_data, test_d->n, test_d->k);
|
||||
dspm::Mat C_sub(C_sub_data, test_d->m, test_d->k);
|
||||
|
||||
// fill A B matrices with numbers
|
||||
// fill C matrix with ones
|
||||
for (int i = 0; i < A.length; i++) {
|
||||
A.data[i] = i + 1;
|
||||
}
|
||||
for (int i = 0; i < B.length; i++) {
|
||||
B.data[i] = i + 1;
|
||||
}
|
||||
|
||||
if (test_d->var < 4) {
|
||||
for (int i = 0; i < C.length; i++) {
|
||||
C.data[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Combinations of A B C matrices and sub-matrices are created for testing
|
||||
// As an example: case 1
|
||||
// Matrices A and C are sub-matrices - the data are defined as a pointer to an external buffer
|
||||
// Matrix B is a matrix - the data are copied into the B matrix
|
||||
switch (test_d->var) {
|
||||
case 0: {
|
||||
A_sub.CopyHead(A.getROI(A_roi_rect)); // A sub-matrix - NO DATA CPY
|
||||
B_sub.CopyHead(B.getROI(B_roi_rect)); // B sub-matrix - NO DATA CPY
|
||||
C_sub.CopyHead(C.getROI(C_roi_rect)); // C sub-matrix - NO DATA CPY
|
||||
} break;
|
||||
case 1: {
|
||||
A_sub = A.Get(A_roi_rect); // A matrix - DATA CPY
|
||||
B_sub.CopyHead(B.getROI(B_roi_rect)); // B sub-matrix - NO DATA CPY
|
||||
C_sub.CopyHead(C.getROI(C_roi_rect)); // C sub-matrix - NO DATA CPY
|
||||
} break;
|
||||
case 2: {
|
||||
A_sub.CopyHead(A.getROI(A_roi_rect)); // A sub-matrix - NO DATA CPY
|
||||
B_sub = B.Get(B_roi_rect); // B matrix - DATA CPY
|
||||
C_sub.CopyHead(C.getROI(C_roi_rect)); // C sub-matrix - NO DATA CPY
|
||||
} break;
|
||||
case 3: {
|
||||
A_sub = A.Get(A_roi_rect); // A matrix - DATA CPY
|
||||
B_sub = B.Get(B_roi_rect); // B matrix - DATA CPY
|
||||
C_sub.CopyHead(C.getROI(C_roi_rect)); // C sub-matrix - NO DATA CPY
|
||||
} break;
|
||||
case 4: {
|
||||
A_sub.CopyHead(A.getROI(A_roi_rect)); // A sub-matrix - NO DATA CPY
|
||||
B_sub.CopyHead(B.getROI(B_roi_rect)); // B sub-matrix - NO DATA CPY
|
||||
C_sub = C.Get(C_roi_rect); // B matrix - DATA CPY
|
||||
} break;
|
||||
case 5: {
|
||||
A_sub.CopyHead(A.getROI(A_roi_rect)); // A sub-matrix - NO DATA CPY
|
||||
B_sub = B.Get(B_roi_rect); // B matrix - DATA CPY
|
||||
C_sub = C.Get(C_roi_rect); // C matrix - DATA CPY
|
||||
} break;
|
||||
case 6: {
|
||||
A_sub = A.Get(A_roi_rect); // A matrix - DATA CPY
|
||||
B_sub.CopyHead(B.getROI(B_roi_rect)); // B sub-matrix - NO DATA CPY
|
||||
C_sub = C.Get(C_roi_rect); // C matrix - DATA CPY
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// create A B check sub-matrices, actual matrix data are COPIED
|
||||
dspm::Mat A_sub_check = A.Get(A_roi_rect);
|
||||
dspm::Mat B_sub_check = B.Get(B_roi_rect);
|
||||
dspm::Mat C_sub_check(test_d->m, test_d->k);
|
||||
|
||||
// Calculate C_sub_check = A_sub_check * B_sub_check
|
||||
for (int i = 0 ; i < test_d->m ; i++) {
|
||||
for (int j = 0 ; j < test_d->k ; j++) {
|
||||
C_sub_check(i, j) = 0;
|
||||
for (int s = 0 ; s < test_d->n ; s++) {
|
||||
C_sub_check(i, j) += A_sub_check(i, s) * B_sub_check(s, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dspm_mult_ex_f32(A_sub.data, B_sub.data, C_sub.data, test_d->m, test_d->n, test_d->k, A_sub.padding, B_sub.padding, C_sub.padding);
|
||||
|
||||
// C is a sub-matrix
|
||||
if (C_sub.sub_matrix) {
|
||||
// Create a copy of the original C matrix (filled with ones 1)
|
||||
// to check if an area around the sub-matrix is unaffected after a matrix operation
|
||||
dspm::Mat C_area_check = dspm::Mat::ones(test_d->m + (2 * test_d->C_start_row), test_d->k + (2 * test_d->C_start_col));
|
||||
test_assert_equal_mat_mat(C_sub_check, C_sub, message);
|
||||
test_assert_check_area_mat_mat(C_area_check, C_sub, test_d->C_start_row, test_d->C_start_col, message);
|
||||
// C is a matrix
|
||||
} else {
|
||||
test_assert_equal_mat_mat(C_sub_check, C_sub, message);
|
||||
}
|
||||
|
||||
free(A_data);
|
||||
free(B_data);
|
||||
free(C_data);
|
||||
free(A_sub_data);
|
||||
free(B_sub_data);
|
||||
free(C_sub_data);
|
||||
}
|
||||
|
||||
TEST_CASE("dspm_mult_ex_f32_aexx functionality", "[dspm]")
{
    m_test_data_t test_data;

    const int test_variations = 7;
    const int start_col_min = 0;
    const int start_row_min = 0;

    // Sweep limits depend on the build target
#if CONFIG_IDF_TARGET_ESP32S3
    const int start_col_max = 4;
    const int start_row_max = 4;
    const int col_row_increment = 4;
    const int m_max = 12;
    const int n_max = 12;
    const int k_max = 12;
    const int dim_increment = 4;
    const int dim_start = 4;
#else
    const int start_col_max = 1;
    const int start_row_max = 1;
    const int col_row_increment = 1;
    const int m_max = 4;
    const int n_max = 4;
    const int k_max = 4;
    const int dim_increment = 1;
#if CONFIG_IDF_TARGET_ESP32P4
    const int dim_start = 2;     // <= the esp.lp.setup instruction is not working with loop count 1. The min value is 2.
#else
    const int dim_start = 1;
#endif
#endif

    // Nesting order (outer to inner): variant, C padding, A padding, B padding, then m, n, k
    for (int var = 0; var < test_variations; var++) {
        for (int C_start_row = start_row_min; C_start_row <= start_row_max; C_start_row += col_row_increment) {
            for (int C_start_col = start_col_min; C_start_col <= start_col_max; C_start_col += col_row_increment) {
                for (int A_start_row = start_row_min; A_start_row <= start_row_max; A_start_row += col_row_increment) {
                    for (int A_start_col = start_col_min; A_start_col <= start_col_max; A_start_col += col_row_increment) {
                        for (int B_start_row = start_row_min; B_start_row <= start_row_max; B_start_row += col_row_increment) {
                            for (int B_start_col = start_col_min; B_start_col <= start_col_max; B_start_col += col_row_increment) {
                                for (int m = dim_start; m <= m_max; m += dim_increment) {
                                    for (int n = dim_start; n <= n_max; n += dim_increment) {
                                        for (int k = dim_start; k <= k_max; k += dim_increment) {
                                            // One functionality check per parameter combination
                                            test_data = {var, A_start_row, A_start_col, B_start_row, B_start_col, C_start_row, C_start_col, m, n, k};
                                            dspm_mult_ex_f32_aexx_functionality_in_cycle(&test_data);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        // Progress report after each completed variant
        std::cout << var + 1 << "/" << test_variations << " of test done" << std::endl;
    }
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_ex_f32_aexx benchmark", "[dspm]")
|
||||
{
|
||||
const int m = 4;
|
||||
const int n = 4;
|
||||
const int k = 4;
|
||||
const int start_row_col = 4;
|
||||
|
||||
A_roi_rect.resizeRect(start_row_col, start_row_col, n, m);
|
||||
B_roi_rect.resizeRect(start_row_col, start_row_col, k, n);
|
||||
C_roi_rect.resizeRect(start_row_col, start_row_col, k, m);
|
||||
|
||||
float *A_data = (float *)memalign(16, (m + (2 * start_row_col)) * (n + (2 * start_row_col)) * sizeof(float));
|
||||
float *B_data = (float *)memalign(16, (n + (2 * start_row_col)) * (k + (2 * start_row_col)) * sizeof(float));
|
||||
float *C_data = (float *)memalign(16, (m + (2 * start_row_col)) * (k + (2 * start_row_col)) * sizeof(float));
|
||||
|
||||
dspm::Mat A(A_data, m + (2 * start_row_col), n + (2 * start_row_col));
|
||||
dspm::Mat B(B_data, n + (2 * start_row_col), k + (2 * start_row_col));
|
||||
dspm::Mat C(C_data, m + (2 * start_row_col), k + (2 * start_row_col));
|
||||
|
||||
dspm::Mat A_subset = A.getROI(A_roi_rect);
|
||||
dspm::Mat B_subset = B.getROI(B_roi_rect);
|
||||
dspm::Mat C_subset = C.getROI(C_roi_rect);
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
dspm_mult_ex_f32(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_mult_ex_f32(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
printf("Benchmark dspm_mult_f32 - %f per multiplication 4x4 + overhead.\n", cycles);
|
||||
float min_exec = 100;
|
||||
float max_exec = 750;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
|
||||
free(A_data);
|
||||
free(B_data);
|
||||
free(C_data);
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "test_mat_common.h"
|
||||
|
||||
TEST_CASE("dspm_mult_ex_f32_ansi functionality", "[dspm]")
{
    // ROI rectangles, re-sized for every parameter combination
    dspm::Mat::Rect A_roi_rect;
    dspm::Mat::Rect B_roi_rect;
    dspm::Mat::Rect C_roi_rect;

    char message[60];

    for (int var = 0; var < 7; var++) {
        // Each variant decides per operand whether the ROI is copied into an own
        // matrix (DATA CPY) or only referenced as a sub-matrix (NO DATA CPY).
        // As an example: variant 1
        // Matrices B and C are sub-matrices - the data are defined as a pointer to an external buffer
        // Matrix A is a matrix - the data are copied into the A matrix
        const bool copy_A = (var == 1) || (var == 3) || (var == 6);
        const bool copy_B = (var == 2) || (var == 3) || (var == 5);
        const bool copy_C = (var >= 4);

        for (int start_row = 0; start_row < 2; start_row++) {
            for (int start_col = 0; start_col < 2; start_col++) {
                for (int m = 1; m < 6; m++) {
                    for (int n = 1; n < 6; n++) {
                        for (int k = 1; k < 6; k++) {
                            sprintf(message, "var = %d s_row = %d s_col = %d, m = %d, n = %d, k = %d", var, start_row, start_col, m, n, k);

                            // Parent matrices: m n k dimensions plus padding on both sides of the ROI
                            // 1 1 1 1
                            // 1 x x 1
                            // 1 x x 1
                            // 1 1 1 1
                            const int row_pad = 2 * start_row;
                            const int col_pad = 2 * start_col;
                            dspm::Mat A(m + row_pad, n + col_pad);
                            dspm::Mat B(n + row_pad, k + col_pad);
                            dspm::Mat C = dspm::Mat::ones(m + row_pad, k + col_pad);

                            // Operands passed to the multiplication, dimensions not yet defined
                            dspm::Mat A_sub;
                            dspm::Mat B_sub;
                            dspm::Mat C_sub;

                            // Place the ROI rectangles inside the padded matrices
                            A_roi_rect.resizeRect(start_col, start_row, n, m);
                            B_roi_rect.resizeRect(start_col, start_row, k, n);
                            C_roi_rect.resizeRect(start_col, start_row, k, m);

                            // A and B hold 1, 2, 3, ... in storage order; C already holds ones
                            for (int idx = 0; idx < A.length; idx++) {
                                A.data[idx] = idx + 1;
                            }
                            for (int idx = 0; idx < B.length; idx++) {
                                B.data[idx] = idx + 1;
                            }

                            if (copy_A) {
                                A_sub = A.Get(A_roi_rect);                  // A matrix - DATA CPY
                            } else {
                                A_sub.CopyHead(A.getROI(A_roi_rect));       // A sub-matrix - NO DATA CPY
                            }
                            if (copy_B) {
                                B_sub = B.Get(B_roi_rect);                  // B matrix - DATA CPY
                            } else {
                                B_sub.CopyHead(B.getROI(B_roi_rect));       // B sub-matrix - NO DATA CPY
                            }
                            if (copy_C) {
                                C_sub = C.Get(C_roi_rect);                  // C matrix - DATA CPY
                            } else {
                                C_sub.CopyHead(C.getROI(C_roi_rect));       // C sub-matrix - NO DATA CPY
                            }

                            // Reference operands always own a copy of the ROI data
                            dspm::Mat A_sub_check = A.Get(A_roi_rect);
                            dspm::Mat B_sub_check = B.Get(B_roi_rect);
                            dspm::Mat C_sub_check(m, k);

                            // Reference product C_sub_check = A_sub_check * B_sub_check
                            for (int row = 0; row < m; row++) {
                                for (int col = 0; col < k; col++) {
                                    C_sub_check(row, col) = 0;
                                    for (int inner = 0; inner < n; inner++) {
                                        C_sub_check(row, col) += A_sub_check(row, inner) * B_sub_check(inner, col);
                                    }
                                }
                            }

                            dspm_mult_ex_f32_ansi(A_sub.data, B_sub.data, C_sub.data, m, n, k, A_sub.padding, B_sub.padding, C_sub.padding);

                            if (!C_sub.sub_matrix) {
                                // C is a matrix
                                test_assert_equal_mat_mat(C_sub_check, C_sub, message);
                            } else {
                                // C is a sub-matrix:
                                // create a copy of the original C matrix (filled with ones)
                                // to check if an area around the sub-matrix is unaffected after a matrix operation
                                dspm::Mat C_area_check = dspm::Mat::ones(m + row_pad, k + col_pad);
                                test_assert_equal_mat_mat(C_sub_check, C_sub, message);
                                test_assert_check_area_mat_mat(C_area_check, C_sub, start_row, start_col, message);
                            }
                        }
                    }
                }
            }
        }
    }
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_ex_f32_ansi benchmark", "[dspm]")
|
||||
{
|
||||
const int m = 4;
|
||||
const int n = 4;
|
||||
const int k = 4;
|
||||
const int M_off = 1;
|
||||
|
||||
dspm::Mat A(m + M_off, n + M_off);
|
||||
dspm::Mat B(n + M_off, k + M_off);
|
||||
dspm::Mat C(m + M_off, k + M_off);
|
||||
|
||||
dspm::Mat A_subset = A.getROI(M_off, M_off, m, n);
|
||||
dspm::Mat B_subset = B.getROI(M_off, M_off, n, k);
|
||||
dspm::Mat C_subset = C.getROI(M_off, M_off, m, k);
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
dspm_mult_ex_f32_ansi(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_mult_ex_f32_ansi(A_subset.data, B_subset.data, C_subset.data, m, n, k, A_subset.padding, B_subset.padding, C_subset.padding);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
printf("Benchmark dspm_mult_f32 - %f per multiplication 4x4 + overhead.\n", cycles);
|
||||
float min_exec = 100;
|
||||
float max_exec = 1400;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dspm_mult_f32_aexx";
|
||||
|
||||
// Test dsps_dotprod_s16_ansi function
|
||||
TEST_CASE("dspm_mult_f32 functionality", "[dspm]")
|
||||
{
|
||||
int m = 4;
|
||||
int n = 3;
|
||||
int k = 4;
|
||||
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
float C_compare[m][k];
|
||||
float *Cc_ptr = (float *)C_compare;
|
||||
|
||||
for (int i = 0 ; i < m * n; i++) {
|
||||
A_ptr[i] = i;
|
||||
B_ptr[i] = i;
|
||||
}
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
C_compare[i][j] = 0;
|
||||
for (int s = 0 ; s < n ; s++) {
|
||||
C_compare[i][j] += A[i][s] * B[s][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
dspm_mult_f32(A_ptr, B_ptr, C_ptr, m, n, k);
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
ESP_LOGI(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
|
||||
}
|
||||
}
|
||||
// Compare and check results
|
||||
for (int i = 0 ; i < m * k ; i++) {
|
||||
if (Cc_ptr[i] != C_ptr[i]) {
|
||||
TEST_ASSERT_EQUAL( C_ptr[i], Cc_ptr[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_f32 benchmark", "[dspm]")
|
||||
{
|
||||
int m = 4;
|
||||
int n = 4;
|
||||
int k = 4;
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
|
||||
|
||||
ESP_LOGI(TAG, "A: %8.8"PRIx32", B: %8.8"PRIx32", C=%8.8"PRIx32"", (uint32_t)A_ptr, (uint32_t)B_ptr, (uint32_t)C_ptr);
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_mult_f32(A_ptr, B_ptr, C_ptr, m, n, k);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
printf("Benchmark dspm_mult_f32 - %f per multiplication 4x4 + overhead.\n", cycles);
|
||||
float min_exec = 100;
|
||||
float max_exec = 800;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dspm_mult_f32_ansi";
|
||||
|
||||
// Test dsps_dotprod_s16_ansi function
|
||||
TEST_CASE("dspm_mult_f32_ansi functionality", "[dspm]")
|
||||
{
|
||||
for (int m = 1 ; m < 8 ; m++) {
|
||||
for (int n = 1; n < 8 ; n++) {
|
||||
for (int k = 1; k < 8 ; k++) {
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
float C_compare[m][k];
|
||||
float *Cc_ptr = (float *)C_compare;
|
||||
|
||||
for (int i = 0 ; i < m * n; i++) {
|
||||
A_ptr[i] = i;
|
||||
B_ptr[i] = i;
|
||||
}
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < n ; j++) {
|
||||
A[i][j] = i * n + j;
|
||||
}
|
||||
}
|
||||
for (int i = 0 ; i < n ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
B[i][j] = i * k + j;
|
||||
}
|
||||
}
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
C_compare[i][j] = 0;
|
||||
for (int s = 0 ; s < n ; s++) {
|
||||
C_compare[i][j] += A[i][s] * B[s][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
dspm_mult_f32_ansi(A_ptr, B_ptr, C_ptr, m, n, k);
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
ESP_LOGD(TAG, "[%i][%i] calc=%f, expected =%f", i, j, C[i][j], C_compare[i][j]);
|
||||
}
|
||||
}
|
||||
// Compare and check results
|
||||
for (int i = 0 ; i < m * k ; i++) {
|
||||
if (Cc_ptr[i] != C_ptr[i]) {
|
||||
TEST_ASSERT_EQUAL(Cc_ptr[i], C_ptr[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_f32_ansi benchmark", "[dspm]")
|
||||
{
|
||||
int m = 4;
|
||||
int n = 4;
|
||||
int k = 4;
|
||||
|
||||
float A[m][n];
|
||||
float *A_ptr = (float *)A;
|
||||
|
||||
float B[n][k];
|
||||
float *B_ptr = (float *)B;
|
||||
|
||||
float C[m][k];
|
||||
float *C_ptr = (float *)C;
|
||||
|
||||
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
int repeat_count = 1024;
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dspm_mult_f32_ansi(A_ptr, B_ptr, C_ptr, m, n, k);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (repeat_count);
|
||||
printf("Benchmark dspm_mult_f32_ansi - %f per multiplication 4x4 + overhead.\n", cycles);
|
||||
float min_exec = 100;
|
||||
float max_exec = 2000;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
// Test dsps_dotprod_s16_ansi function
|
||||
TEST_CASE("dspm_mult_s16_aexx functionality", "[dspm]")
|
||||
{
|
||||
for (int m = 1 ; m < 8 ; m++) {
|
||||
for (int n = 1 ; n < 16 ; n++) {
|
||||
for (int k = 1 ; k < 16 ; k++) {
|
||||
|
||||
int16_t A[m][n];
|
||||
int16_t *A_ptr = (int16_t *)A;
|
||||
|
||||
int16_t B[n][k];
|
||||
int16_t *B_ptr = (int16_t *)B;
|
||||
|
||||
int16_t C[m][k];
|
||||
int16_t *C_ptr = (int16_t *)C;
|
||||
int16_t C_compare[m][k];
|
||||
int16_t *Cc_ptr = (int16_t *)C_compare;
|
||||
for (int shift = -4 ; shift < 4 ; shift++) {
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < n; j++) {
|
||||
A[i][j] = 0x123;
|
||||
}
|
||||
}
|
||||
for (int i = 0 ; i < n ; i++) {
|
||||
for (int j = 0 ; j < k; j++) {
|
||||
B[i][j] = 0x123;
|
||||
}
|
||||
}
|
||||
|
||||
dspm_mult_s16_ansi(A_ptr, B_ptr, Cc_ptr, m, n, k, shift);
|
||||
dspm_mult_s16(A_ptr, B_ptr, C_ptr, m, n, k, shift);
|
||||
|
||||
// Compare and check results
|
||||
for (int i = 0 ; i < m * k ; i++) {
|
||||
if (Cc_ptr[i] != C_ptr[i]) {
|
||||
ESP_LOGE("dspm_mult_s16_aexx", "Process path m=%i, n=%i, k=%i, shift=%i", m, n, k, shift);
|
||||
ESP_LOGE("dspm_mult_s16_aexx", "data[%i] %4.4x != %4.4x expected \n", i, C_ptr[i], Cc_ptr[i]);
|
||||
TEST_ASSERT_EQUAL(Cc_ptr[i], C_ptr[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
TEST_CASE("dspm_mult_s16_aexx benchmark", "[dspm]")
|
||||
{
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
for (int m = 2 ; m <= 8 ; m++) {
|
||||
for (int n = 2 ; n <= 16 ; n++) {
|
||||
for (int k = 1 ; k <= 16 ; k++) {
|
||||
|
||||
int16_t A[m][n];
|
||||
int16_t *A_ptr = (int16_t *)A;
|
||||
|
||||
int16_t B[m][n];
|
||||
int16_t *B_ptr = (int16_t *)B;
|
||||
|
||||
int16_t C[m][k];
|
||||
int16_t *C_ptr = (int16_t *)C;
|
||||
|
||||
memset(A, 0, sizeof(A));
|
||||
memset(B, 0, sizeof(A));
|
||||
memset(C, 0, sizeof(A));
|
||||
portENTER_CRITICAL(&testnlock);
|
||||
|
||||
start_b = dsp_get_cpu_cycle_count();
|
||||
dspm_mult_s16(A_ptr, B_ptr, C_ptr, m, n, k, 0);
|
||||
end_b = dsp_get_cpu_cycle_count();
|
||||
portEXIT_CRITICAL(&testnlock);
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b;
|
||||
ESP_LOGD("dspm_mult_s16_aexx", "dspm_mult_s16_aexx[%i][%i][%i] - %f", m, n, k, cycles);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
#include "esp_attr.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
// Log tag passed to the ESP_LOGD() calls in the functionality test below.
static const char *TAG = "dspm_mult_s16_ansi";
|
||||
|
||||
// Test dspm_mult_s16_ansi function
|
||||
// Functional check of dspm_mult_s16_ansi(): multiplies a constant-filled
// 4x3 matrix by a constant-filled 3x4 matrix and compares every output
// element against a reference computed inline with 64-bit accumulation.
TEST_CASE("dspm_mult_s16_ansi functionality", "[dspm]")
{
    int m = 4;
    int n = 3;
    int k = 4;

    int16_t A[m][n];
    int16_t *A_ptr = (int16_t *)A;

    int16_t B[n][k];
    int16_t *B_ptr = (int16_t *)B;

    int16_t C[m][k];
    int16_t *C_ptr = (int16_t *)C;
    int16_t C_compare[m][k];
    int16_t *Cc_ptr = (int16_t *)C_compare;

    int shift = 0;

    // Fill each operand using its own element count. A single loop bounded
    // by m * n is only correct for B when m == k.
    for (int i = 0 ; i < m * n; i++) {
        A_ptr[i] = 0x1000;
    }
    for (int i = 0 ; i < n * k; i++) {
        B_ptr[i] = 0x200;
    }

    // Reference result: start from the rounding term (0x7fff >> shift),
    // accumulate the products, then scale down by (15 - shift) bits.
    long long store_reg = 0;
    for (int i = 0 ; i < m ; i++) {
        for (int j = 0 ; j < k ; j++) {
            store_reg = (0x7fff >> shift);
            for (int s = 0 ; s < n ; s++) {
                store_reg += ((int32_t)A[i][s] * (int32_t)B[s][j]);
            }
            C_compare[i][j] = store_reg >> (15 - shift);
        }
    }

    dspm_mult_s16_ansi(A_ptr, B_ptr, C_ptr, m, n, k, shift);

    for (int i = 0 ; i < m ; i++) {
        for (int j = 0 ; j < k ; j++) {
            ESP_LOGD(TAG, "[%i][%i] calc=%i, expected =%i", i, j, C[i][j], C_compare[i][j]);
        }
    }

    // Compare and check results
    for (int i = 0 ; i < m * k ; i++) {
        if (Cc_ptr[i] != C_ptr[i]) {
            TEST_ASSERT_EQUAL(Cc_ptr[i], C_ptr[i]);
        }
    }
}
|
||||
|
||||
// Lock handed to portENTER_CRITICAL()/portEXIT_CRITICAL() around the timed
// loop in the benchmark test case below.
static portMUX_TYPE testnlock = portMUX_INITIALIZER_UNLOCKED;
|
||||
|
||||
// Benchmark for dspm_mult_s16_ansi(): runs a 4x4x4 multiplication 1024 times
// inside a critical section and asserts that the average cycle count per call
// lies within [min_exec, max_exec].
TEST_CASE("dspm_mult_s16_ansi benchmark", "[dspm]")
{
    int m = 4;
    int n = 4;
    int k = 4;

    int16_t A[m][n];
    int16_t *A_ptr = (int16_t *)A;

    int16_t B[n][k];
    int16_t *B_ptr = (int16_t *)B;

    int16_t C[m][k];
    int16_t *C_ptr = (int16_t *)C;

    // The operands are variable-length arrays and therefore start out
    // indeterminate; give the kernel defined input before it reads them.
    memset(A, 0, sizeof(A));
    memset(B, 0, sizeof(B));
    memset(C, 0, sizeof(C));

    // Set up the loop count ahead of the first cycle-counter read so that
    // only the multiplications are measured.
    int repeat_count = 1024;

    portENTER_CRITICAL(&testnlock);

    unsigned int start_b = dsp_get_cpu_cycle_count();
    for (int i = 0 ; i < repeat_count ; i++) {
        dspm_mult_s16_ansi(A_ptr, B_ptr, C_ptr, m, n, k, 0);
    }
    unsigned int end_b = dsp_get_cpu_cycle_count();
    portEXIT_CRITICAL(&testnlock);

    float total_b = end_b - start_b;
    float cycles = total_b / (repeat_count);
    ESP_LOGI(TAG, "Benchmark dspm_mult_s16_ansi - %f per multiplication %ix%ix%i.\n", cycles, m, n, k);

    // Accepted range, in CPU cycles per multiplication.
    float min_exec = 1000;
    float max_exec = 3000;
    TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
}
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
#include <stdio.h>

// Test body, defined in a separate translation unit.
void test_mmult(void);

// Entry point of the standalone test program: prints a start banner, runs
// test_mmult() and prints a completion banner.
int main(void)
{
    printf("main starts!\n");
    // Profiling calls are left disabled here.
    // xt_iss_profile_enable();
    test_mmult();
    // xt_iss_profile_disable();

    printf("Test done\n");
    return 0;
}
|
||||
@@ -0,0 +1,65 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "dsp_common.h"
|
||||
|
||||
#include "dspm_mult.h"
|
||||
// Profiling hooks defined outside this file; test_mmult() calls them around
// the two multiplication kernels.
// NOTE(review): presumably provided by the Xtensa instruction-set simulator — confirm.
extern void xt_iss_profile_disable();
|
||||
extern void xt_iss_profile_enable();
|
||||
|
||||
// Matrix dimensions: A is M x N, B is N x K, C and C_compare are M x K.
#define M 4
|
||||
#define N 8
|
||||
#define K 16
|
||||
|
||||
// Run-time copies of the dimensions, passed to the multiplication functions.
const int m = M;
|
||||
const int n = N;
|
||||
const int k = K;
|
||||
|
||||
// Operands and result buffers used by test_mmult().
float A[M][N];
|
||||
float B[N][K];
|
||||
float C[M][K];
|
||||
float C_compare[M][K];
|
||||
|
||||
void test_mmult()
|
||||
{
|
||||
|
||||
float *A_ptr = (float *)A;
|
||||
float *B_ptr = (float *)B;
|
||||
float *C_ptr = (float *)C;
|
||||
float *Cc_ptr = (float *)C_compare;
|
||||
|
||||
for (int i = 0 ; i < m * n; i++) {
|
||||
A_ptr[i] = i;
|
||||
B_ptr[i] = i;
|
||||
}
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
C_compare[i][j] = 0;
|
||||
for (int s = 0 ; s < n ; s++) {
|
||||
C_compare[i][j] += A[i][s] * B[s][j];
|
||||
}
|
||||
C[i][j] = -1;
|
||||
}
|
||||
}
|
||||
xt_iss_profile_enable();
|
||||
dspm_mult_f32_ae32(A_ptr, B_ptr, Cc_ptr, m, n, k);
|
||||
dspm_mult_f32_aes3(A_ptr, B_ptr, C_ptr, m, n, k);
|
||||
xt_iss_profile_disable();
|
||||
|
||||
for (int i = 0 ; i < m ; i++) {
|
||||
for (int j = 0 ; j < k ; j++) {
|
||||
printf("[%i][%i] calc=%f, expected =%f\n", i, j, C[i][j], C_compare[i][j]);
|
||||
}
|
||||
}
|
||||
// Compare and check results
|
||||
for (int i = 0 ; i < m * k ; i++) {
|
||||
if (Cc_ptr[i] != C_ptr[i]) {
|
||||
printf("Error - C_ptr= %f, Cc_ptr= %f \n", C_ptr[i], Cc_ptr[i]);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Test Pass!\n");
|
||||
}
|
||||
Reference in New Issue
Block a user