9043 lines
327 KiB
C
Executable File
9043 lines
327 KiB
C
Executable File
//--------------------------------------------------------------------------
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include "bee_simd_custom.h"
|
|
//#include "uart.h"
|
|
//#include "hw_reg_api.h"
|
|
#include "os_types.h"
|
|
//#include "dbg_io.h"
|
|
//#include "cpl_types.h"
|
|
#include "iot_diag.h"
|
|
#include "iot_io.h"
|
|
#include "simd_vector_function.h"
|
|
#include "simd_matrix_function.h"
|
|
#include "simd_nn_function.h"
|
|
//#include "clk.h"
|
|
//#include "chip_reg_base.h"
|
|
#include "os_mem.h"
|
|
//#include "cpu.h"
|
|
#include "simd_config.h"
|
|
|
|
//#define SATURATION 1
|
|
//#define AI_USING_PSRAM 1
|
|
//#define AI_OS_TASK 1
|
|
|
|
#ifdef AI_OS_TASK
|
|
#include "os_task.h"
|
|
#endif
|
|
|
|
|
|
// ---- Forward declarations for helpers defined outside this part of the file ----

// UART / debug console.
void dbg_uart_init();
void test_uart_init();
int32_t test_uart0_getc();
void iot_dbg_uart_set_port(uint8_t port, uint32_t baud, uint8_t parity,
                           uint8_t data, uint8_t stop);
extern void uart_dma_init(int port, int br);
extern void uart_dma_read(uint8_t *bufptr, uint32_t size,
                          void (*callback) (void*, uint8_t), void* dummy);

// Test-case readers (the names indicate the cases come from a Python host script).
void read_case_from_python(uint8_t *config_read, uint32_t *config_get,
                           uint8_t *data_get);
void read_nn_case_from_python(uint8_t *config_read, uint32_t *config_get);
void read_case_from_python_matrix(uint8_t *config_read, uint32_t *config_get);
void read_case_from_python_matrix_multi(uint8_t *config_read,
                                        uint32_t *config_get);

// Result checkers: both take an output address, a golden address and a length.
void verify_8bits(uint32_t out_addr, uint32_t golden_addr, uint32_t length);
void verify_float(uint32_t out_addr, uint32_t golden_addr, uint32_t length);

// Miscellaneous helpers.
void vector_maximum_element_int8_small(int8_t *v, uint8_t *max_index,
                                       int8_t *max, uint8_t len);
uint64_t cpu_get_mcycle();
|
|
// Lookup tables filled by the vldx_test_* routines before each indexed load.
// One table per element width; each holds 256 entries.
int8_t table8[256];
int16_t table16[256];
int32_t table32[256];
|
|
// int8 weight table, 448 bytes.
//
// The original per-element annotations label each value w(i, j). Each row below
// holds the 16 values that share one j, in increasing i. The original marks the
// end of each 14-row half as "end of one group of 16 filters".
//   - rows  0..13: w(0..15, j)  for j = 0..13
//   - rows 14..27: w(16..23, j) for j = 0..13, followed by eight 0x00 padding bytes
// Values of 0x80 and above denote negative int8 values (e.g. 0xcc is -52).
int8_t weight[] = {
    // ---- first group: w(0..15, j) ----
    0x02, 0xcc, 0x3e, 0xfe, 0x60, 0x69, 0x59, 0x98, 0xbe, 0x34, 0x06, 0x16, 0x13, 0x74, 0x41, 0x25, // j = 0
    0xd8, 0xeb, 0xbc, 0x32, 0x87, 0x95, 0xa0, 0x5a, 0x0e, 0xf9, 0x60, 0xeb, 0xdd, 0x20, 0x68, 0x13, // j = 1
    0x18, 0x5c, 0x00, 0xb1, 0xae, 0xcb, 0x17, 0x24, 0x80, 0x3a, 0x32, 0xec, 0x0f, 0x14, 0xd9, 0xd6, // j = 2
    0xdc, 0x6f, 0x3a, 0x7f, 0x3a, 0x7a, 0x8e, 0x95, 0x5c, 0x0a, 0xa0, 0xb5, 0xc8, 0xf7, 0xaf, 0x6a, // j = 3
    0x9a, 0xb1, 0xd5, 0x94, 0x68, 0x6d, 0xcb, 0xb5, 0x42, 0xe3, 0xc3, 0x1c, 0xfc, 0xd2, 0xf0, 0xcf, // j = 4
    0x13, 0x05, 0x37, 0xee, 0xf1, 0x26, 0xd2, 0xf7, 0x0c, 0xed, 0x2d, 0x23, 0x38, 0xb3, 0xec, 0x8c, // j = 5
    0xc9, 0xdf, 0xc6, 0x24, 0x99, 0xd1, 0x23, 0x2a, 0x96, 0xa9, 0x17, 0x06, 0xe3, 0xf0, 0x58, 0x70, // j = 6
    0x10, 0xb0, 0x74, 0x3e, 0x3d, 0x99, 0xa1, 0x0c, 0x4f, 0x35, 0xf7, 0x8b, 0x28, 0xc7, 0xbe, 0x13, // j = 7
    0x85, 0x64, 0xd8, 0xf4, 0x18, 0xab, 0x37, 0x87, 0x0f, 0x6a, 0x67, 0x8f, 0xb5, 0x4f, 0x7a, 0xc8, // j = 8
    0x4d, 0x5f, 0x2e, 0x17, 0x4b, 0x0b, 0x3d, 0xbf, 0x6f, 0x2f, 0xa8, 0x71, 0x68, 0xb9, 0xe9, 0xc1, // j = 9
    0x2d, 0xb5, 0x6b, 0x66, 0x05, 0x76, 0x00, 0x29, 0xe6, 0xc9, 0x22, 0xf6, 0x2f, 0xec, 0xb9, 0x96, // j = 10
    0x33, 0x4c, 0xeb, 0x48, 0xea, 0x0b, 0xcb, 0x0d, 0x0e, 0x9b, 0xba, 0xe0, 0x54, 0xd2, 0x43, 0x1c, // j = 11
    0x37, 0xee, 0xca, 0xe5, 0xf0, 0x4f, 0xb5, 0x62, 0x92, 0xaa, 0x58, 0xbc, 0xc6, 0xf7, 0x7b, 0xae, // j = 12
    0xc1, 0x4e, 0xf9, 0x0b, 0xa5, 0xf0, 0x6d, 0xa8, 0x80, 0x97, 0x92, 0x68, 0x92, 0xed, 0xdd, 0x47, // j = 13

    // ---- second group: w(16..23, j) plus eight padding zeros ----
    0x1d, 0x99, 0x18, 0x4e, 0x63, 0x83, 0xa9, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 0
    0x73, 0x6a, 0x49, 0x77, 0x7a, 0x1e, 0xdc, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 1
    0x75, 0x37, 0x22, 0xee, 0xe8, 0x23, 0x2b, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 2
    0x2d, 0xa0, 0x53, 0x6f, 0x09, 0xf0, 0x66, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 3
    0x87, 0x64, 0x56, 0x0a, 0xee, 0xdb, 0xad, 0xd5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 4
    0xc1, 0xc1, 0x23, 0x94, 0x4b, 0x4f, 0x59, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 5
    0x41, 0xd8, 0xbe, 0x9c, 0xc1, 0x98, 0x4a, 0xab, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 6
    0xc2, 0x00, 0xa9, 0xc7, 0xd8, 0x47, 0x18, 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 7
    0xf5, 0x2e, 0xf5, 0x21, 0x75, 0x55, 0x19, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 8
    0xe2, 0x38, 0xbb, 0x18, 0xa7, 0x29, 0xa9, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 9
    0x40, 0x9c, 0x5e, 0xf8, 0x2d, 0x90, 0x3a, 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 10
    0xdf, 0xde, 0x0a, 0xc1, 0x7b, 0xe1, 0xc3, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 11
    0x39, 0x5a, 0x2d, 0x87, 0x9e, 0x0b, 0x9a, 0xee, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // j = 12
    0x6a, 0x26, 0x34, 0x5d, 0xf5, 0xdd, 0x3f, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  // j = 13
};
|
|
// 32-entry int32 bias buffer; zero-initialized here, filled elsewhere if at all.
int32_t bias[32];

// int8 bias values. The original annotations label them F(0,0)..F(23,0);
// the last eight entries are zero padding, giving 32 bytes in total.
int8_t bias8[] = {
    0x5e, 0xe1, 0xe9, 0xcc, 0xd2, 0x89, 0xdc, 0x02, // F(0..7, 0):   94  -31  -23  -52  -46 -119  -36    2
    0x08, 0x4f, 0xb1, 0xde, 0x97, 0xfe, 0x1f, 0xf1, // F(8..15, 0):   8   79  -79  -34 -105   -2   31  -15
    0x0c, 0x69, 0x9d, 0xbc, 0xe7, 0xd3, 0xae, 0xc7, // F(16..23, 0): 12  105  -99  -68  -25  -45  -82  -57
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  // padding
};
|
|
// 32-byte int8 result buffer.
int8_t output[32];

// int8 input vector. The original annotations label the first 14 values
// F(0,0)..F(13,0); two trailing zero bytes pad the array to 16 entries.
int8_t input[] = {
    0xbc, 0xdb, 0xc2, 0x3a, 0x59, 0x15, 0x47, 0xbb, // F(0..7, 0):  -68  -37  -62   58   89   21   71  -69
    0x38, 0xf1, 0xc3, 0xdf, 0xa3, 0x47,             // F(8..13, 0):  56  -15  -61  -33  -93   71
    0x00, 0x00                                      // padding
};
|
|
|
|
void vldx_test_uint8() {
|
|
for (uint16_t i = 0; i < 256; i++) {
|
|
table8[i] = i;
|
|
}
|
|
uint8_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32};
|
|
uint8_t o[16];
|
|
vld_uib(0, in, 1)
|
|
vldx_uib(1, table8, 0)
|
|
vst_uib(o, 1, 1)
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
|
|
}
|
|
}
|
|
|
|
void vldx_test_int8() {
|
|
for (uint16_t i = 0; i < 256; i++) {
|
|
table8[i] = i;
|
|
}
|
|
int8_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32};
|
|
int8_t o[16];
|
|
vld_sib(0, in, 1)
|
|
vldx_sib(1, (table8 + 128), 0)
|
|
vst_sib(o, 1, 1)
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
|
|
}
|
|
}
|
|
|
|
void vldx_test_uint16() {
|
|
for (uint16_t i = 0; i < 256; i++) {
|
|
table16[i] = i | ((i + 1) << 8);
|
|
}
|
|
uint16_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32};
|
|
uint16_t o[16];
|
|
vld_uih(0, in, 1)
|
|
vadd(0, 0, 0)
|
|
vldx_uih(1, table16, 0)
|
|
vst_uih(o, 1, 1)
|
|
asm("fence");
|
|
for (uint16_t i = 0; i < 16; i++) {
|
|
iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
|
|
}
|
|
}
|
|
|
|
void vldx_test_int16() {
|
|
for (uint16_t i = 0; i < 256; i++) {
|
|
table16[i] = i | ((i + 1) << 8);
|
|
}
|
|
int16_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32};
|
|
int16_t o[16];
|
|
vld_sih(0, in, 1)
|
|
vadd(0, 0, 0)
|
|
vldx_sih(1, (table16 + 128), 0)
|
|
vst_sih(o, 1, 1)
|
|
asm("fence");
|
|
for (uint16_t i = 0; i < 16; i++) {
|
|
iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
|
|
}
|
|
}
|
|
|
|
void vldx_test_uint32() {
|
|
for (uint32_t i = 0; i < 256; i++) {
|
|
table32[i] = i | ((i + 1) << 8) | ((i + 2) << 16) | ((i + 3) << 24);
|
|
}
|
|
uint32_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32};
|
|
uint32_t o[16];
|
|
vld_uiw(0, in, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vldx_uiw(1, table32, 0)
|
|
vst_uiw(o, 1, 1)
|
|
asm("fence");
|
|
for (uint32_t i = 0; i < 4; i++) {
|
|
iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
|
|
}
|
|
}
|
|
|
|
void vldx_test_int32() {
|
|
for (uint32_t i = 0; i < 256; i++) {
|
|
table32[i] = i | ((i + 1) << 8) | ((i + 2) << 16) | ((i + 3) << 24);
|
|
}
|
|
int32_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32};
|
|
int32_t o[16];
|
|
vld_siw(0, in, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vldx_siw(1, (table32 + 128), 0)
|
|
vst_siw(o, 1, 1)
|
|
asm("fence");
|
|
for (uint32_t i = 0; i < 4; i++) {
|
|
iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
|
|
}
|
|
}
|
|
|
|
void vstx_test_uint8() {
|
|
uint8_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13};
|
|
uint8_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45};
|
|
uint8_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
vld_uib(0, index, 1)
|
|
vld_uib(1, in, 1)
|
|
vstx_uib(out, 0, 1)
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("out[%d] = %d\n", i, out[i]);
|
|
}
|
|
}
|
|
|
|
void vstx_test_int8() {
|
|
int8_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8};
|
|
int8_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45};
|
|
int8_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
vld_sib(0, index, 1)
|
|
vld_sib(1, in, 1)
|
|
vstx_sib((out + 8), 0, 1)
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("out[%d] = %d\n", i, out[i]);
|
|
}
|
|
}
|
|
|
|
void vstx_test_uint16() {
|
|
uint16_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13};
|
|
uint16_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45};
|
|
uint16_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint16_t *pi = in;
|
|
uint16_t *pd = index;
|
|
vld_uih(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vld_uih(1, pi, 1)
|
|
vstx_uih(out, 0, 1)
|
|
pi += 8;
|
|
pd += 8;
|
|
vld_uih(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vld_uih(1, pi, 1)
|
|
vstx_uih(out, 0, 1)
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("out[%d] = %d\n", i, out[i]);
|
|
}
|
|
}
|
|
|
|
void vstx_test_int16() {
|
|
int16_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8};
|
|
int16_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45};
|
|
int16_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
int16_t *pi = in;
|
|
int16_t *pd = index;
|
|
vld_sih(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vld_sih(1, pi, 1)
|
|
vstx_sih((out + 8), 0, 1)
|
|
pi += 8;
|
|
pd += 8;
|
|
vld_sih(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vld_sih(1, pi, 1)
|
|
vstx_sih((out + 8), 0, 1)
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("out[%d] = %d\n", i, out[i]);
|
|
}
|
|
}
|
|
|
|
void vstx_test_uint32() {
|
|
uint32_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13};
|
|
uint32_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45};
|
|
uint32_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint32_t *pi = in;
|
|
uint32_t *pd = index;
|
|
vld_uiw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_uiw(1, pi, 1)
|
|
vstx_uiw(out, 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
vld_uiw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_uiw(1, pi, 1)
|
|
vstx_uiw(out, 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
vld_uiw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_uiw(1, pi, 1)
|
|
vstx_uiw(out, 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
vld_uiw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_uiw(1, pi, 1)
|
|
vstx_uiw(out, 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("out[%d] = %d\n", i, out[i]);
|
|
}
|
|
}
|
|
|
|
void vstx_test_int32() {
|
|
int32_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8};
|
|
int32_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45};
|
|
int32_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
int32_t *pi = in;
|
|
int32_t *pd = index;
|
|
vld_siw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_siw(1, pi, 1)
|
|
vstx_siw((out + 8), 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
vld_siw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_siw(1, pi, 1)
|
|
vstx_siw((out + 8), 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
vld_siw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_siw(1, pi, 1)
|
|
vstx_siw((out + 8), 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
vld_siw(0, pd, 1)
|
|
vadd(0, 0, 0)
|
|
vadd(0, 0, 0)
|
|
vld_siw(1, pi, 1)
|
|
vstx_siw((out + 8), 0, 1)
|
|
pi += 4;
|
|
pd += 4;
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
iot_printf("out[%d] = %d\n", i, out[i]);
|
|
}
|
|
}
|
|
|
|
uint8_t context_switch_test(){
|
|
|
|
uint8_t error = 0;
|
|
|
|
int32_t v_od_00[8] = { 31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729};
|
|
int32_t v_od_01[8] = { 1501, -25964, -10420, 10591, 18387, -72, -27290, -17956};
|
|
int32_t v_od_02[8] = { 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974};
|
|
int32_t v_od_03[8] = { -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961};
|
|
int32_t v_od_04[8] = { -618, 660, -28635, 24275, 25812, -16048, -29275, -11623};
|
|
int32_t v_od_05[8] = { -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440};
|
|
int32_t v_od_06[8] = { 23991, 835, 10023, -24922, 19792, -8055, 505, 13278};
|
|
int32_t v_od_07[8] = { -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411};
|
|
int32_t v_od_08[8] = { 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956};
|
|
int32_t v_od_09[8] = { 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788};
|
|
int32_t v_od_10[8] = { 31891, -20165, 20131, 12774, -24314, -9821, 18102, -22719};
|
|
int32_t v_od_11[8] = { 1501, -25164, -10120, 10591, 18317, -71, -27190, -17916};
|
|
int32_t v_od_12[8] = { 16671, -7164, 23147, -1473, 13213, -12911, 191, -14914};
|
|
int32_t v_od_13[8] = { -30005, -8191, -19122, -23435, -19, 4111, 3132, 27911};
|
|
int32_t v_od_14[8] = { -618, 160, -28135, 24275, 25812, -16041, -29175, -11613};
|
|
int32_t v_od_15[8] = { -4293, -7181, -3172, -12244, 18416, 4401, 22162, 7410};
|
|
int32_t v_od_16[8] = { 23991, 135, 10123, -24922, 19712, -8051, 105, 13218};
|
|
int32_t v_od_17[8] = { -31128, 118, -25131, -17336, -18315, 9021, -4172, 29411};
|
|
int32_t v_od_18[8] = { 14059, -14191, -7184, -16624, -1014, 20251, 17119, 4916};
|
|
int32_t v_od_19[8] = { 21760, -19131, 15116, 32652, -32115, 25701, 1199, 7718};
|
|
int32_t v_od_20[8] = { 31881, -20865, 28131, 18774, -24384, -9881, 18802, -28719};
|
|
int32_t v_od_21[8] = { 1581, -25864, -18120, 18591, 18387, -81, -27890, -18916};
|
|
int32_t v_od_22[8] = { 16681, -7864, 28147, -8473, 13283, -12981, 891, -18914};
|
|
int32_t v_od_23[8] = { -30085, -8891, -18122, -28435, -89, 4181, 3832, 28911};
|
|
int32_t v_od_24[8] = { -688, 860, -28135, 28275, 25882, -16081, -29875, -18613};
|
|
int32_t v_od_25[8] = { -4283, -7881, -8172, -18244, 18486, 4481, 22862, 8410};
|
|
int32_t v_od_26[8] = { 23981, 835, 18123, -28922, 19782, -8081, 805, 18218};
|
|
int32_t v_od_27[8] = { -31188, 818, -28131, -18336, -18385, 9081, -4872, 28411};
|
|
int32_t v_od_28[8] = { 14089, -14891, -8184, -18624, -1084, 20281, 17819, 8916};
|
|
int32_t v_od_29[8] = { 21780, -19831, 18116, 38652, -32185, 25781, 1899, 8718};
|
|
int32_t v_od_30[8] = { 31891, -29865, 29131, 18974, -29384, -9889, 18902, -29919};
|
|
int32_t v_od_31[8] = { 1591, -29864, -19120, 18991, 19387, -89, -27990, -19916};
|
|
|
|
int32_t v_dd_00[8];
|
|
int32_t v_dd_01[8];
|
|
int32_t v_dd_02[8];
|
|
int32_t v_dd_03[8];
|
|
int32_t v_dd_04[8];
|
|
int32_t v_dd_05[8];
|
|
int32_t v_dd_06[8];
|
|
int32_t v_dd_07[8];
|
|
int32_t v_dd_08[8];
|
|
int32_t v_dd_09[8];
|
|
int32_t v_dd_10[8];
|
|
int32_t v_dd_11[8];
|
|
int32_t v_dd_12[8];
|
|
int32_t v_dd_13[8];
|
|
int32_t v_dd_14[8];
|
|
int32_t v_dd_15[8];
|
|
int32_t v_dd_16[8];
|
|
int32_t v_dd_17[8];
|
|
int32_t v_dd_18[8];
|
|
int32_t v_dd_19[8];
|
|
int32_t v_dd_20[8];
|
|
int32_t v_dd_21[8];
|
|
int32_t v_dd_22[8];
|
|
int32_t v_dd_23[8];
|
|
int32_t v_dd_24[8];
|
|
int32_t v_dd_25[8];
|
|
int32_t v_dd_26[8];
|
|
int32_t v_dd_27[8];
|
|
int32_t v_dd_28[8];
|
|
int32_t v_dd_29[8];
|
|
int32_t v_dd_30[8];
|
|
int32_t v_dd_31[8];
|
|
|
|
int32_t v_dt[5];
|
|
|
|
vld_fpw( 0, v_od_00, 1)
|
|
vld_sib( 1, v_od_01, 1)
|
|
vld_uib( 2, v_od_02, 1)
|
|
vld_sih( 3, v_od_03, 1)
|
|
vld_uih( 4, v_od_04, 1)
|
|
vld_siw( 5, v_od_05, 1)
|
|
vld_uiw( 6, v_od_06, 1)
|
|
vld_fpw( 7, v_od_07, 1)
|
|
vld_sib( 8, v_od_08, 1)
|
|
vld_uib( 9, v_od_09, 1)
|
|
vld_sih(10, v_od_10, 1)
|
|
vld_uih(11, v_od_11, 1)
|
|
vld_siw(12, v_od_12, 1)
|
|
vld_uiw(13, v_od_13, 1)
|
|
vld_fpw(14, v_od_14, 1)
|
|
vld_sib(15, v_od_15, 1)
|
|
// vld_uib(16, v_od_16, 1)
|
|
// vld_sih(17, v_od_17, 1)
|
|
// vld_uih(18, v_od_18, 1)
|
|
// vld_siw(19, v_od_19, 1)
|
|
// vld_uiw(20, v_od_20, 1)
|
|
// vld_fpw(21, v_od_21, 1)
|
|
// vld_sib(22, v_od_22, 1)
|
|
// vld_uib(23, v_od_23, 1)
|
|
// vld_sih(24, v_od_24, 1)
|
|
// vld_uih(25, v_od_25, 1)
|
|
// vld_siw(26, v_od_26, 1)
|
|
// vld_uiw(27, v_od_27, 1)
|
|
// vld_fpw(28, v_od_28, 1)
|
|
// vld_sib(29, v_od_29, 1)
|
|
// vld_uib(30, v_od_30, 1)
|
|
// vld_sih(31, v_od_31, 1)
|
|
|
|
vst_fpw(v_dd_00, 1, 0)
|
|
vst_sib(v_dd_01, 1, 1)
|
|
vst_uib(v_dd_02, 1, 2)
|
|
vst_sih(v_dd_03, 1, 3)
|
|
vst_uih(v_dd_04, 1, 4)
|
|
vst_siw(v_dd_05, 1, 5)
|
|
vst_uiw(v_dd_06, 1, 6)
|
|
vst_fpw(v_dd_07, 1, 7)
|
|
vst_sib(v_dd_08, 1, 8)
|
|
vst_uib(v_dd_09, 1, 9)
|
|
vst_sih(v_dd_10, 1, 10)
|
|
vst_uih(v_dd_11, 1, 11)
|
|
vst_siw(v_dd_12, 1, 12)
|
|
vst_uiw(v_dd_13, 1, 13)
|
|
vst_fpw(v_dd_14, 1, 14)
|
|
vst_sib(v_dd_15, 1, 15)
|
|
// vst_uib(v_dd_16, 1, 16)
|
|
// vst_sih(v_dd_17, 1, 17)
|
|
// vst_uih(v_dd_18, 1, 18)
|
|
// vst_siw(v_dd_19, 1, 19)
|
|
// vst_uiw(v_dd_20, 1, 20)
|
|
// vst_fpw(v_dd_21, 1, 21)
|
|
// vst_sib(v_dd_22, 1, 22)
|
|
// vst_uib(v_dd_23, 1, 23)
|
|
// vst_sih(v_dd_24, 1, 24)
|
|
// vst_uih(v_dd_25, 1, 25)
|
|
// vst_siw(v_dd_26, 1, 26)
|
|
// vst_uiw(v_dd_27, 1, 27)
|
|
// vst_fpw(v_dd_28, 1, 28)
|
|
// vst_sib(v_dd_29, 1, 29)
|
|
// vst_uib(v_dd_30, 1, 30)
|
|
// vst_sih(v_dd_31, 1, 31)
|
|
vcsrr(v_dt[0], 0)
|
|
vcsrr(v_dt[1], 4)
|
|
vcsrr(v_dt[2], 5)
|
|
for (uint8_t i = 0; i < 5; i++) {
|
|
iot_printf("v_dt[%d]=%d\n", i, v_dt[i]);
|
|
}
|
|
// vcsrr(v_dt[3], 6)
|
|
// vcsrr(v_dt[4], 7)
|
|
|
|
for(uint8_t i = 0; i < 4; i++) {
|
|
error += (v_od_00[i] != v_dd_00[i]);
|
|
error += (v_od_01[i] != v_dd_01[i]);
|
|
error += (v_od_02[i] != v_dd_02[i]);
|
|
error += (v_od_03[i] != v_dd_03[i]);
|
|
error += (v_od_04[i] != v_dd_04[i]);
|
|
error += (v_od_05[i] != v_dd_05[i]);
|
|
error += (v_od_06[i] != v_dd_06[i]);
|
|
error += (v_od_07[i] != v_dd_07[i]);
|
|
error += (v_od_08[i] != v_dd_08[i]);
|
|
error += (v_od_09[i] != v_dd_09[i]);
|
|
error += (v_od_10[i] != v_dd_10[i]);
|
|
error += (v_od_11[i] != v_dd_11[i]);
|
|
error += (v_od_12[i] != v_dd_12[i]);
|
|
error += (v_od_13[i] != v_dd_13[i]);
|
|
error += (v_od_14[i] != v_dd_14[i]);
|
|
error += (v_od_15[i] != v_dd_15[i]);
|
|
error += (v_od_16[i] != v_dd_16[i]);
|
|
error += (v_od_17[i] != v_dd_17[i]);
|
|
error += (v_od_18[i] != v_dd_18[i]);
|
|
error += (v_od_19[i] != v_dd_19[i]);
|
|
error += (v_od_20[i] != v_dd_20[i]);
|
|
error += (v_od_21[i] != v_dd_21[i]);
|
|
error += (v_od_22[i] != v_dd_22[i]);
|
|
error += (v_od_23[i] != v_dd_23[i]);
|
|
error += (v_od_24[i] != v_dd_24[i]);
|
|
error += (v_od_25[i] != v_dd_25[i]);
|
|
error += (v_od_26[i] != v_dd_26[i]);
|
|
error += (v_od_27[i] != v_dd_27[i]);
|
|
error += (v_od_28[i] != v_dd_28[i]);
|
|
error += (v_od_29[i] != v_dd_29[i]);
|
|
error += (v_od_30[i] != v_dd_30[i]);
|
|
error += (v_od_31[i] != v_dd_31[i]);
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t fpw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
float a = 34.75;
|
|
float b = -44.1876;
|
|
a = a;
|
|
b = b;
|
|
float v_a[256] = {777.07, 4935.03, -2095.22, 12332.16, 5333.75, -10814.01, 6083.75, 9508.28, -19271.97, 16169.42, -17471.97, -7809.87, -8338.85, 6279.29, -7754.14, 3226.75, -11237.26, 16013.69, 8641.08, -11088.85, -19109.87, 7806.68, 4344.90, -7388.85, -4545.22, -245.85, 10789.49, 13392.35, -19191.71, -9114.01, 15573.24, -3244.26, 10161.46, 19411.14, -1048.08, 13182.80, -555.73, 16038.21, 798.40, 6779.61, -14253.50, 1392.99, 12161.14, -13104.14, 13215.60, -10624.84, 12779.29, -7574.52, -15975.15, 15696.17, -11969.10, -8404.14, -2035.35, -9578.02, 17222.29, -19736.62, 12621.97, 16485.03, 4657.96, 3929.61, -135.35, 16592.35, -11447.13, -3710.19, 1707.32, 14610.82, -14592.67, 19698.72, 3263.05, 9526.11, 19942.03, -3376.75, -13132.80, 12350.31, 1144.58, -10321.97, 7638.53, 3089.49, -15864.01, -18837.26, 3166.87, 14204.45, -12628.34, -12936.62, -13019.74, -8274.20, 14179.93, -20016.24, -1608.28, 8886.30, -16579.29, -4057.32, 12142.03, 7670.06, -16300.0, 9552.86, -1552.54, -12142.35, 17177.70, 17026.75, 6104.77, 17009.55, -3898.40, 3015.28, 20788.21, 11214.01, -9442.99, -7353.50, -357.00, -13278.02, -7142.35, -12297.77, -16668.15, -19056.36, 11109.23, -4065.92, 2818.15, -10530.57, 3775.79, -12004.14, 18415.28, -8501.91, -9812.10, -7161.46, -5634.71, -14551.91, 15949.36, 13456.68};
|
|
float v_b[256] = {8438.85, 280.25, -8459.55, -5259.23, 15479.29, 15932.48, -16.87, -10713.05, -6168.15, -10871.01, 12173.56, -15502.54, -9540.76, -12503.82, 13469.10, -4861.14, -15288.53, -13114.01, -9909.87, -13050.0, 4168.47, 18297.45, -10086.94, -8887.89, 2401.59, -16019.10, 10295.22, 9995.22, 16160.82, 9697.77, 426.11, 20664.96, 6679.93, -2966.87, -7217.19, 2012.73, -1391.71, -5016.24, -16111.46, 12742.35, 7115.60, 4667.51, -6773.24, 12151.27, -6780.89, -12656.36, -12919.42, 1100.31, 10725.79, -1694.26, -1034.07, -6064.33, 6150.0, -7362.10, 20117.19, 12178.66, 14416.24, -15065.28, 8656.36, -13225.47, 16494.58, 1716.24, -11512.10, -12388.53, 1336.30, 15338.85, -353.18, -6915.28, -18771.33, -13300.95, -19781.84, -8577.38, -2192.35, 13153.50, -1938.21, 6155.09, -20264.96, -5348.72, -7686.30, -10642.35, -4306.05, 11095.22, 19364.96, 17412.73, 4867.83, -2163.05, -16232.16, 1861.46, 10643.63, -434.39, -17728.34, -7474.84, 17064.33, 7873.88, 19580.89, -17547.13, 13165.28, 3083.12, 18822.61, -17371.01, -1943.63, 6529.61, -17193.94, -10397.45, -1166.56, 2749.04, -16870.38, 1806.05, -3050.95, -8488.21, -14813.69, -7595.54, 19336.30, 19500.95, 4617.19, -9113.37, -17397.45, 9054.77, 13341.71, 20647.77, 5813.69, 13237.57, 12785.66, 18758.59, 18591.08, -2187.26, 1249.04, 13889.49};
|
|
float v_d0[256];
|
|
float v_d1[256];
|
|
|
|
float *p_a = v_a;
|
|
float *p_b = v_b;
|
|
float *p_d0 = v_d0;
|
|
|
|
vld_fpw(0, p_a, 1)
|
|
vld_fpw(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*1){
|
|
p_a = p_a + SIMD_W*1;
|
|
p_b = p_b + SIMD_W*1;
|
|
|
|
//vmull(2,0,1)
|
|
//vmadd(2,0,1)
|
|
//vmsub(2,0,1)
|
|
//vsub(2,2,0)
|
|
//vadd(2,2,1)
|
|
|
|
vmull(2,0,1)
|
|
vadd(2,2,1)
|
|
|
|
vld_fpw(0, p_a, 1)
|
|
vld_fpw(1, p_b, 1)
|
|
vst_fpw(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
//v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
v_d1[i] = v_a[i] * v_b[i] + v_b[i];
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t siw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int32_t a = 4431;
|
|
int32_t b = -977;
|
|
a = a;
|
|
b = b;
|
|
int32_t v_a[256] = {31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, -25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 986};
|
|
int32_t v_b[256] = {-25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, 986};
|
|
int32_t v_d0[256];
|
|
int32_t v_d1[256];
|
|
|
|
int32_t *p_a = v_a;
|
|
int32_t *p_b = v_b;
|
|
int32_t *p_d0 = v_d0;
|
|
|
|
uint32_t shiftl[1] = {1};
|
|
uint32_t shiftr[1] = {16};
|
|
vld_uiw(8, shiftl, 0)
|
|
vld_uiw(9, shiftr, 0)
|
|
|
|
vld_siw(0, p_a, 1)
|
|
vld_siw(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*1){
|
|
p_a = p_a + SIMD_W*1;
|
|
p_b = p_b + SIMD_W*1;
|
|
|
|
//vmull(2,0,1)
|
|
//vmadd(2,0,1)
|
|
//vmsub(2,0,1)
|
|
//vsub(2,2,0)
|
|
//vadd(2,2,1)
|
|
//vsll(2,2,8)
|
|
//vsra(2,2,9)
|
|
|
|
vmull(2,0,1)
|
|
|
|
vld_siw(0, p_a, 1)
|
|
vld_siw(1, p_b, 1)
|
|
vst_siw(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
|
|
//v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
//v_d1[i] = v_d1[i] << 1;
|
|
//v_d1[i] = v_d1[i] >> 16;
|
|
|
|
v_d1[i] = v_a[i] * v_b[i];
|
|
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t uiw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
uint32_t a = 4431;
|
|
uint32_t b = 977;
|
|
a = a;
|
|
b = b;
|
|
uint32_t v_a[256] = {31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 986};
|
|
uint32_t v_b[256] = {25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 986};
|
|
uint32_t v_d0[256];
|
|
uint32_t v_d1[256];
|
|
|
|
uint32_t *p_a = v_a;
|
|
uint32_t *p_b = v_b;
|
|
uint32_t *p_d0 = v_d0;
|
|
|
|
uint32_t shiftl[1] = {1};
|
|
uint32_t shiftr[1] = {16};
|
|
vld_uiw(8, shiftl, 0)
|
|
vld_uiw(9, shiftr, 0)
|
|
|
|
vld_uiw(0, p_a, 1)
|
|
vld_uiw(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*1){
|
|
p_a = p_a + SIMD_W*1;
|
|
p_b = p_b + SIMD_W*1;
|
|
|
|
vmull(2,0,1)
|
|
|
|
//vmull(2,0,1)
|
|
//vmadd(2,0,1)
|
|
//vmsub(2,0,1)
|
|
//vsub(2,2,0)
|
|
//vadd(2,2,1)
|
|
//vsll(2,2,8)
|
|
//vsrl(2,2,9)
|
|
|
|
vld_uiw(0, p_a, 1)
|
|
vld_uiw(1, p_b, 1)
|
|
vst_uiw(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
|
|
//v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
//v_d1[i] = v_d1[i] << 1;
|
|
//v_d1[i] = v_d1[i] >> 16;
|
|
|
|
v_d1[i] = v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t sih_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int16_t a = 35;
|
|
int16_t b = -113;
|
|
a = a;
|
|
b = b;
|
|
int16_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
|
|
int16_t v_b[256] = {83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 92};
|
|
int16_t v_d0[256];
|
|
int16_t v_d1[256];
|
|
|
|
int16_t *p_a = v_a;
|
|
int16_t *p_b = v_b;
|
|
int16_t *p_d0 = v_d0;
|
|
|
|
uint16_t shiftl[2] = {1, 1};
|
|
uint16_t shiftr[2] = {8, 8};
|
|
vld_uiw(8, shiftl, 0)
|
|
vld_uiw(9, shiftr, 0)
|
|
|
|
vld_sih(0, p_a, 1)
|
|
vld_sih(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*2){
|
|
p_a = p_a + SIMD_W*2;
|
|
p_b = p_b + SIMD_W*2;
|
|
vmull(2,0,1)
|
|
vmadd(2,0,1)
|
|
vmsub(2,0,1)
|
|
vsub(2,2,0)
|
|
vadd(2,2,1)
|
|
vsll(2,2,8)
|
|
vsra(2,2,9)
|
|
vld_sih(0, p_a, 1)
|
|
vld_sih(1, p_b, 1)
|
|
vst_sih(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
v_d1[i] = v_d1[i] << 1;
|
|
v_d1[i] = v_d1[i] >> 8;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t uih_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
uint16_t a = 35;
|
|
uint16_t b = 113;
|
|
a = a;
|
|
b = b;
|
|
uint16_t v_a[256] = {83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 92};
|
|
uint16_t v_b[256] = {83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 92};
|
|
uint16_t v_d0[256];
|
|
uint16_t v_d1[256];
|
|
|
|
uint16_t *p_a = v_a;
|
|
uint16_t *p_b = v_b;
|
|
uint16_t *p_d0 = v_d0;
|
|
|
|
uint16_t shiftl[2] = {1, 1};
|
|
uint16_t shiftr[2] = {8, 8};
|
|
vld_uiw(8, shiftl, 0)
|
|
vld_uiw(9, shiftr, 0)
|
|
|
|
vld_uih(0, p_a, 1)
|
|
vld_uih(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*2){
|
|
p_a = p_a + SIMD_W*2;
|
|
p_b = p_b + SIMD_W*2;
|
|
vmull(2,0,1)
|
|
vmadd(2,0,1)
|
|
vmsub(2,0,1)
|
|
vsub(2,2,0)
|
|
vadd(2,2,1)
|
|
vsll(2,2,8)
|
|
vsrl(2,2,9)
|
|
vld_uih(0, p_a, 1)
|
|
vld_uih(1, p_b, 1)
|
|
vst_uih(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
v_d1[i] = v_d1[i] << 1;
|
|
v_d1[i] = v_d1[i] >> 8;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t sib_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int8_t a = 5;
|
|
int8_t b = -3;
|
|
a = a;
|
|
b = b;
|
|
int8_t v_a[256] = {2, -7, -5, 3, -4, 5, 0, -6, -1, 7, -5, -5, 4, 3, 4, 6, 0, -5, -7, -4, -2, 6, -2, -2, 2, -2, 2, -4, -5, 1, -7, -4, 5, 2, -3, 3, -7, 7, -4, 4, -3, -4, 5, -1, 7, -4, 6, 5, 5, 7, -5, 7, 0, -3, 0, -1, 2, 2, -6, 0, 6, -6, -5, 6, 1, -3, -6, -6, 7, -7, -7, -3, -6, -5, 2, -3, 6, -2, -6, -7, -3, 1, 7, 3, -3, -6, 4, 1, -3, -5, 4, 7, -3, -3, 5, 5, -2, 1, -2, 4, -5, 7, 5, -3, 7, 4, -6, 5, -3, 1, -5, 2, 1, -4, 0, -3, -7, 1, 1, -6, 2, -2, -2, 3, -1, 3, -3, 1, 5, 5, 5, -1, 2, 3, 0, 3, 4, -6, 5, -5, -1, -1, 4, -6, 4, -1, -6, 3, 7, -5, 4, 7, -5, 4, 2, -5, -1, 4, 1, -2, 7, 5, -7, 6, -5, 0, -1, -1, -5, 6, -7, 4, 0, 4, 0, -6, -4, -7, 7, 7, -7, 3, 5, 3, 4, 3, 3, -2, 0, 5, 2, 2, 1, -2, -1, 1, -6, 1, -3, 6, 3, -4, -7, 1, 5, -4, -2, -5, -1, -1, 5, 5, 6, -1, -4, 5, 0, 0, 1, -6, 7, -5, 2, 1, 7, 7, 6, -7, -3, 2, -3, 6, -6, 6, -2, 0, -4, 7, 0, 6, -1, -5, 4, 6, 3, 5, -4, -2, 1, 4, 4, -1, -6, -1, -2, -4};
|
|
int8_t v_b[256] = {-5, -7, -4, -2, 6, -2, -2, 2, -2, 2, -4, -5, 1, -7, -4, 5, 2, -3, 3, -7, 7, -4, 4, -3, -4, 5, -1, 7, -4, 6, 5, 5, 7, -5, 7, 0, -3, 0, -1, 2, 2, -6, 0, 6, -6, -5, 6, 1, -3, -6, -6, 7, -7, -7, -3, -6, -5, 2, -3, 6, -2, -6, -7, -3, 1, 7, 3, -3, -6, 4, 1, -3, -5, 4, 7, -3, -3, 5, 5, -2, 1, -2, 4, -5, 7, 5, -3, 7, 4, -6, 5, -3, 1, -5, 2, 1, -4, 0, -3, -7, 1, 1, -6, 2, -2, -2, 3, -1, 3, -3, 1, 5, 5, 5, -1, 2, 3, 0, 3, 4, -6, 5, -5, -1, -1, 4, -6, 4, -1, -6, 3, 7, -5, 4, 7, -5, 4, 2, -5, -1, 4, 1, -2, 7, 5, -7, 6, -5, 0, -1, -1, -5, 6, -7, 4, 0, 4, 0, -6, -4, -7, 7, 7, -7, 3, 5, 3, 4, 3, 3, -2, 0, 5, 2, 2, 1, -2, -1, 1, -6, 1, -3, 6, 3, -4, -7, 1, 5, -4, -2, -5, -1, -1, 5, 5, 6, -1, -4, 5, 0, 0, 1, -6, 7, -5, 2, 1, 7, 7, 6, -7, -3, 2, -3, 6, -6, 6, -2, 0, -4, 7, 0, 6, -1, -5, 4, 6, 3, 5, -4, -2, 1, 4, 4, -1, -6, -1, -2, 2, -7, -5, 3, -4, 5, 0, -6, -1, 7, -5, -5, 4, 3, 4, 6, 0, -4};
|
|
int8_t v_d0[256];
|
|
int8_t v_d1[256];
|
|
|
|
int8_t *p_a = v_a;
|
|
int8_t *p_b = v_b;
|
|
int8_t *p_d0 = v_d0;
|
|
|
|
uint8_t shiftl[4] = {1, 1, 1, 1};
|
|
uint8_t shiftr[4] = {4, 4, 4, 4};
|
|
vld_uiw(8, shiftl, 0)
|
|
vld_uiw(9, shiftr, 0)
|
|
|
|
vld_sib(0, p_a, 1)
|
|
vld_sib(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*4){
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vmull(2,0,1)
|
|
vmadd(2,0,1)
|
|
vmsub(2,0,1)
|
|
vsub(2,2,0)
|
|
vadd(2,2,1)
|
|
vsll(2,2,8)
|
|
vsra(2,2,9)
|
|
vld_sib(0, p_a, 1)
|
|
vld_sib(1, p_b, 1)
|
|
vst_sib(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*4;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
v_d1[i] = v_d1[i] << 1;
|
|
v_d1[i] = v_d1[i] >> 4;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t uib_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
uint8_t a = 5;
|
|
uint8_t b = 3;
|
|
a = a;
|
|
b = b;
|
|
uint8_t v_a[256] = {2, 7, 5, 3, 4, 5, 0, 6, 1, 7, 5, 5, 4, 3, 4, 6, 0, 5, 7, 4, 2, 6, 2, 2, 2, 2, 2, 4, 5, 1, 7, 4, 5, 2, 3, 3, 7, 7, 4, 4, 3, 4, 5, 1, 7, 4, 6, 5, 5, 7, 5, 7, 0, 3, 0, 1, 2, 2, 6, 0, 6, 6, 5, 6, 1, 3, 6, 6, 7, 7, 7, 3, 6, 5, 2, 3, 6, 2, 6, 7, 3, 1, 7, 3, 3, 6, 4, 1, 3, 5, 4, 7, 3, 3, 5, 5, 2, 1, 2, 4, 5, 7, 5, 3, 7, 4, 6, 5, 3, 1, 5, 2, 1, 4, 0, 3, 7, 1, 1, 6, 2, 2, 2, 3, 1, 3, 3, 1, 5, 5, 5, 1, 2, 3, 0, 3, 4, 6, 5, 5, 1, 1, 4, 6, 4, 1, 6, 3, 7, 5, 4, 7, 5, 4, 2, 5, 1, 4, 1, 2, 7, 5, 7, 6, 5, 0, 1, 1, 5, 6, 7, 4, 0, 4, 0, 6, 4, 7, 7, 7, 7, 3, 5, 3, 4, 3, 3, 2, 0, 5, 2, 2, 1, 2, 1, 1, 6, 1, 3, 6, 3, 4, 7, 1, 5, 4, 2, 5, 1, 1, 5, 5, 6, 1, 4, 5, 0, 0, 1, 6, 7, 5, 2, 1, 7, 7, 6, 7, 3, 2, 3, 6, 6, 6, 2, 0, 4, 7, 0, 6, 1, 5, 4, 6, 3, 5, 4, 2, 1, 4, 4, 1, 6, 1, 2, 4};
|
|
uint8_t v_b[256] = {5, 7, 4, 2, 6, 2, 2, 2, 2, 2, 4, 5, 1, 7, 4, 5, 2, 3, 3, 7, 7, 4, 4, 3, 4, 5, 1, 7, 4, 6, 5, 5, 7, 5, 7, 0, 3, 0, 1, 2, 2, 6, 0, 6, 6, 5, 6, 1, 3, 6, 6, 7, 7, 7, 3, 6, 5, 2, 3, 6, 2, 6, 7, 3, 1, 7, 3, 3, 6, 4, 1, 3, 5, 4, 7, 3, 3, 5, 5, 2, 1, 2, 4, 5, 7, 5, 3, 7, 4, 6, 5, 3, 1, 5, 2, 1, 4, 0, 3, 7, 1, 1, 6, 2, 2, 2, 3, 1, 3, 3, 1, 5, 5, 5, 1, 2, 3, 0, 3, 4, 6, 5, 5, 1, 1, 4, 6, 4, 1, 6, 3, 7, 5, 4, 7, 5, 4, 2, 5, 1, 4, 1, 2, 7, 5, 7, 6, 5, 0, 1, 1, 5, 6, 7, 4, 0, 4, 0, 6, 4, 7, 7, 7, 7, 3, 5, 3, 4, 3, 3, 2, 0, 5, 2, 2, 1, 2, 1, 1, 6, 1, 3, 6, 3, 4, 7, 1, 5, 4, 2, 5, 1, 1, 5, 5, 6, 1, 4, 5, 0, 0, 1, 6, 7, 5, 2, 1, 7, 7, 6, 7, 3, 2, 3, 6, 6, 6, 2, 0, 4, 7, 0, 6, 1, 5, 4, 6, 3, 5, 4, 2, 1, 4, 4, 1, 6, 1, 2, 2, 7, 5, 3, 4, 5, 0, 6, 1, 7, 5, 5, 4, 3, 4, 6, 0, 4};
|
|
uint8_t v_d0[256];
|
|
uint8_t v_d1[256];
|
|
|
|
uint8_t *p_a = v_a;
|
|
uint8_t *p_b = v_b;
|
|
uint8_t *p_d0 = v_d0;
|
|
|
|
uint8_t shiftl[4] = {1, 1, 1, 1};
|
|
uint8_t shiftr[4] = {4, 4, 4, 4};
|
|
vld_uiw(8, shiftl, 0)
|
|
vld_uiw(9, shiftr, 0)
|
|
|
|
vld_uib(0, p_a, 1)
|
|
vld_uib(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*4){
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vmull(2,0,1)
|
|
vmadd(2,0,1)
|
|
vmsub(2,0,1)
|
|
vsub(2,2,0)
|
|
vadd(2,2,1)
|
|
vsll(2,2,8)
|
|
vsrl(2,2,9)
|
|
vld_uib(0, p_a, 1)
|
|
vld_uib(1, p_b, 1)
|
|
vst_uib(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*4;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
|
|
v_d1[i] = v_d1[i] << 1;
|
|
v_d1[i] = v_d1[i] >> 4;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t sihw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int16_t a = 4431;
|
|
int16_t b = -977;
|
|
a = a;
|
|
b = b;
|
|
int16_t v_a[256] = {31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, -25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 986};
|
|
int16_t v_b[256] = {-25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, 986};
|
|
int32_t v_d0[256];
|
|
int32_t v_d1[256];
|
|
|
|
int16_t *p_a = v_a;
|
|
int16_t *p_b = v_b;
|
|
int32_t *p_d0 = v_d0;
|
|
|
|
vld_sih(0, p_a, 1)
|
|
vld_sih(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*2){
|
|
p_a = p_a + SIMD_W*2;
|
|
p_b = p_b + SIMD_W*2;
|
|
vmulw(2,0,1)
|
|
vld_sih(0, p_a, 1)
|
|
vld_sih(1, p_b, 1)
|
|
vst_siw(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
vst_siw(p_d0, 1, 3)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t uihw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
uint16_t a = 4431;
|
|
uint16_t b = 977;
|
|
a = a;
|
|
b = b;
|
|
uint16_t v_a[256] = {31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 986};
|
|
uint16_t v_b[256] = {25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 986};
|
|
uint32_t v_d0[256];
|
|
uint32_t v_d1[256];
|
|
|
|
uint16_t *p_a = v_a;
|
|
uint16_t *p_b = v_b;
|
|
uint32_t *p_d0 = v_d0;
|
|
|
|
vld_uih(0, p_a, 1)
|
|
vld_uih(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*2){
|
|
p_a = p_a + SIMD_W*2;
|
|
p_b = p_b + SIMD_W*2;
|
|
vmulw(2,0,1)
|
|
vld_uih(0, p_a, 1)
|
|
vld_uih(1, p_b, 1)
|
|
vst_uiw(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
vst_uiw(p_d0, 1, 3)
|
|
p_d0 = p_d0 + SIMD_W*1;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t sibh_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int8_t a = 5;
|
|
int8_t b = -3;
|
|
a = a;
|
|
b = b;
|
|
int8_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
|
|
int8_t v_b[256] = {83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 92};
|
|
int16_t v_d0[256];
|
|
int16_t v_d1[256];
|
|
|
|
int8_t *p_a = v_a;
|
|
int8_t *p_b = v_b;
|
|
int16_t *p_d0 = v_d0;
|
|
|
|
vld_sib(0, p_a, 1)
|
|
vld_sib(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*4){
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vmulw(2,0,1)
|
|
vld_sib(0, p_a, 1)
|
|
vld_sib(1, p_b, 1)
|
|
vst_sih(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
vst_uih(p_d0, 1, 3)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t uibh_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
uint8_t a = 5;
|
|
uint8_t b = 3;
|
|
a = a;
|
|
b = b;
|
|
uint8_t v_a[256] = {83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 92};
|
|
uint8_t v_b[256] = {83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 92};
|
|
uint16_t v_d0[256];
|
|
uint16_t v_d1[256];
|
|
|
|
uint8_t *p_a = v_a;
|
|
uint8_t *p_b = v_b;
|
|
uint16_t *p_d0 = v_d0;
|
|
|
|
vld_uib(0, p_a, 1)
|
|
vld_uib(1, p_b, 1)
|
|
for(i=0; i<len; i=i+SIMD_W*4){
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vmulw(2,0,1)
|
|
vld_uib(0, p_a, 1)
|
|
vld_uib(1, p_b, 1)
|
|
vst_uih(p_d0, 1, 2)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
vst_uih(p_d0, 1, 3)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t sib_macw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int8_t a = 5;
|
|
int8_t b = -3;
|
|
a = a;
|
|
b = b;
|
|
int8_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
|
|
int8_t v_b[256] = {83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 92};
|
|
int32_t v_d0[SIMD_W];
|
|
int32_t v_d1[1] = {0};
|
|
|
|
int8_t *p_a = v_a;
|
|
int8_t *p_b = v_b;
|
|
int32_t *p_d0 = v_d0;
|
|
int32_t *p_d1 = v_d1;
|
|
|
|
vld_siw( 8, p_d1, 0)
|
|
vadd( 9, 8, 8)
|
|
vadd(10, 8, 8)
|
|
vadd(11, 8, 8)
|
|
|
|
for(i=0; i<len; i=i+SIMD_W*4*4){
|
|
vld_sib(0, p_a, 1)
|
|
vld_sib(4, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vld_sib(1, p_a, 1)
|
|
vld_sib(5, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vld_sib(2, p_a, 1)
|
|
vld_sib(6, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vld_sib(3, p_a, 1)
|
|
vld_sib(7, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vmaddw( 8,0,4)
|
|
vmaddw( 8,1,5)
|
|
vmaddw( 8,2,6)
|
|
vmaddw( 8,3,7)
|
|
}
|
|
vadd( 8, 8, 9)
|
|
vadd( 8, 8,10)
|
|
vadd( 8, 8,11)
|
|
|
|
vst_sih(p_d0, 1, 8)
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[0] += v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=1; i<SIMD_W; i++){
|
|
v_d0[0] += v_d0[i];
|
|
}
|
|
|
|
if(v_d1[0] != v_d0[0]){
|
|
iot_printf("incorrect, golden = %d, output = %d\n", v_d1[0], v_d0[0]);
|
|
error = 1;
|
|
}
|
|
iot_printf("mac finish\n");
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t uib_macw_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
uint8_t a = 5;
|
|
uint8_t b = 3;
|
|
a = a;
|
|
b = b;
|
|
uint8_t v_a[256] = {83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 92};
|
|
uint8_t v_b[256] = {83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 92};
|
|
uint32_t v_d0[SIMD_W];
|
|
uint32_t v_d1[1] = {0};
|
|
|
|
uint8_t *p_a = v_a;
|
|
uint8_t *p_b = v_b;
|
|
uint32_t *p_d0 = v_d0;
|
|
uint32_t *p_d1 = v_d1;
|
|
|
|
vld_uiw( 8, p_d1, 0)
|
|
vadd( 9, 8, 8)
|
|
vadd(10, 8, 8)
|
|
vadd(11, 8, 8)
|
|
|
|
for(i=0; i<len; i=i+SIMD_W*4*4){
|
|
vld_uib(0, p_a, 1)
|
|
vld_uib(4, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vld_uib(1, p_a, 1)
|
|
vld_uib(5, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vld_uib(2, p_a, 1)
|
|
vld_uib(6, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vld_uib(3, p_a, 1)
|
|
vld_uib(7, p_b, 1)
|
|
p_a = p_a + SIMD_W*4;
|
|
p_b = p_b + SIMD_W*4;
|
|
vmaddw( 8,0,4)
|
|
vmaddw( 8,1,5)
|
|
vmaddw( 8,2,6)
|
|
vmaddw( 8,3,7)
|
|
}
|
|
vadd( 8, 8, 9)
|
|
vadd( 8, 8,10)
|
|
vadd( 8, 8,11)
|
|
|
|
vst_uih(p_d0, 1, 8)
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[0] += v_a[i] * v_b[i];
|
|
}
|
|
|
|
for(i=1; i<SIMD_W; i++){
|
|
v_d0[0] += v_d0[i];
|
|
}
|
|
|
|
if(v_d1[0] != v_d0[0]){
|
|
error = 1;
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t sih_macw_test() {
|
|
int16_t a[8] = {12000, 14000, 16000, 18000, 20000, 22000, 24000, 26000};
|
|
int16_t b[8] = {28000, 30000, 32000, 34000, 36000, 38000, 40000, 42000};
|
|
vld_sih(0, a, 1);
|
|
vld_sih(1, b, 1);
|
|
int32_t zero[] = {0};
|
|
vld_siw(2, zero, 0);
|
|
vld_siw(3, zero, 0);
|
|
vld_siw(4, zero, 0);
|
|
vld_siw(5, zero, 0);
|
|
vmaddw(2, 0, 1);
|
|
int32_t out[16];
|
|
int32_t *po = out;
|
|
vst_siw(po, 1, 2);
|
|
po += SIMD_W;
|
|
vst_siw(po, 1, 3);
|
|
po += SIMD_W;
|
|
vst_siw(po, 1, 4);
|
|
po += SIMD_W;
|
|
vst_siw(po, 1, 5);
|
|
po += SIMD_W;
|
|
return 0;
|
|
}
|
|
|
|
uint8_t gather8_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int8_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
|
|
int8_t v_d0[256];
|
|
int8_t v_d1[256];
|
|
const int8_t *lookup_table = table8;
|
|
|
|
|
|
int8_t *p_a = v_a;
|
|
int8_t *p_d0 = v_d0;
|
|
|
|
for(i=0; i<len; i=i+SIMD_W*4){
|
|
vlds_sib(0, p_a, 4)
|
|
vldx_sib(1, (lookup_table + 128), 0)
|
|
p_a = p_a + SIMD_W*4;
|
|
vsts_sib(p_d0, 4, 1)
|
|
p_d0 = p_d0 + SIMD_W*4;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = *((lookup_table + 128) + v_a[i]);
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t gather16_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int16_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
|
|
int16_t v_d0[256];
|
|
int16_t v_d1[256];
|
|
const int16_t *lookup_table = table16;
|
|
for (uint32_t i = 0; i < 256; i++) {
|
|
table16[i] = 10 * i;
|
|
}
|
|
for(i=0; i<len; i++){
|
|
iot_printf("look_up[%d] = %d\n", i, lookup_table[i]);
|
|
}
|
|
|
|
|
|
int16_t *p_a = v_a;
|
|
int16_t *p_d0 = v_d0;
|
|
|
|
for(i=0; i<len; i=i+SIMD_W*2){
|
|
vlds_sih(0, p_a, 4)
|
|
p_a = p_a + SIMD_W*2;
|
|
vadd(0, 0, 0)
|
|
vldx_sih(1, (lookup_table + 128), 0)
|
|
vsts_sih(p_d0, 4, 1)
|
|
p_d0 = p_d0 + SIMD_W*2;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[i] = *((lookup_table + 128) + v_a[i]);
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
iot_printf("g[%d] = %d, o[%d] = %d\n", i, v_d1[i], i, v_d0[i]);
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t scatter8_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int8_t v_a[256] = {127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -69, -70, -71, -72, -73, -74, -75, -76, -77, -78, -79, -80, -81, -82, -83, -84, -85, -86, -87, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100, -101, -102, -103, -104, -105, -106, -107, -108, -109, -110, -111, -112, -113, -114, -115, -116, -117, -118, -119, -120, -121, -122, -123, -124, -125, -126, -127, -128};
|
|
int8_t v_d0[256];
|
|
int8_t v_d1[256];
|
|
const int8_t *lookup_table = table8;
|
|
|
|
|
|
int8_t *p_a = v_a;
|
|
const int8_t *p_t = lookup_table;
|
|
|
|
for(i=0; i<len; i=i+SIMD_W*4){
|
|
vlds_sib(0, p_a, 4)
|
|
p_a = p_a + SIMD_W*4;
|
|
vlds_sib(1, p_t, 4)
|
|
vstx_sib((v_d0+128), 0, 1)
|
|
p_t = p_t + SIMD_W*4;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[v_a[i]+128] = *(lookup_table + i);
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
uint8_t scatter16_test(){
|
|
int i;
|
|
uint8_t error = 0;
|
|
int len = 256;
|
|
int16_t v_a[256] = {127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -69, -70, -71, -72, -73, -74, -75, -76, -77, -78, -79, -80, -81, -82, -83, -84, -85, -86, -87, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100, -101, -102, -103, -104, -105, -106, -107, -108, -109, -110, -111, -112, -113, -114, -115, -116, -117, -118, -119, -120, -121, -122, -123, -124, -125, -126, -127, -128};
|
|
int16_t v_d0[256];
|
|
int16_t v_d1[256];
|
|
const int16_t *lookup_table = table16;
|
|
|
|
|
|
int16_t *p_a = v_a;
|
|
const int16_t *p_t = lookup_table;
|
|
|
|
for(i=0; i<len; i=i+SIMD_W*2){
|
|
vlds_sih(0, p_a, 4)
|
|
p_a = p_a + SIMD_W*2;
|
|
vlds_sih(1, p_t, 4)
|
|
vadd(0, 0, 0)
|
|
vstx_sih((v_d0+128), 0, 1)
|
|
p_t = p_t + SIMD_W*2;
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
v_d1[(v_a[i]+128)] = *(lookup_table + i);
|
|
}
|
|
|
|
for(i=0; i<len; i++){
|
|
if(v_d0[i] != v_d1[i]){
|
|
error = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
/*
 * matrix_test - manual smoke test for the matrix multiply helpers.
 *
 * Exactly one of three variants is compiled in (int8 -> int32, int8 -> int8,
 * or float), selected by the #if switches below. Each variant fills the
 * inputs with constants, calls the helper, and logs the return code followed
 * by all 1024 output entries.
 *
 * The whole output buffer is zeroed before the call. Previously only the
 * first 256 or 512 entries were cleared, so the dump could read
 * uninitialised stack for any entry the helper did not write.
 */
void matrix_test() {
#if 0
    int8_t a[256];
    int8_t b[256];
    int32_t out[1024];
    for (uint32_t i = 0; i < 256; i++) {
        a[i] = 1;
        b[i] = -1;
    }
    /* Clear every entry that is printed below. */
    for (uint32_t i = 0; i < 1024; i++) {
        out[i] = 0;
    }
    uint8_t result = matrix_multi_int8_to_int32(a, b, out, 32, 8, 29);
    iot_printf("result = %d\n", result);
    for (uint32_t i = 0; i < 1024; i++) {
        iot_printf("out[%d] = %d\n", i, out[i]);
    }
#else
#if 1
    int8_t a[512];
    int8_t b[512];
    int8_t out[1024];
    for (uint32_t i = 0; i < 512; i++) {
        a[i] = 1;
        b[i] = -3;
    }
    /* Clear every entry that is printed below. */
    for (uint32_t i = 0; i < 1024; i++) {
        out[i] = 0;
    }
    uint8_t result = matrix_multi_int8_to_int8(a, b, out, 32, 8, 29, 1);
    iot_printf("result = %d\n", result);
    for (uint32_t i = 0; i < 1024; i++) {
        iot_printf("out[%d] = %d\n", i, out[i]);
    }
#else
    float a[256];
    float b[256];
    float out[1024];
    for (uint32_t i = 0; i < 256; i++) {
        a[i] = 1;
        b[i] = 1;
    }
    /* Clear every entry that is printed below. */
    for (uint32_t i = 0; i < 1024; i++) {
        out[i] = 0;
    }
    uint8_t result = matrix_multi_float(a, b, out, 32, 8, 32);
    /* The float results are dumped as raw 32-bit patterns. */
    uint32_t *out_bin = (uint32_t *)out;
    iot_printf("result = %d\n", result);
    for (uint32_t i = 0; i < 1024; i++) {
        iot_printf("out[%d] = %08x\n", i, out_bin[i]);
    }
#endif
#endif
}
|
|
|
|
/*
 * vector_add_test_uint8 - check vector_add_const_uint8 and vector_add_uint8
 * against a scalar reference on 100-element inputs.
 *
 * With SATURATION defined the reference clamps sums at 255; otherwise it
 * wraps modulo 256. Mismatches are logged, and a completion line is logged
 * after each stage. The completion lines now name the stage that actually
 * ran (they were swapped before).
 */
void vector_add_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];
    uint8_t o[100];
    uint8_t g[100];
    for (uint32_t i = 0; i < 100; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i + 23;
    }

    /* Stage 1: vector plus constant (b[0]). */
    vector_add_const_uint8(a, b[0], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
#ifdef SATURATION
        uint16_t gg = a[i] + b[0];
        g[i] = gg > 255 ? 255 : gg;
#else
        g[i] = a[i] + b[0];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("add const of uint8 test end\n");

    /* Stage 2: element-wise vector plus vector. */
    vector_add_uint8(a, b, o, 100);
    for (uint8_t i = 0; i < 100; i++) {
#ifdef SATURATION
        uint16_t gg = a[i] + b[i];
        g[i] = gg > 255 ? 255 : gg;
#else
        g[i] = a[i] + b[i];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("add of uint8 test end\n");
}
|
|
|
|
/*
 * vector_sub_test_uint8 - check vector_sub_const_uint8,
 * vector_const_sub_uint8 and vector_sub_uint8 against a scalar reference on
 * 100-element inputs.
 *
 * With SATURATION defined the reference clamps negative differences to 0;
 * otherwise it wraps modulo 256. Mismatches are logged, and a completion
 * line is logged after each stage. The constant-minus-vector stage now has
 * its own completion text instead of repeating the "sub const" one.
 */
void vector_sub_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];
    uint8_t o[100];
    uint8_t g[100];
    for (uint32_t i = 0; i < 100; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i + 23;
    }

    /* Stage 1: vector minus constant (a[i] - b[0]). */
    vector_sub_const_uint8(a, b[0], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
#ifdef SATURATION
        if (a[i] < b[0]) {
            g[i] = 0;
        } else {
            g[i] = a[i] - b[0];
        }
#else
        g[i] = a[i] - b[0];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("sub const of uint8 test end\n");

    /* Stage 2: constant minus vector (b[0] - a[i]). */
    vector_const_sub_uint8(a, b[0], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
#ifdef SATURATION
        if (a[i] > b[0]) {
            g[i] = 0;
        } else {
            g[i] = b[0] - a[i];
        }
#else
        g[i] = b[0] - a[i];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, b = %d, a = %d, golden %d, output %d\n", i, b[0], a[i], g[i], o[i]);
        }
    }
    iot_printf("const sub of uint8 test end\n");

    /* Stage 3: element-wise vector minus vector. */
    vector_sub_uint8(a, b, o, 100);
    for (uint8_t i = 0; i < 100; i++) {
#ifdef SATURATION
        if (a[i] < b[i]) {
            g[i] = 0;
        } else {
            g[i] = a[i] - b[i];
        }
#else
        g[i] = a[i] - b[i];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("sub of uint8 test end\n");
}
|
|
|
|
/*
 * vector_mul_test_uint8 - check the uint8 multiply helpers against a scalar
 * reference on 100-element heap buffers.
 *
 * Six stages run in order: full 16-bit product (vector and constant), high
 * 8 bits (vector and constant), and low 8 bits (vector and constant). With
 * SATURATION defined the low-8-bit reference clamps at 255; otherwise it
 * masks with 0xff. Mismatches are logged, and a completion line is logged
 * after each stage. All buffers are freed before returning.
 *
 * The high-8-bit mismatch messages now print h[i], the buffer under test.
 * They previously printed l[i], which had not been written yet.
 */
void vector_mul_test_uint8() {
    uint8_t length = 100;
    uint8_t *a = (uint8_t *)os_mem_malloc(1, length);
    uint8_t *b = (uint8_t *)os_mem_malloc(1, length);
    uint8_t *l = (uint8_t *)os_mem_malloc(1, length);
    uint8_t *h = (uint8_t *)os_mem_malloc(1, length);
    uint16_t *w = (uint16_t *)os_mem_malloc(1, length * 2);
    for (uint8_t i = 0; i < length; i++) {
        /* The int results are narrowed to uint8_t on assignment. */
        a[i] = -2 * (i - 25);
        b[i] = 3 * (i - 33);
    }

    /* Full 16-bit product, vector by vector. */
    vector_multiply_uint8(a, b, w, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = a[i] * b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_uint8 finished\n");

    /* Full 16-bit product, vector by constant b[1]. */
    vector_multiply_const_uint8(a, b[1], w, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = a[i] * b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint8 finished\n");

    /* High 8 bits of the product, vector by vector. */
    vector_multiply_uint8_high_8bit(a, b, h, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = (a[i] * b[i]) >> 8;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint8_high finished\n");

    /* High 8 bits of the product, vector by constant b[1]. */
    vector_multiply_const_uint8_high_8bit(a, b[1], h, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = (a[i] * b[1]) >> 8;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint8_high finished\n");

    /* Low 8 bits of the product, vector by vector. */
    vector_multiply_uint8_low_8bit(a, b, l, length);
    for (uint8_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint16_t g = (a[i] * b[i]);
        if (g > 255) {
            g = 255;
        }
#else
        uint16_t g = (a[i] * b[i]) & 0xff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint8_low finished\n");

    /* Low 8 bits of the product, vector by constant b[1]. */
    vector_multiply_const_uint8_low_8bit(a, b[1], l, length);
    for (uint8_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint16_t g = (a[i] * b[1]);
        if (g > 255) {
            g = 255;
        }
#else
        uint16_t g = (a[i] * b[1]) & 0xff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint8_low finished\n");

    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(h);
    os_mem_free(l);
    os_mem_free(w);
}
|
|
|
|
/*
 * vector_madd_msub_test_uint8 - check the uint8 multiply-add and
 * multiply-subtract helpers against a scalar reference on 100-element heap
 * buffers.
 *
 * Four stages run in order: 8-bit j + a*b, 8-bit j - a*b, 32-bit J + a*b and
 * 32-bit J - a*b. With SATURATION defined the 8-bit references clamp to
 * 0..255; otherwise they wrap modulo 256. Mismatches are logged, and a
 * completion line is logged after each stage.
 *
 * All six buffers are released before returning; they were leaked before.
 */
void vector_madd_msub_test_uint8() {
    uint8_t length = 100;
    uint8_t *a = os_mem_malloc(1, length);
    uint8_t *b = os_mem_malloc(1, length);
    uint8_t *j = os_mem_malloc(1, length);
    uint8_t *o = os_mem_malloc(1, length);
    uint32_t *J = os_mem_malloc(1, length * 4);
    uint32_t *O = os_mem_malloc(1, length * 4);
    for (uint8_t i = 0; i < length; i++) {
        a[i] = 2 * (i + 25);
        b[i] = 3 * (i + 33);
        j[i] = i + 50;
        J[i] = i * 100 + 50000;
    }

    /* 8-bit multiply-add: j + a * b. */
    vector_mul_add_uint8(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint16_t l = a[i] * b[i];
        l = l > 255 ? 255 : l;
        uint16_t g = j[i] + l;
        g = g > 255 ? 255 : g;
#else
        uint8_t g = j[i] + a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, j = %d, a = %d, b = %d, golden %d, output %d\n", i, j[i], a[i], b[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint8 finished\n");

    /* 8-bit multiply-subtract: j - a * b. */
    vector_mul_sub_uint8(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint16_t l = a[i] * b[i];
        l = l > 255 ? 255 : l;
        uint8_t g = j[i] > l ? j[i] - l : 0;
#else
        uint8_t g = j[i] - a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, j = %d, a = %d, b = %d, golden %d, output %d\n", i, j[i], a[i], b[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint8 finished\n");

    /* 32-bit accumulate: J + a * b. */
    vector_mul_add_uint8_to_uint32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        uint32_t g = J[i] + (uint32_t)a[i] * (uint32_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_add_uint8_to_uint32 finished\n");

    /* 32-bit accumulate: J - a * b. */
    vector_mul_sub_uint8_to_uint32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        uint32_t g = J[i] - (uint32_t)a[i] * (uint32_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_sub_uint8_to_uint32 finished\n");

    /* Release every buffer allocated above. */
    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(j);
    os_mem_free(o);
    os_mem_free(J);
    os_mem_free(O);
}
|
|
|
|
/*
 * vector_min_test_uint8 - check vector_min_uint8 and vector_min_const_uint8
 * against a scalar reference on 100-element inputs. Mismatches are logged
 * and a completion line is logged at the end.
 */
void vector_min_test_uint8() {
    enum { N = 100 };
    uint8_t a[N];
    uint8_t b[N];
    uint8_t o[N];
    uint8_t idx;

    for (uint32_t n = 0; n < N; n++) {
        a[n] = 2 * n;
        b[n] = 3 * n + 23;
    }

    /* Element-wise minimum of two vectors. */
    vector_min_uint8(a, b, o, N);
    for (idx = 0; idx < N; idx++) {
        uint8_t want = b[idx];
        if (a[idx] < b[idx]) {
            want = a[idx];
        }
        if (want != o[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, want, o[idx]);
        }
    }

    /* Minimum of each element and the constant b[0]. */
    vector_min_const_uint8(a, b[0], o, N);
    for (idx = 0; idx < N; idx++) {
        uint8_t want = b[0];
        if (a[idx] < b[0]) {
            want = a[idx];
        }
        if (want != o[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, want, o[idx]);
        }
    }
    iot_printf("min of uint8 test end\n");
}
|
|
|
|
/*
 * vector_max_test_uint8 - check vector_max_uint8 and vector_max_const_uint8
 * against a scalar reference on 100-element inputs. Mismatches are logged
 * and a completion line is logged at the end.
 */
void vector_max_test_uint8() {
    enum { N = 100 };
    uint8_t a[N];
    uint8_t b[N];
    uint8_t o[N];
    uint8_t idx;

    for (uint32_t n = 0; n < N; n++) {
        a[n] = 2 * n;
        b[n] = 3 * n + 23;
    }

    /* Element-wise maximum of two vectors. */
    vector_max_uint8(a, b, o, N);
    for (idx = 0; idx < N; idx++) {
        uint8_t want = b[idx];
        if (a[idx] > b[idx]) {
            want = a[idx];
        }
        if (want != o[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, want, o[idx]);
        }
    }

    /* Maximum of each element and the constant b[0]. */
    vector_max_const_uint8(a, b[0], o, N);
    for (idx = 0; idx < N; idx++) {
        uint8_t want = b[0];
        if (a[idx] > b[0]) {
            want = a[idx];
        }
        if (want != o[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, want, o[idx]);
        }
    }
    iot_printf("max of uint8 test end\n");
}
|
|
|
|
/*
 * vector_equal_test_uint8 - check vector_equal_uint8 and
 * vector_equal_const_uint8 against a scalar reference (1 for equal, 0
 * otherwise) on 100-element inputs. Mismatches are logged and a completion
 * line is logged at the end.
 */
void vector_equal_test_uint8() {
    enum { N = 100 };
    uint8_t a[N];
    uint8_t b[N];
    uint8_t o[N];
    uint8_t idx;

    for (uint32_t n = 0; n < N; n++) {
        a[n] = 2 * n;
        b[n] = 3 * n - 23;
    }

    /* Vector against vector. */
    vector_equal_uint8(a, b, o, N);
    for (idx = 0; idx < N; idx++) {
        uint8_t want = 0;
        if (a[idx] == b[idx]) {
            want = 1;
        }
        if (want != o[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, want, o[idx]);
        }
    }

    /* Vector against the constant b[11]. */
    vector_equal_const_uint8(a, b[11], o, N);
    for (idx = 0; idx < N; idx++) {
        uint8_t want = 0;
        if (a[idx] == b[11]) {
            want = 1;
        }
        if (want != o[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, want, o[idx]);
        }
    }
    iot_printf("equal of uint8 test end\n");
}
|
|
|
|
// Self-check for vector_not_equal_uint8 and vector_not_equal_const_uint8.
// The scalar reference stores 1 where the operands differ and 0 otherwise;
// any element where the kernel output disagrees is printed.
void vector_not_equal_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Element-wise comparison of two vectors.
    vector_not_equal_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] != rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Comparison of a vector against the constant rhs[11].
    vector_not_equal_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] != rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("not equal of uint8 test end\n");
}
|
|
|
|
// Self-check for the three uint8 "less than" kernels:
//   vector_less_than_uint8        reference: a[i] <  b[i]
//   vector_less_than_const_uint8  reference: a[i] <  b[11]
//   vector_const_less_than_uint8  reference: a[i] >  b[11]  (i.e. const < a[i])
// Mismatching elements are printed via iot_printf.
void vector_less_than_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    vector_less_than_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] < rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_less_than_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] < rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_const_less_than_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] > rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("less than of uint8 test end\n");
}
|
|
|
|
// Self-check for the three uint8 "greater or equal" kernels:
//   vector_greater_or_equal_uint8        reference: a[i] >= b[i]
//   vector_greater_or_equal_const_uint8  reference: a[i] >= b[11]
//   vector_const_greater_or_equal_uint8  reference: a[i] <= b[11]  (const >= a[i])
// Mismatching elements are printed via iot_printf.
void vector_greater_or_equal_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    vector_greater_or_equal_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] >= rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_greater_or_equal_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] >= rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_const_greater_or_equal_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] <= rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("greater or equal of uint8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_and_uint8 and vector_logic_and_const_uint8.
// The scalar reference is the bitwise AND of the operands; mismatching
// elements are printed via iot_printf.
void vector_logic_and_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector AND vector.
    vector_logic_and_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] & rhs[n];
        if (expect[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
        }
    }

    // Vector AND the constant rhs[11].
    vector_logic_and_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] & rhs[11];
        if (expect[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
        }
    }

    // The banner previously said "logic xor" (copy/paste slip), which made the
    // log indistinguishable from the xor test.
    iot_printf("logic and of uint8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_or_uint8 and vector_logic_or_const_uint8.
// The scalar reference is the bitwise OR of the operands; mismatching
// elements are printed via iot_printf.
void vector_logic_or_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector OR vector.
    vector_logic_or_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] | rhs[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector OR the constant rhs[11].
    vector_logic_or_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] | rhs[11];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic or of uint8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_xor_uint8 and vector_logic_xor_const_uint8.
// The scalar reference is the bitwise XOR of the operands; mismatching
// elements are printed via iot_printf.
void vector_logic_xor_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector XOR vector.
    vector_logic_xor_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] ^ rhs[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector XOR the constant rhs[11].
    vector_logic_xor_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] ^ rhs[11];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic xor of uint8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_xnor_uint8 and vector_logic_xnor_const_uint8.
// The scalar reference is ~(a ^ b) truncated to uint8_t; mismatching elements
// are printed via iot_printf.
void vector_logic_xnor_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t lhs[N_ELEMS];
    uint8_t rhs[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector XNOR vector.
    vector_logic_xnor_uint8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = ~(lhs[n] ^ rhs[n]);
        if (expect[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
        }
    }

    // Vector XNOR the constant rhs[11].
    vector_logic_xnor_const_uint8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = ~(lhs[n] ^ rhs[11]);
        if (expect[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
        }
    }

    // The banner previously said "logic xor"; it now names the xnor test,
    // matching the int8 counterpart's "logic xnor" banner.
    iot_printf("logic xnor of uint8 test end\n");
}
|
|
|
|
// Self-check for the three uint8 left-shift kernels:
//   vector_left_shift_uint8        reference: a[i] << b[i]
//   vector_left_shift_const_uint8  reference: a[i] << b[2]
//   vector_const_left_shift_uint8  reference: a[6] << b[i]
// Values are i+1 and shift amounts cycle through 1..7. References are
// truncated to uint8_t; mismatching elements are printed.
void vector_left_shift_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t value[N_ELEMS];
    uint8_t amount[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        value[n] = n + 1;
        amount[n] = n % 7 + 1;
    }

    vector_left_shift_uint8(value, amount, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] << amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_left_shift_const_uint8(value, amount[2], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] << amount[2];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant value shifted by a per-element amount.
    vector_const_left_shift_uint8(amount, value[6], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[6] << amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("left shift of uint8 test end\n");
}
|
|
|
|
// Self-check for the three uint8 right-shift kernels:
//   vector_right_shift_uint8        reference: a[i] >> b[i]
//   vector_right_shift_const_uint8  reference: a[i] >> b[2]
//   vector_const_right_shift_uint8  reference: a[6] >> b[i]
// Values are i-50 truncated to uint8_t and shift amounts cycle through 1..7.
// Mismatching elements are printed.
void vector_right_shift_test_uint8() {
    enum { N_ELEMS = 100 };
    uint8_t value[N_ELEMS];
    uint8_t amount[N_ELEMS];
    uint8_t actual[N_ELEMS];
    uint8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        value[n] = n - 50;
        amount[n] = n % 7 + 1;
    }

    vector_right_shift_uint8(value, amount, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] >> amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_right_shift_const_uint8(value, amount[2], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] >> amount[2];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant value shifted by a per-element amount.
    vector_const_right_shift_uint8(amount, value[6], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[6] >> amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic right shift of uint8 test end\n");
}
|
|
|
|
// Self-check and cycle-count comparison for the int8 add kernels.
//   1. vector_add_const_int8 is verified against a scalar reference.
//   2. vector_add_int8 is run 1024 times and its mcycle delta is printed.
//   3. The scalar equivalent is run 1024 times, its delta is printed, and
//      its result is used as the reference for the vector_add_int8 output.
// When SATURATION is defined the references clamp to [-128, 127]; otherwise
// they are truncated to int8_t on assignment.
void vector_add_test_int8() {
    enum { N_ELEMS = 128, N_REPEATS = 1024 };
    // Buffers are placed at fixed addresses instead of on the stack.
    int8_t *lhs = (int8_t *)0x10200000;
    int8_t *rhs = (int8_t *)0x10101000;
    int8_t *actual = (int8_t *)0x10102000;
    int8_t *expect = (int8_t *)0x10103000;

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    // Vector + constant (rhs[0]).
    vector_add_const_int8(lhs, rhs[0], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        int16_t wide = lhs[n] + rhs[0];
        if (wide > 127) {
            expect[n] = 127;
        } else if (wide < -128) {
            expect[n] = -128;
        } else {
            expect[n] = wide;
        }
#else
        expect[n] = lhs[n] + rhs[0];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", n, lhs[n], rhs[0], expect[n], actual[n]);
    }

    // Timed run of the SIMD kernel.
    uint64_t t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < N_REPEATS; pass++) {
        vector_add_int8(lhs, rhs, actual, N_ELEMS);
    }
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Timed run of the scalar reference.
    t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < N_REPEATS; pass++) {
        for (uint8_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
            int16_t wide = lhs[n] + rhs[n];
            if (wide > 127) {
                expect[n] = 127;
            } else if (wide < -128) {
                expect[n] = -128;
            } else {
                expect[n] = wide;
            }
#else
            expect[n] = lhs[n] + rhs[n];
#endif
        }
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Compare the SIMD result with the scalar result.
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", n, lhs[n], rhs[n], expect[n], actual[n]);
    }

    iot_printf("add const of int8 test end\n");
}
|
|
|
|
// Self-check and cycle-count comparison for the int8 subtract kernels.
//   1. vector_sub_const_int8   reference: a[i] - b[0]
//   2. vector_const_sub_int8   reference: b[0] - a[i]
//   3. vector_sub_int8 is run 1024 times (timed), then the scalar loop is
//      run 1024 times (timed) and used as the reference for the SIMD output.
// When SATURATION is defined the references clamp to [-128, 127]; otherwise
// they are truncated to int8_t on assignment.
void vector_sub_test_int8() {
    enum { N_ELEMS = 128, N_REPEATS = 1024 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    // Vector - constant.
    vector_sub_const_int8(lhs, rhs[0], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        int16_t wide = lhs[n] - rhs[0];
        if (wide > 127) {
            expect[n] = 127;
        } else if (wide < -128) {
            expect[n] = -128;
        } else {
            expect[n] = wide;
        }
#else
        expect[n] = lhs[n] - rhs[0];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant - vector.
    vector_const_sub_int8(lhs, rhs[0], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        int16_t wide = rhs[0] - lhs[n];
        if (wide > 127) {
            expect[n] = 127;
        } else if (wide < -128) {
            expect[n] = -128;
        } else {
            expect[n] = wide;
        }
#else
        expect[n] = rhs[0] - lhs[n];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Timed run of the SIMD kernel.
    uint64_t t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < N_REPEATS; pass++) {
        vector_sub_int8(lhs, rhs, actual, N_ELEMS);
    }
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Timed run of the scalar reference.
    t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < N_REPEATS; pass++) {
        for (uint8_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
            int16_t wide = lhs[n] - rhs[n];
            if (wide > 127) {
                expect[n] = 127;
            } else if (wide < -128) {
                expect[n] = -128;
            } else {
                expect[n] = wide;
            }
#else
            expect[n] = lhs[n] - rhs[n];
#endif
        }
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Compare the SIMD result with the scalar result.
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("sub const of int8 test end\n");
}
|
|
|
|
// Self-check for the int8 multiply kernels, with a cycle-count comparison
// for the widening multiply.
// Covered kernels, in order: vector_multiply_int8 (int16 result, timed),
// vector_multiply_const_int8, the *_high_8bit pair (reference: product >> 8)
// and the *_low_8bit pair (reference: product & 0xff, or the product clamped
// to [-128, 127] when SATURATION is defined). The constant operand is b[1].
// All buffers come from os_mem_malloc and are released before returning.
void vector_mul_test_int8() {
    uint8_t count = 128;
    int8_t *lhs = (int8_t *)os_mem_malloc(1, count);
    int8_t *rhs = (int8_t *)os_mem_malloc(1, count);
    int8_t *low = (int8_t *)os_mem_malloc(1, count);
    int8_t *high = (int8_t *)os_mem_malloc(1, count);
    int16_t *wide = (int16_t *)os_mem_malloc(1, count * 2);

    for (uint8_t n = 0; n < count; n++) {
        lhs[n] = -2 * (n - 25);
        rhs[n] = 3 * (n - 33);
    }

    // Timed run of the widening SIMD multiply.
    uint64_t t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < 1024; pass++) {
        vector_multiply_int8(lhs, rhs, wide, count);
    }
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    for (uint8_t n = 0; n < count; n++) {
        int16_t ref = lhs[n] * rhs[n];
        if (ref == wide[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, ref, wide[n]);
    }

    // Timed run of the scalar equivalent (overwrites the SIMD output).
    t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < 1024; pass++) {
        for (uint8_t n = 0; n < count; n++) {
            wide[n] = lhs[n] * rhs[n];
        }
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));
    iot_printf("vector_multiply_int8 finished\n");

    // Widening multiply by a constant.
    vector_multiply_const_int8(lhs, rhs[1], wide, count);
    for (uint8_t n = 0; n < count; n++) {
        int16_t ref = lhs[n] * rhs[1];
        if (ref == wide[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, ref, wide[n]);
    }
    iot_printf("vector_multiply_const_int8 finished\n");

    // Upper byte of the product.
    vector_multiply_int8_high_8bit(lhs, rhs, high, count);
    for (uint8_t n = 0; n < count; n++) {
        int16_t ref = (lhs[n] * rhs[n]) >> 8;
        if (ref == high[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, ref, high[n]);
    }
    iot_printf("vector_multiply_int8_high finished\n");

    vector_multiply_const_int8_high_8bit(lhs, rhs[1], high, count);
    for (uint8_t n = 0; n < count; n++) {
        int16_t ref = (lhs[n] * rhs[1]) >> 8;
        if (ref == high[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, ref, high[n]);
    }
    iot_printf("vector_multiply_const_int8_high finished\n");

    // Lower byte of the product (or the saturated product).
    vector_multiply_int8_low_8bit(lhs, rhs, low, count);
    for (uint8_t n = 0; n < count; n++) {
#ifdef SATURATION
        int16_t ref = lhs[n] * rhs[n];
        if (ref > 127) {
            ref = 127;
        } else if (ref < -128) {
            ref = -128;
        }
#else
        int8_t ref = (lhs[n] * rhs[n]) & 0xff;
#endif
        if (ref == low[n]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", n, lhs[n], rhs[n], ref, low[n]);
    }
    iot_printf("vector_multiply_int8_low finished\n");

    vector_multiply_const_int8_low_8bit(lhs, rhs[1], low, count);
    for (uint8_t n = 0; n < count; n++) {
#ifdef SATURATION
        int16_t ref = lhs[n] * rhs[1];
        if (ref > 127) {
            ref = 127;
        } else if (ref < -128) {
            ref = -128;
        }
#else
        int8_t ref = (lhs[n] * rhs[1]) & 0xff;
#endif
        if (ref == low[n]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", n, lhs[n], rhs[1], ref, low[n]);
    }
    iot_printf("vector_multiply_const_int8_low finished\n");

    os_mem_free(lhs);
    os_mem_free(rhs);
    os_mem_free(high);
    os_mem_free(low);
    os_mem_free(wide);
}
|
|
|
|
// Self-check for the int8 multiply-accumulate kernels:
//   vector_mul_add_int8            reference: j[i] + a[i] * b[i]  (int8 result)
//   vector_mul_sub_int8            reference: j[i] - a[i] * b[i]  (int8 result)
//   vector_mul_add_int8_to_int32   reference: J[i] + a[i] * b[i]  (int32 result)
//   vector_mul_sub_int8_to_int32   reference: J[i] - a[i] * b[i]  (int32 result)
// When SATURATION is defined, the int8 references clamp both the product and
// the final sum/difference to [-128, 127]. The first kernel call is timed
// with cpu_get_mcycle. Mismatching elements are printed via iot_printf.
//
// All six work buffers are obtained from os_mem_malloc and are now released
// before returning; previously they were leaked on every call.
void vector_madd_msub_test_int8() {
    uint8_t length = 128;
    int8_t *a = os_mem_malloc(1, length);
    int8_t *b = os_mem_malloc(1, length);
    int8_t *j = os_mem_malloc(1, length);
    int8_t *o = os_mem_malloc(1, length);
    int32_t *J = os_mem_malloc(1, length * 4);
    int32_t *O = os_mem_malloc(1, length * 4);

    for (uint8_t i = 0; i < length; i++) {
        a[i] = -2 * (i - 25);
        b[i] = 3 * (i - 33);
        j[i] = i - 50;
        J[i] = i * 100 - 500;
    }

    // int8 multiply-add, timed once.
    uint64_t begin = cpu_get_mcycle();
    vector_mul_add_int8(a, b, j, o, length);
    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    for (uint8_t i = 0; i < length; i++) {
#ifdef SATURATION
        int16_t m = a[i] * b[i];
        m = m > 127 ? 127 : (m < -128 ? -128 : m);
        int16_t g = (int16_t)j[i] + m;
        g = g > 127 ? 127 : (g < -128 ? -128 : g);
#else
        int8_t g = j[i] + a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_int8 finished\n");

    // int8 multiply-subtract.
    vector_mul_sub_int8(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
#ifdef SATURATION
        int16_t m = a[i] * b[i];
        m = m > 127 ? 127 : (m < -128 ? -128 : m);
        int16_t g = (int16_t)j[i] - m;
        g = g > 127 ? 127 : (g < -128 ? -128 : g);
#else
        int8_t g = j[i] - a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_int8 finished\n");

    // Multiply-add accumulating into int32.
    vector_mul_add_int8_to_int32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        int32_t g = J[i] + a[i] * b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_add_int8_to_int32 finished\n");

    // Multiply-subtract accumulating into int32.
    vector_mul_sub_int8_to_int32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        int32_t g = J[i] - a[i] * b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_sub_int8_to_int32 finished\n");

    // Release the work buffers (same pattern as vector_mul_test_int8).
    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(j);
    os_mem_free(o);
    os_mem_free(J);
    os_mem_free(O);
}
|
|
// Self-check for vector_min_int8 and vector_min_const_int8.
// Inputs are 100-element ramps (2*i and 3*i+23, converted to int8_t). Each
// kernel output is compared with a scalar reference and every mismatching
// element is reported through iot_printf.
void vector_min_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    // Vector-vs-vector variant.
    vector_min_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        if (lhs[n] < rhs[n]) {
            expect[n] = lhs[n];
        } else {
            expect[n] = rhs[n];
        }
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector-vs-constant variant; the constant is rhs[0].
    vector_min_const_int8(lhs, rhs[0], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        if (lhs[n] < rhs[0]) {
            expect[n] = lhs[n];
        } else {
            expect[n] = rhs[0];
        }
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("min of int8 test end\n");
}
|
|
|
|
// Self-check for vector_max_int8 and vector_max_const_int8.
// Inputs are 100-element ramps (2*i and 3*i+23, converted to int8_t). Each
// kernel output is compared with a scalar reference and every mismatching
// element is reported through iot_printf.
void vector_max_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    // Vector-vs-vector variant.
    vector_max_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        if (lhs[n] > rhs[n]) {
            expect[n] = lhs[n];
        } else {
            expect[n] = rhs[n];
        }
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector-vs-constant variant; the constant is rhs[0].
    vector_max_const_int8(lhs, rhs[0], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        if (lhs[n] > rhs[0]) {
            expect[n] = lhs[n];
        } else {
            expect[n] = rhs[0];
        }
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("max of int8 test end\n");
}
|
|
|
|
// Self-check for vector_equal_int8 and vector_equal_const_int8.
// The scalar reference stores 1 where the operands are equal and 0 otherwise;
// any element where the kernel output differs is printed.
void vector_equal_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Element-wise comparison of two vectors.
    vector_equal_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] == rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Comparison of a vector against the constant rhs[11].
    vector_equal_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] == rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("equal of int8 test end\n");
}
|
|
|
|
// Self-check for vector_not_equal_int8 and vector_not_equal_const_int8.
// The scalar reference stores 1 where the operands differ and 0 otherwise;
// any element where the kernel output disagrees is printed.
void vector_not_equal_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Element-wise comparison of two vectors.
    vector_not_equal_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] != rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Comparison of a vector against the constant rhs[11].
    vector_not_equal_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] != rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("not equal of int8 test end\n");
}
|
|
|
|
// Self-check for the three int8 "less than" kernels:
//   vector_less_than_int8        reference: a[i] <  b[i]
//   vector_less_than_const_int8  reference: a[i] <  b[11]
//   vector_const_less_than_int8  reference: a[i] >  b[11]  (i.e. const < a[i])
// Mismatching elements are printed via iot_printf.
void vector_less_than_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    vector_less_than_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] < rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_less_than_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] < rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_const_less_than_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] > rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("less than of int8 test end\n");
}
|
|
|
|
// Self-check for the three int8 "greater or equal" kernels:
//   vector_greater_or_equal_int8        reference: a[i] >= b[i]
//   vector_greater_or_equal_const_int8  reference: a[i] >= b[11]
//   vector_const_greater_or_equal_int8  reference: a[i] <= b[11]  (const >= a[i])
// Mismatching elements are printed via iot_printf.
void vector_greater_or_equal_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    vector_greater_or_equal_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] >= rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_greater_or_equal_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] >= rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_const_greater_or_equal_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (lhs[n] <= rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("greater or equal of int8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_and_int8 and vector_logic_and_const_int8.
// The scalar reference is the bitwise AND of the operands; mismatching
// elements are printed via iot_printf.
void vector_logic_and_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector AND vector.
    vector_logic_and_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] & rhs[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector AND the constant rhs[11].
    vector_logic_and_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] & rhs[11];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic and of int8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_or_int8 and vector_logic_or_const_int8.
// The scalar reference is the bitwise OR of the operands; mismatching
// elements are printed via iot_printf.
void vector_logic_or_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector OR vector.
    vector_logic_or_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] | rhs[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector OR the constant rhs[11].
    vector_logic_or_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] | rhs[11];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic or of int8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_xor_int8 and vector_logic_xor_const_int8.
// The scalar reference is the bitwise XOR of the operands; mismatching
// elements are printed via iot_printf.
void vector_logic_xor_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector XOR vector.
    vector_logic_xor_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] ^ rhs[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector XOR the constant rhs[11].
    vector_logic_xor_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = lhs[n] ^ rhs[11];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic xor of int8 test end\n");
}
|
|
|
|
// Self-check for vector_logic_xnor_int8 and vector_logic_xnor_const_int8.
// The scalar reference is ~(a ^ b) converted to int8_t; mismatching elements
// are printed via iot_printf.
void vector_logic_xnor_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // Vector XNOR vector.
    vector_logic_xnor_int8(lhs, rhs, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = ~(lhs[n] ^ rhs[n]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector XNOR the constant rhs[11].
    vector_logic_xnor_const_int8(lhs, rhs[11], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = ~(lhs[n] ^ rhs[11]);
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic xnor of int8 test end\n");
}
|
|
|
|
// Self-check for the three int8 left-shift kernels:
//   vector_left_shift_int8        reference: a[i] << b[i]
//   vector_left_shift_const_int8  reference: a[i] << b[2]
//   vector_const_left_shift_int8  reference: a[6] << b[i]
// Values are i+1 and the (unsigned) shift amounts cycle through 1..7.
// References are converted to int8_t; mismatching elements are printed.
void vector_left_shift_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t value[N_ELEMS];
    uint8_t amount[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        value[n] = n + 1;
        amount[n] = n % 7 + 1;
    }

    vector_left_shift_int8(value, amount, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] << amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_left_shift_const_int8(value, amount[2], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] << amount[2];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant value shifted by a per-element amount.
    vector_const_left_shift_int8(amount, value[6], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[6] << amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("left shift of int8 test end\n");
}
|
|
|
|
// Self-check for the three int8 logical right-shift kernels:
//   vector_logic_right_shift_int8        reference: (uint8_t)a[i] >> b[i]
//   vector_logic_right_shift_const_int8  reference: (uint8_t)a[i] >> b[2]
//   vector_const_logic_right_shift_int8  reference: (uint8_t)a[6] >> b[i]
// The reference reinterprets each value as uint8_t before shifting so that
// zeros are shifted in. Values are i-50 and shift amounts cycle through 1..7.
void vector_logic_right_shift_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t value[N_ELEMS];
    uint8_t amount[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        value[n] = n - 50;
        amount[n] = n % 7 + 1;
    }

    vector_logic_right_shift_int8(value, amount, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (uint8_t)value[n] >> amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_logic_right_shift_const_int8(value, amount[2], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (uint8_t)value[n] >> amount[2];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant value shifted by a per-element amount.
    vector_const_logic_right_shift_int8(amount, value[6], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = (uint8_t)value[6] >> amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("logic right shift of int8 test end\n");
}
|
|
|
|
// Self-check for the three int8 arithmetic right-shift kernels:
//   vector_arithmatic_right_shift_int8        reference: a[i] >> b[i]
//   vector_arithmatic_right_shift_const_int8  reference: a[i] >> b[2]
//   vector_const_arithmatic_right_shift_int8  reference: a[6] >> b[i]
// The reference shifts the signed value directly. Values are i-50 and shift
// amounts cycle through 1..7. Mismatching elements are printed.
void vector_arithmatic_right_shift_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t value[N_ELEMS];
    uint8_t amount[N_ELEMS];
    int8_t actual[N_ELEMS];
    int8_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        value[n] = n - 50;
        amount[n] = n % 7 + 1;
    }

    vector_arithmatic_right_shift_int8(value, amount, actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] >> amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    vector_arithmatic_right_shift_const_int8(value, amount[2], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[n] >> amount[2];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant value shifted by a per-element amount.
    vector_const_arithmatic_right_shift_int8(amount, value[6], actual, N_ELEMS);
    for (uint8_t n = 0; n < N_ELEMS; n++) {
        expect[n] = value[6] >> amount[n];
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("arithmatic right shift of int8 test end\n");
}
|
|
|
|
// Self-check for vector_add_const_uint16 and vector_add_uint16.
// Inputs are 50-element ramps (2000*i and 3000*i+23, truncated to uint16_t).
// When SATURATION is defined the reference sum clamps at 65535; otherwise it
// wraps on assignment to uint16_t. Mismatching elements are printed.
void vector_add_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t actual[N_ELEMS];
    uint16_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2000 * n;
        rhs[n] = 3000 * n + 23;
    }

    // Vector + constant (rhs[0]).
    vector_add_const_uint16(lhs, rhs[0], actual, N_ELEMS);
    for (uint16_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        uint32_t wide = lhs[n] + rhs[0];
        if (wide > 65535) {
            expect[n] = 65535;
        } else {
            expect[n] = wide;
        }
#else
        expect[n] = lhs[n] + rhs[0];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector + vector.
    vector_add_uint16(lhs, rhs, actual, N_ELEMS);
    for (uint16_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        uint32_t wide = lhs[n] + rhs[n];
        if (wide > 65535) {
            expect[n] = 65535;
        } else {
            expect[n] = wide;
        }
#else
        expect[n] = lhs[n] + rhs[n];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("add const of uint16 test end\n");
}
|
|
|
|
// Self-check for the three uint16 subtract kernels:
//   vector_sub_const_uint16  reference: a[i] - b[0]
//   vector_const_sub_uint16  reference: b[0] - a[i]
//   vector_sub_uint16        reference: a[i] - b[i]
// When SATURATION is defined the reference clamps negative differences to 0;
// otherwise the difference wraps on assignment to uint16_t.
void vector_sub_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t actual[N_ELEMS];
    uint16_t expect[N_ELEMS];

    for (uint32_t n = 0; n < N_ELEMS; n++) {
        lhs[n] = 2000 * n;
        rhs[n] = 3000 * n + 23;
    }

    // Vector - constant.
    vector_sub_const_uint16(lhs, rhs[0], actual, N_ELEMS);
    for (uint16_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        expect[n] = (lhs[n] < rhs[0]) ? 0 : lhs[n] - rhs[0];
#else
        expect[n] = lhs[n] - rhs[0];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Constant - vector.
    vector_const_sub_uint16(lhs, rhs[0], actual, N_ELEMS);
    for (uint16_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        expect[n] = (lhs[n] > rhs[0]) ? 0 : rhs[0] - lhs[n];
#else
        expect[n] = rhs[0] - lhs[n];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    // Vector - vector.
    vector_sub_uint16(lhs, rhs, actual, N_ELEMS);
    for (uint16_t n = 0; n < N_ELEMS; n++) {
#ifdef SATURATION
        expect[n] = (lhs[n] < rhs[n]) ? 0 : lhs[n] - rhs[n];
#else
        expect[n] = lhs[n] - rhs[n];
#endif
        if (expect[n] == actual[n]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", n, expect[n], actual[n]);
    }

    iot_printf("sub const of uint16 test end\n");
}
|
|
|
|
// Self-check for the uint16 multiply kernels.
// Covered kernels, in order: vector_multiply_uint16 (uint32 result),
// vector_multiply_const_uint16, the *_high_16bit pair (reference: product
// >> 16) and the *_low_16bit pair (reference: product & 0xffff, or the
// product clamped to 65535 when SATURATION is defined). The constant operand
// is b[1]. All buffers come from os_mem_malloc and are released at the end.
//
// The golden products are computed in uint32_t. A plain `a[i] * b[i]` on
// uint16_t operands is promoted to signed int, and where int is 32 bits the
// test data overflows it (e.g. 63536 * 44536 at i = 26), which is undefined
// behaviour; the subsequent `>> 16` would also act on a possibly negative
// value. Widening one operand first keeps the reference well defined.
void vector_mul_test_uint16() {
    uint16_t length = 50;
    uint16_t *a = (uint16_t *)os_mem_malloc(1, length * 2);
    uint16_t *b = (uint16_t *)os_mem_malloc(1, length * 2);
    uint16_t *l = (uint16_t *)os_mem_malloc(1, length * 2);
    uint16_t *h = (uint16_t *)os_mem_malloc(1, length * 2);
    uint32_t *w = (uint32_t *)os_mem_malloc(1, length * 2 * 2);

    for (uint16_t i = 0; i < length; i++) {
        // Negative intermediate values wrap into the uint16_t range on purpose.
        a[i] = (uint16_t)(-2000 * (i - 25));
        b[i] = (uint16_t)(3000 * (i - 33));
    }

    // Full 32-bit product.
    vector_multiply_uint16(a, b, w, length);
    for (uint16_t i = 0; i < length; i++) {
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_uint16 finished\n");

    vector_multiply_const_uint16(a, b[1], w, length);
    for (uint16_t i = 0; i < length; i++) {
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint16 finished\n");

    // Upper 16 bits of the product.
    vector_multiply_uint16_high_16bit(a, b, h, length);
    for (uint16_t i = 0; i < length; i++) {
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[i]) >> 16);
        if (g != h[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint16_high finished\n");

    vector_multiply_const_uint16_high_16bit(a, b[1], h, length);
    for (uint16_t i = 0; i < length; i++) {
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[1]) >> 16);
        if (g != h[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint16_high finished\n");

    // Lower 16 bits of the product (or the saturated product).
    vector_multiply_uint16_low_16bit(a, b, l, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[i];
        if (g > 65535) {
            g = 65535;
        }
#else
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[i]) & 0xffff);
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint16_low finished\n");

    vector_multiply_const_uint16_low_16bit(a, b[1], l, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[1];
        if (g > 65535) {
            g = 65535;
        }
#else
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[1]) & 0xffff);
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint16_low finished\n");

    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(h);
    os_mem_free(l);
    os_mem_free(w);
}
|
|
|
|
/*
 * Self-test for the uint16 multiply-accumulate kernels.
 *
 * Runs vector_mul_add_uint16 / vector_mul_sub_uint16 (16-bit accumulator) and
 * vector_mul_add_uint16_to_uint64 / vector_mul_sub_uint16_to_uint64 (64-bit
 * accumulator) on 50 generated elements, compares each output element with a
 * scalar reference computed here and prints one "incorrect at ..." line per
 * mismatch. All work buffers are released before returning.
 */
void vector_madd_msub_test_uint16() {
    uint16_t length = 50;
    uint16_t *a = os_mem_malloc(1, length * 2);
    uint16_t *b = os_mem_malloc(1, length * 2);
    uint16_t *j = os_mem_malloc(1, length * 2);
    uint16_t *o = os_mem_malloc(1, length * 2);
    uint64_t *J = os_mem_malloc(1, length * 2 * 4);
    uint64_t *O = os_mem_malloc(1, length * 2 * 4);

    for (uint16_t i = 0; i < length; i++) {
        /* The products below exceed 16 bits on purpose; the stores wrap. */
        a[i] = 2000 * (i + 235);
        b[i] = 3000 * (i + 333);
        j[i] = i + 504;
        J[i] = i * 504 + 800000;
    }

    vector_mul_add_uint16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
        /*
         * Multiply in uint32_t: uint16_t operands would otherwise promote to
         * signed int and the product could overflow it.
         */
#ifdef SATURATION
        uint32_t m = (uint32_t)a[i] * (uint32_t)b[i];
        m = m > 65535 ? 65535 : m;
        uint32_t g = j[i] + m;
        g = g > 65535 ? 65535 : g;
#else
        uint16_t g = (uint16_t)(j[i] + (uint32_t)a[i] * (uint32_t)b[i]);
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint16 finished\n");

    vector_mul_sub_uint16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint32_t m = (uint32_t)a[i] * (uint32_t)b[i];
        m = m > 65535 ? 65535 : m;
        uint32_t g = j[i] < m ? 0 : j[i] - m;
#else
        uint16_t g = (uint16_t)(j[i] - (uint32_t)a[i] * (uint32_t)b[i]);
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint16 finished\n");

    vector_mul_add_uint16_to_uint64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        uint64_t g = J[i] + (uint64_t)a[i] * (uint64_t)b[i];
        if (g != O[i]) {
            /* 64-bit values are printed as high word then low word. */
            iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i,
                       (uint32_t)(J[i] >> 32), (uint32_t)J[i], a[i], b[i],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(O[i] >> 32), (uint32_t)O[i]);
        }
    }
    /* NOTE(review): label says uint32 although the kernel above is the uint64 variant. */
    iot_printf("vector_mul_add_uint16_to_uint32 finished\n");

    vector_mul_sub_uint16_to_uint64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        /* Unsigned subtraction: wraps modulo 2^64 when the product exceeds J[i]. */
        uint64_t g = J[i] - (uint64_t)a[i] * (uint64_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i,
                       (uint32_t)(J[i] >> 32), (uint32_t)J[i], a[i], b[i],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(O[i] >> 32), (uint32_t)O[i]);
        }
    }
    /* NOTE(review): label says uint32 although the kernel above is the uint64 variant. */
    iot_printf("vector_mul_sub_uint16_to_uint32 finished\n");

    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(j);
    os_mem_free(o);
    os_mem_free(J);
    os_mem_free(O);
}
|
|
|
|
/*
 * Self-test for the uint16 element-wise minimum kernels.
 *
 * Checks vector_min_uint16 (vector vs vector) and vector_min_const_uint16
 * (vector vs scalar) on 50 generated elements and prints one line for every
 * element that differs from the scalar reference.
 */
void vector_min_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    /* Vector-vector variant. */
    vector_min_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] < rhs[k]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[k];
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is the first element of rhs. */
    const uint16_t scalar = rhs[0];
    vector_min_const_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] < scalar) {
            expect[k] = lhs[k];
        } else {
            expect[k] = scalar;
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("min of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 element-wise maximum kernels.
 *
 * Checks vector_max_uint16 (vector vs vector) and vector_max_const_uint16
 * (vector vs scalar) on 50 generated elements and prints one line for every
 * element that differs from the scalar reference.
 */
void vector_max_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    /* Vector-vector variant. */
    vector_max_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] > rhs[k]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[k];
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is the first element of rhs. */
    const uint16_t scalar = rhs[0];
    vector_max_const_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] > scalar) {
            expect[k] = lhs[k];
        } else {
            expect[k] = scalar;
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("max of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 element-wise equality kernels.
 *
 * Checks vector_equal_uint16 (vector vs vector) and vector_equal_const_uint16
 * (vector vs scalar) on 50 generated elements; the reference value is 1 where
 * the operands are equal and 0 otherwise. Mismatches are printed.
 */
void vector_equal_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_equal_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] == rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const uint16_t scalar = rhs[11];
    vector_equal_const_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] == scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("equal of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 element-wise inequality kernels.
 *
 * Checks vector_not_equal_uint16 (vector vs vector) and
 * vector_not_equal_const_uint16 (vector vs scalar) on 50 generated elements;
 * the reference value is 1 where the operands differ and 0 otherwise.
 * Mismatches are printed.
 */
void vector_not_equal_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_not_equal_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] != rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const uint16_t scalar = rhs[11];
    vector_not_equal_const_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] != scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("not equal of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 element-wise "less than" kernels.
 *
 * Covers three variants on 50 generated elements:
 *   - vector_less_than_uint16        : lhs[k] <  rhs[k]
 *   - vector_less_than_const_uint16  : lhs[k] <  scalar
 *   - vector_const_less_than_uint16  : scalar <  lhs[k]
 * The reference value is 1 when the relation holds and 0 otherwise;
 * mismatches are printed.
 */
void vector_less_than_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_less_than_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] < rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector on the left, scalar rhs[11] on the right. */
    const uint16_t scalar = rhs[11];
    vector_less_than_const_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] < scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Scalar on the left, vector on the right. */
    vector_const_less_than_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] > scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("less than of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 element-wise "greater or equal" kernels.
 *
 * Covers three variants on 50 generated elements:
 *   - vector_greater_or_equal_uint16        : lhs[k] >= rhs[k]
 *   - vector_greater_or_equal_const_uint16  : lhs[k] >= scalar
 *   - vector_const_greater_or_equal_uint16  : scalar >= lhs[k]
 * The reference value is 1 when the relation holds and 0 otherwise;
 * mismatches are printed.
 */
void vector_greater_or_equal_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_greater_or_equal_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] >= rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector on the left, scalar rhs[11] on the right. */
    const uint16_t scalar = rhs[11];
    vector_greater_or_equal_const_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] >= scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Scalar on the left, vector on the right. */
    vector_const_greater_or_equal_uint16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] <= scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("greater or equal of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 bitwise AND kernels.
 *
 * Checks vector_logic_and_uint16 (vector vs vector) and
 * vector_logic_and_const_uint16 (vector vs scalar) on 50 generated elements
 * against the C `&` operator and prints every mismatch.
 */
void vector_logic_and_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_logic_and_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] & rhs[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const uint16_t mask = rhs[11];
    vector_logic_and_const_uint16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] & mask;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic and of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 bitwise OR kernels.
 *
 * Checks vector_logic_or_uint16 (vector vs vector) and
 * vector_logic_or_const_uint16 (vector vs scalar) on 50 generated elements
 * against the C `|` operator and prints every mismatch.
 */
void vector_logic_or_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_logic_or_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] | rhs[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const uint16_t mask = rhs[11];
    vector_logic_or_const_uint16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] | mask;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic or of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 bitwise XOR kernels.
 *
 * Checks vector_logic_xor_uint16 (vector vs vector) and
 * vector_logic_xor_const_uint16 (vector vs scalar) on 50 generated elements
 * against the C `^` operator and prints every mismatch.
 */
void vector_logic_xor_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_logic_xor_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] ^ rhs[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const uint16_t mask = rhs[11];
    vector_logic_xor_const_uint16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] ^ mask;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic xor of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 bitwise XNOR kernels.
 *
 * Checks vector_logic_xnor_uint16 (vector vs vector) and
 * vector_logic_xnor_const_uint16 (vector vs scalar) on 50 generated elements
 * against ~(x ^ y) truncated to 16 bits and prints every mismatch.
 */
void vector_logic_xnor_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t lhs[ELEM_COUNT];
    uint16_t rhs[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23; /* wraps for small k before being stored */
    }

    /* Vector-vector variant. */
    vector_logic_xnor_uint16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        /* The store to uint16_t drops the upper bits set by the int-wide `~`. */
        expect[k] = ~(lhs[k] ^ rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const uint16_t mask = rhs[11];
    vector_logic_xnor_const_uint16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = ~(lhs[k] ^ mask);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic xnor of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 left-shift kernels.
 *
 * Covers three variants on 50 generated elements (shift amounts 1..7):
 *   - vector_left_shift_uint16        : value[k] << amount[k]
 *   - vector_left_shift_const_uint16  : value[k] << amount[2]
 *   - vector_const_left_shift_uint16  : value[6] << amount[k]
 * Each output is compared with the C `<<` result truncated to 16 bits and
 * mismatches are printed.
 */
void vector_left_shift_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t value[ELEM_COUNT];
    uint16_t amount[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        value[k] = k + 1;
        amount[k] = k % 7 + 1;
    }

    /* Per-element value, per-element shift amount. */
    vector_left_shift_uint16(value, amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] << amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Per-element value, one shared shift amount. */
    const uint16_t fixed_amount = amount[2];
    vector_left_shift_const_uint16(value, fixed_amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] << fixed_amount;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* One shared value, per-element shift amount. */
    const uint16_t fixed_value = value[6];
    vector_const_left_shift_uint16(amount, fixed_value, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = fixed_value << amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("left shift of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint16 right-shift kernels.
 *
 * Covers three variants on 50 generated elements (shift amounts 1..7):
 *   - vector_right_shift_uint16        : value[k] >> amount[k]
 *   - vector_right_shift_const_uint16  : value[k] >> amount[2]
 *   - vector_const_right_shift_uint16  : value[6] >> amount[k]
 * Each output is compared with the C `>>` result and mismatches are printed.
 */
void vector_right_shift_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t value[ELEM_COUNT];
    uint16_t amount[ELEM_COUNT];
    uint16_t result[ELEM_COUNT];
    uint16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        value[k] = k - 50; /* unsigned wrap: stores values near the top of the 16-bit range */
        amount[k] = k % 7 + 1;
    }

    /* Per-element value, per-element shift amount. */
    vector_right_shift_uint16(value, amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] >> amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Per-element value, one shared shift amount. */
    const uint16_t fixed_amount = amount[2];
    vector_right_shift_const_uint16(value, fixed_amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] >> fixed_amount;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* One shared value, per-element shift amount. */
    const uint16_t fixed_value = value[6];
    vector_const_right_shift_uint16(amount, fixed_value, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = fixed_value >> amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic right shift of uint16 test end\n");
}
|
|
|
|
/*
 * Self-test for the kernels that split uint16 elements into bytes.
 *
 * Checks vector_high_half_bits_uint16 (upper byte, also timed with
 * cpu_get_mcycle), vector_low_half_bits_uint16 (lower byte) and
 * vector_high_and_low_half_bits_uint16 (both at once) on 50 generated
 * elements, printing every mismatch against `>> 8` / `& 0xff`.
 */
void vector_half_bits_test_uint16() {
    enum { ELEM_COUNT = 50 };
    uint16_t input[ELEM_COUNT];
    uint8_t high[ELEM_COUNT];
    uint8_t low[ELEM_COUNT];
    uint8_t expect[ELEM_COUNT];

    /* Low byte is k, high byte is k + 80. */
    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        input[k] = k + (k + 80) * 256;
    }

    /* Upper byte only; report the cycle count of the single call. */
    uint64_t start = cpu_get_mcycle();
    vector_high_half_bits_uint16(input, high, ELEM_COUNT);
    uint64_t stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(stop - start));

    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = input[k] >> 8;
        if (expect[k] == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], high[k]);
    }
    iot_printf("vector_high_half_bits_uint16 finished\n");

    /* Lower byte only. */
    vector_low_half_bits_uint16(input, low, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = input[k] & 0xff;
        if (expect[k] == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], low[k]);
    }
    iot_printf("vector_low_half_bits_uint16 finished\n");

    /* Both halves in one call; expect[k] is reused for each half in turn. */
    vector_high_and_low_half_bits_uint16(input, high, low, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = input[k] >> 8;
        if (expect[k] != high[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expect[k], high[k]);
        }

        expect[k] = input[k] & 0xff;
        if (expect[k] != low[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expect[k], low[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_uint16 finished\n");
}
|
|
|
|
/*
 * Self-test and micro-benchmark for the int16 addition kernels.
 *
 * 1. Checks vector_add_const_int16 (vector + scalar) element by element.
 * 2. Times 1000 calls of vector_add_int16 (vector + vector).
 * 3. Times 1000 passes of the equivalent scalar loop, then compares the
 *    scalar result with the kernel output from step 2.
 * With SATURATION defined the reference clamps to [-32768, 32767]; otherwise
 * the sum is simply stored into an int16_t.
 */
void vector_add_test_int16() {
    enum { ELEM_COUNT = 50, REPEAT_COUNT = 1000 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    /* The ramps deliberately exceed the int16 range for larger k. */
    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2000 * k;
        rhs[k] = 3000 * k + 23;
    }

    /* Vector + scalar; the scalar is the first element of rhs. */
    const int16_t scalar = rhs[0];
    vector_add_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
#ifdef SATURATION
        int32_t wide = lhs[k] + scalar;
        if (wide > 32767) {
            wide = 32767;
        } else if (wide < -32768) {
            wide = -32768;
        }
        expect[k] = wide;
#else
        expect[k] = lhs[k] + scalar;
#endif
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Benchmark the vector + vector kernel. */
    uint64_t start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEAT_COUNT; pass++) {
        vector_add_int16(lhs, rhs, result, ELEM_COUNT);
    }
    uint64_t stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(stop - start));

    /* Benchmark the scalar reference loop over the same data. */
    start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEAT_COUNT; pass++) {
        for (uint16_t k = 0; k < ELEM_COUNT; k++) {
#ifdef SATURATION
            int32_t wide = lhs[k] + rhs[k];
            if (wide > 32767) {
                wide = 32767;
            } else if (wide < -32768) {
                wide = -32768;
            }
            expect[k] = wide;
#else
            expect[k] = lhs[k] + rhs[k];
#endif
        }
    }
    stop = cpu_get_mcycle();

    /* Verification is kept outside the timed region. */
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }
    iot_printf("time = %d\n", (uint32_t)(stop - start));
    iot_printf("add const of int16 test end\n");
}
|
|
|
|
/*
 * Self-test and timing probe for the int16 subtraction kernels.
 *
 * 1. Checks vector_sub_const_int16 (vector - scalar).
 * 2. Checks vector_const_sub_int16 (scalar - vector).
 * 3. Times one call of vector_sub_int16 (vector - vector) and one pass of the
 *    equivalent scalar loop, then compares the two results.
 * With SATURATION defined the reference clamps to [-32768, 32767]; otherwise
 * the difference is simply stored into an int16_t.
 */
void vector_sub_test_int16() {
    enum { ELEM_COUNT = 50, REPEAT_COUNT = 1 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    /* The ramps deliberately exceed the int16 range for larger k. */
    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2000 * k;
        rhs[k] = 3000 * k + 23;
    }

    /* Vector - scalar; the scalar is the first element of rhs. */
    const int16_t scalar = rhs[0];
    vector_sub_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
#ifdef SATURATION
        int32_t wide = lhs[k] - scalar;
        if (wide > 32767) {
            wide = 32767;
        } else if (wide < -32768) {
            wide = -32768;
        }
        expect[k] = wide;
#else
        expect[k] = lhs[k] - scalar;
#endif
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Scalar - vector. */
    vector_const_sub_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
#ifdef SATURATION
        int32_t wide = scalar - lhs[k];
        if (wide > 32767) {
            wide = 32767;
        } else if (wide < -32768) {
            wide = -32768;
        }
        expect[k] = wide;
#else
        expect[k] = scalar - lhs[k];
#endif
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Time the vector - vector kernel. */
    uint64_t start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEAT_COUNT; pass++) {
        vector_sub_int16(lhs, rhs, result, ELEM_COUNT);
    }
    uint64_t stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(stop - start));

    /* Time the scalar reference loop over the same data. */
    start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEAT_COUNT; pass++) {
        for (uint16_t k = 0; k < ELEM_COUNT; k++) {
#ifdef SATURATION
            int32_t wide = lhs[k] - rhs[k];
            if (wide > 32767) {
                wide = 32767;
            } else if (wide < -32768) {
                wide = -32768;
            }
            expect[k] = wide;
#else
            expect[k] = lhs[k] - rhs[k];
#endif
        }
    }
    stop = cpu_get_mcycle();

    /* Verification is kept outside the timed region. */
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }
    iot_printf("time = %d\n", (uint32_t)(stop - start));
    iot_printf("sub const of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 multiplication kernels.
 *
 * For both the vector*vector and vector*scalar forms it checks:
 *   - the full 32-bit product        (vector_multiply[_const]_int16)
 *   - the upper 16 bits of it        (..._high_16bit)
 *   - the lower 16 bits of it        (..._low_16bit; clamped to the int16
 *                                     range instead when SATURATION is defined)
 * Buffers are heap-allocated through os_mem_malloc and released at the end.
 */
void vector_mul_test_int16() {
    uint16_t count = 50;
    int16_t *lhs = os_mem_malloc(1, count * 2);
    int16_t *rhs = os_mem_malloc(1, count * 2);
    int16_t *low = os_mem_malloc(1, count * 2);
    int16_t *high = os_mem_malloc(1, count * 2);
    int32_t *wide = os_mem_malloc(1, count * 2 * 2);

    /* Signed ramps that cross zero; larger magnitudes wrap when stored. */
    for (uint16_t k = 0; k < count; k++) {
        lhs[k] = -2000 * (k - 25);
        rhs[k] = 3000 * (k - 33);
    }

    /* The scalar operand of every "const" variant below. */
    const int16_t scalar = rhs[1];

    /* Full 32-bit product, vector * vector. */
    vector_multiply_int16(lhs, rhs, wide, count);
    for (uint16_t k = 0; k < count; k++) {
        int32_t want = lhs[k] * rhs[k];
        if (want == wide[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, wide[k]);
    }
    iot_printf("vector_multiply_int16 finished\n");

    /* Full 32-bit product, vector * scalar. */
    vector_multiply_const_int16(lhs, scalar, wide, count);
    for (uint16_t k = 0; k < count; k++) {
        int32_t want = lhs[k] * scalar;
        if (want == wide[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, wide[k]);
    }
    iot_printf("vector_multiply_const_int16 finished\n");

    /* Upper 16 bits, vector * vector. */
    vector_multiply_int16_high_16bit(lhs, rhs, high, count);
    for (uint16_t k = 0; k < count; k++) {
        int32_t want = (lhs[k] * rhs[k]) >> 16;
        if (want == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, high[k]);
    }
    iot_printf("vector_multiply_int16_high finished\n");

    /* Upper 16 bits, vector * scalar. */
    vector_multiply_const_int16_high_16bit(lhs, scalar, high, count);
    for (uint16_t k = 0; k < count; k++) {
        int32_t want = (lhs[k] * scalar) >> 16;
        if (want == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, high[k]);
    }
    iot_printf("vector_multiply_const_int16_high finished\n");

    /* Lower 16 bits (or saturated product), vector * vector. */
    vector_multiply_int16_low_16bit(lhs, rhs, low, count);
    for (uint16_t k = 0; k < count; k++) {
#ifdef SATURATION
        int32_t want = lhs[k] * rhs[k];
        if (want > 32767) {
            want = 32767;
        } else if (want < -32768) {
            want = -32768;
        }
#else
        int16_t want = (lhs[k] * rhs[k]) & 0xffff;
#endif
        if (want == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", k, lhs[k], rhs[k], want, low[k]);
    }
    iot_printf("vector_multiply_int16_low finished\n");

    /* Lower 16 bits (or saturated product), vector * scalar. */
    vector_multiply_const_int16_low_16bit(lhs, scalar, low, count);
    for (uint16_t k = 0; k < count; k++) {
#ifdef SATURATION
        int32_t want = lhs[k] * scalar;
        if (want > 32767) {
            want = 32767;
        } else if (want < -32768) {
            want = -32768;
        }
#else
        int16_t want = (lhs[k] * scalar) & 0xffff;
#endif
        if (want == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", k, lhs[k], scalar, want, low[k]);
    }
    iot_printf("vector_multiply_const_int16_low finished\n");

    os_mem_free(lhs);
    os_mem_free(rhs);
    os_mem_free(high);
    os_mem_free(low);
    os_mem_free(wide);
}
|
|
|
|
/*
 * Self-test for the int16 multiply-accumulate kernels.
 *
 * Runs vector_mul_add_int16 / vector_mul_sub_int16 (16-bit accumulator) and
 * vector_mul_add_int16_to_int64 / vector_mul_sub_int16_to_int64 (64-bit
 * accumulator) on 50 generated elements, compares each output element with a
 * scalar reference computed here and prints one "incorrect at ..." line per
 * mismatch. All work buffers are released before returning.
 */
void vector_madd_msub_test_int16() {
    uint16_t length = 50;
    int16_t *a = os_mem_malloc(1, length * 2);
    int16_t *b = os_mem_malloc(1, length * 2);
    int16_t *j = os_mem_malloc(1, length * 2);
    int16_t *o = os_mem_malloc(1, length * 2);
    int64_t *J = os_mem_malloc(1, length * 2 * 4);
    int64_t *O = os_mem_malloc(1, length * 2 * 4);

    for (uint16_t i = 0; i < length; i++) {
        /* Signed ramps that cross zero; larger magnitudes wrap when stored. */
        a[i] = -2000 * (i - 25);
        b[i] = 3000 * (i - 33);
        j[i] = i - 50;
        J[i] = i * 50 - 500;
    }

    vector_mul_add_int16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        int32_t m = a[i] * b[i];
        m = m > 32767 ? 32767 : (m < -32768 ? -32768 : m);
        int32_t g = j[i] + m;
        g = g > 32767 ? 32767 : (g < -32768 ? -32768 : g);
#else
        int16_t g = j[i] + a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_int16 finished\n");

    vector_mul_sub_int16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        int32_t m = a[i] * b[i];
        m = m > 32767 ? 32767 : (m < -32768 ? -32768 : m);
        int32_t g = j[i] - m;
        g = g > 32767 ? 32767 : (g < -32768 ? -32768 : g);
#else
        int16_t g = j[i] - a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_int16 finished\n");

    vector_mul_add_int16_to_int64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        /* Keep the reference 64 bits wide so the upper word of O[i] is checked too. */
        int64_t g = J[i] + (int64_t)a[i] * (int64_t)b[i];
        if (g != O[i]) {
            /*
             * 64-bit values are printed as high word then low word; passing an
             * int64_t straight to a "%d" conversion would mismatch the varargs.
             */
            iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i,
                       (uint32_t)((uint64_t)J[i] >> 32), (uint32_t)J[i],
                       (uint16_t)a[i], (uint16_t)b[i],
                       (uint32_t)((uint64_t)g >> 32), (uint32_t)g,
                       (uint32_t)((uint64_t)O[i] >> 32), (uint32_t)O[i]);
        }
    }
    /* NOTE(review): label says int32 although the kernel above is the int64 variant. */
    iot_printf("vector_mul_add_int16_to_int32 finished\n");

    vector_mul_sub_int16_to_int64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        int64_t g = J[i] - (int64_t)a[i] * (int64_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i,
                       (uint32_t)((uint64_t)J[i] >> 32), (uint32_t)J[i],
                       (uint16_t)a[i], (uint16_t)b[i],
                       (uint32_t)((uint64_t)g >> 32), (uint32_t)g,
                       (uint32_t)((uint64_t)O[i] >> 32), (uint32_t)O[i]);
        }
    }
    /* NOTE(review): label says int32 although the kernel above is the int64 variant. */
    iot_printf("vector_mul_sub_int16_to_int32 finished\n");

    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(j);
    os_mem_free(o);
    os_mem_free(J);
    os_mem_free(O);
}
|
|
/*
 * Self-test for the int16 element-wise minimum kernels.
 *
 * Checks vector_min_int16 (vector vs vector) and vector_min_const_int16
 * (vector vs scalar) on 50 generated elements and prints one line for every
 * element that differs from the scalar reference.
 */
void vector_min_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    /* Vector-vector variant. */
    vector_min_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] < rhs[k]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[k];
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is the first element of rhs. */
    const int16_t scalar = rhs[0];
    vector_min_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] < scalar) {
            expect[k] = lhs[k];
        } else {
            expect[k] = scalar;
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("min of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 element-wise maximum kernels.
 *
 * Checks vector_max_int16 (vector vs vector) and vector_max_const_int16
 * (vector vs scalar) on 50 generated elements and prints one line for every
 * element that differs from the scalar reference.
 */
void vector_max_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    /* Vector-vector variant. */
    vector_max_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] > rhs[k]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[k];
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is the first element of rhs. */
    const int16_t scalar = rhs[0];
    vector_max_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        if (lhs[k] > scalar) {
            expect[k] = lhs[k];
        } else {
            expect[k] = scalar;
        }
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("max of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 element-wise equality kernels.
 *
 * Checks vector_equal_int16 (vector vs vector) and vector_equal_const_int16
 * (vector vs scalar) on 50 generated elements; the reference value is 1 where
 * the operands are equal and 0 otherwise. Mismatches are printed.
 */
void vector_equal_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_equal_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] == rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const int16_t scalar = rhs[11];
    vector_equal_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] == scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("equal of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 element-wise inequality kernels.
 *
 * Checks vector_not_equal_int16 (vector vs vector) and
 * vector_not_equal_const_int16 (vector vs scalar) on 50 generated elements;
 * the reference value is 1 where the operands differ and 0 otherwise.
 * Mismatches are printed.
 */
void vector_not_equal_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_not_equal_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] != rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const int16_t scalar = rhs[11];
    vector_not_equal_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] != scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("not equal of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 element-wise "less than" kernels.
 *
 * Covers three variants on 50 generated elements:
 *   - vector_less_than_int16        : lhs[k] <  rhs[k]
 *   - vector_less_than_const_int16  : lhs[k] <  scalar
 *   - vector_const_less_than_int16  : scalar <  lhs[k]
 * The reference value is 1 when the relation holds and 0 otherwise;
 * mismatches are printed.
 */
void vector_less_than_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_less_than_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] < rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector on the left, scalar rhs[11] on the right. */
    const int16_t scalar = rhs[11];
    vector_less_than_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] < scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Scalar on the left, vector on the right. */
    vector_const_less_than_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] > scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("less than of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 element-wise "greater or equal" kernels.
 *
 * Covers three variants on 50 generated elements:
 *   - vector_greater_or_equal_int16        : lhs[k] >= rhs[k]
 *   - vector_greater_or_equal_const_int16  : lhs[k] >= scalar
 *   - vector_const_greater_or_equal_int16  : scalar >= lhs[k]
 * The reference value is 1 when the relation holds and 0 otherwise;
 * mismatches are printed.
 */
void vector_greater_or_equal_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_greater_or_equal_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] >= rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector on the left, scalar rhs[11] on the right. */
    const int16_t scalar = rhs[11];
    vector_greater_or_equal_const_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] >= scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Scalar on the left, vector on the right. */
    vector_const_greater_or_equal_int16(lhs, scalar, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (lhs[k] <= scalar);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("greater or equal of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 bitwise AND kernels.
 *
 * Checks vector_logic_and_int16 (vector vs vector) and
 * vector_logic_and_const_int16 (vector vs scalar) on 50 generated elements
 * against the C `&` operator and prints every mismatch.
 */
void vector_logic_and_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_logic_and_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] & rhs[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const int16_t mask = rhs[11];
    vector_logic_and_const_int16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] & mask;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic and of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 bitwise OR kernels.
 *
 * Checks vector_logic_or_int16 (vector vs vector) and
 * vector_logic_or_const_int16 (vector vs scalar) on 50 generated elements
 * against the C `|` operator and prints every mismatch.
 */
void vector_logic_or_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_logic_or_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] | rhs[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const int16_t mask = rhs[11];
    vector_logic_or_const_int16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] | mask;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic or of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 bitwise XOR kernels.
 *
 * Checks vector_logic_xor_int16 (vector vs vector) and
 * vector_logic_xor_const_int16 (vector vs scalar) on 50 generated elements
 * against the C `^` operator and prints every mismatch.
 */
void vector_logic_xor_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_logic_xor_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] ^ rhs[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const int16_t mask = rhs[11];
    vector_logic_xor_const_int16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = lhs[k] ^ mask;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic xor of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 bitwise XNOR kernels.
 *
 * Checks vector_logic_xnor_int16 (vector vs vector) and
 * vector_logic_xnor_const_int16 (vector vs scalar) on 50 generated elements
 * against ~(x ^ y) and prints every mismatch.
 */
void vector_logic_xnor_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t lhs[ELEM_COUNT];
    int16_t rhs[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* Vector-vector variant. */
    vector_logic_xnor_int16(lhs, rhs, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = ~(lhs[k] ^ rhs[k]);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Vector-scalar variant; the scalar is rhs[11]. */
    const int16_t mask = rhs[11];
    vector_logic_xnor_const_int16(lhs, mask, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = ~(lhs[k] ^ mask);
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic xnor of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 left-shift kernels.
 *
 * Covers three variants on 50 generated elements (shift amounts 1..7):
 *   - vector_left_shift_int16        : value[k] << amount[k]
 *   - vector_left_shift_const_int16  : value[k] << amount[2]
 *   - vector_const_left_shift_int16  : value[6] << amount[k]
 * Each output is compared with the C `<<` result and mismatches are printed.
 */
void vector_left_shift_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t value[ELEM_COUNT];
    uint16_t amount[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        value[k] = k + 1;
        amount[k] = k % 7 + 1;
    }

    /* Per-element value, per-element shift amount. */
    vector_left_shift_int16(value, amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] << amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Per-element value, one shared shift amount. */
    const uint16_t fixed_amount = amount[2];
    vector_left_shift_const_int16(value, fixed_amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] << fixed_amount;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* One shared value, per-element shift amount. */
    const int16_t fixed_value = value[6];
    vector_const_left_shift_int16(amount, fixed_value, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = fixed_value << amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("left shift of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 logical (zero-filling) right-shift kernels.
 *
 * Covers three variants on 50 negative inputs (shift amounts 1..7):
 *   - vector_logic_right_shift_int16        : value[k] >> amount[k]
 *   - vector_logic_right_shift_const_int16  : value[k] >> amount[2]
 *   - vector_const_logic_right_shift_int16  : value[6] >> amount[k]
 * The reference reinterprets the value as uint16_t before shifting, so zeros
 * are shifted in from the top. Mismatches are printed.
 */
void vector_logic_right_shift_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t value[ELEM_COUNT];
    uint16_t amount[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        value[k] = k - 50; /* -50 .. -1 once stored as int16_t */
        amount[k] = k % 7 + 1;
    }

    /* Per-element value, per-element shift amount. */
    vector_logic_right_shift_int16(value, amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (uint16_t)value[k] >> amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Per-element value, one shared shift amount. */
    const uint16_t fixed_amount = amount[2];
    vector_logic_right_shift_const_int16(value, fixed_amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (uint16_t)value[k] >> fixed_amount;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* One shared value, per-element shift amount. */
    const int16_t fixed_value = value[6];
    vector_const_logic_right_shift_int16(amount, fixed_value, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = (uint16_t)fixed_value >> amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("logic right shift of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 arithmetic right-shift kernels.
 *
 * Covers three variants on 50 negative inputs (shift amounts 1..7):
 *   - vector_arithmatic_right_shift_int16        : value[k] >> amount[k]
 *   - vector_arithmatic_right_shift_const_int16  : value[k] >> amount[2]
 *   - vector_const_arithmatic_right_shift_int16  : value[6] >> amount[k]
 * The reference is the C `>>` operator applied to the signed value.
 * Mismatches are printed.
 */
void vector_arithmatic_right_shift_test_int16() {
    enum { ELEM_COUNT = 50 };
    int16_t value[ELEM_COUNT];
    uint16_t amount[ELEM_COUNT];
    int16_t result[ELEM_COUNT];
    int16_t expect[ELEM_COUNT];

    for (uint32_t k = 0; k < ELEM_COUNT; k++) {
        value[k] = k - 50; /* -50 .. -1 once stored as int16_t */
        amount[k] = k % 7 + 1;
    }

    /* Per-element value, per-element shift amount. */
    vector_arithmatic_right_shift_int16(value, amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] >> amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* Per-element value, one shared shift amount. */
    const uint16_t fixed_amount = amount[2];
    vector_arithmatic_right_shift_const_int16(value, fixed_amount, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = value[k] >> fixed_amount;
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    /* One shared value, per-element shift amount. */
    const int16_t fixed_value = value[6];
    vector_const_arithmatic_right_shift_int16(amount, fixed_value, result, ELEM_COUNT);
    for (uint16_t k = 0; k < ELEM_COUNT; k++) {
        expect[k] = fixed_value >> amount[k];
        if (expect[k] == result[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], result[k]);
    }

    iot_printf("arithmatic right shift of int16 test end\n");
}
|
|
|
|
/*
 * Self-test for the int16 half-word extraction kernels.
 *
 * Checks vector_high_half_bits_int16 (expected `x >> 8`),
 * vector_low_half_bits_int16 (expected `x & 0xff`) and the combined
 * vector_high_and_low_half_bits_int16 on 50 elements. The expected value is
 * narrowed to int8_t before comparison; mismatches go to iot_printf.
 */
void vector_half_bits_test_int16() {
    enum { n_elems = 50 };
    int16_t src[n_elems];
    int8_t high[n_elems];
    int8_t low[n_elems];
    int8_t expect;

    for (int16_t k = 0; k < n_elems; k++) {
        src[k] = k + (k + 80) * 506;
    }

    // High byte only.
    vector_high_half_bits_int16(src, high, n_elems);
    for (int16_t k = 0; k < n_elems; k++) {
        expect = src[k] >> 8;
        if (expect == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, high[k]);
    }
    iot_printf("vector_high_half_bits_int16 finished\n");

    // Low byte only.
    vector_low_half_bits_int16(src, low, n_elems);
    for (int16_t k = 0; k < n_elems; k++) {
        expect = src[k] & 0xff;
        if (expect == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, low[k]);
    }
    iot_printf("vector_low_half_bits_int16 finished\n");

    // Both halves produced by a single call.
    vector_high_and_low_half_bits_int16(src, high, low, n_elems);
    for (int16_t k = 0; k < n_elems; k++) {
        expect = src[k] >> 8;
        if (expect != high[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expect, high[k]);
        }
        expect = src[k] & 0xff;
        if (expect != low[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expect, low[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_int16 finished\n");
}
|
|
|
|
|
|
/*
 * Self-test for vector_add_const_uint32 and vector_add_uint32.
 *
 * Adds 25 uint32 elements and compares each output with a scalar reference:
 * clamped to 0xffffffff when SATURATION is defined, otherwise the ordinary
 * wrapping unsigned sum. Mismatches are reported through iot_printf.
 */
void vector_add_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector plus the constant rhs[0].
    vector_add_const_uint32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
#ifdef SATURATION
        uint64_t wide = (uint64_t)lhs[k] + (uint64_t)rhs[0];
        if (wide > 0xffffffff) {
            expect = 0xffffffff;
        } else {
            expect = wide;
        }
#else
        expect = lhs[k] + rhs[0];
#endif
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %x, b = %x, golden %x, output %x\n", k, lhs[k], rhs[0], expect, out[k]);
    }

    // Vector plus vector.
    vector_add_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
#ifdef SATURATION
        uint64_t wide = (uint64_t)lhs[k] + (uint64_t)rhs[k];
        if (wide > 0xffffffff) {
            expect = 0xffffffff;
        } else {
            expect = wide;
        }
#else
        expect = lhs[k] + rhs[k];
#endif
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %x, b = %x, golden %x, output %x\n", k, lhs[k], rhs[k], expect, out[k]);
    }

    iot_printf("add const of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 subtraction kernels.
 *
 * Covers vector_sub_const_uint32 (a[k] - c), vector_const_sub_uint32
 * (c - a[k]) and vector_sub_uint32 (a[k] - b[k]) on 25 elements. With
 * SATURATION defined the reference clamps at 0; otherwise it is the wrapping
 * unsigned difference. The constant used is rhs[0], which is 0 for this
 * data set. Mismatches are reported through iot_printf.
 */
void vector_sub_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000001 * k + 23;
        rhs[k] = 300000001 * k;
    }

    // Vector minus constant.
    vector_sub_const_uint32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
#ifdef SATURATION
        expect = lhs[k] < rhs[0] ? 0 : lhs[k] - rhs[0];
#else
        expect = lhs[k] - rhs[0];
#endif
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant minus vector.
    vector_const_sub_uint32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
#ifdef SATURATION
        expect = lhs[k] > rhs[0] ? 0 : rhs[0] - lhs[k];
#else
        expect = rhs[0] - lhs[k];
#endif
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector minus vector.
    vector_sub_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
#ifdef SATURATION
        expect = lhs[k] < rhs[k] ? 0 : lhs[k] - rhs[k];
#else
        expect = lhs[k] - rhs[k];
#endif
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("sub const of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 multiply kernels.
 *
 * On 100 heap-allocated elements this checks, in order: the full 64-bit
 * product (vector and constant multiplier), the high-32-bit variants and
 * the low-32-bit variants. Each output is compared with a scalar 64-bit
 * reference; with SATURATION defined the low-32-bit reference clamps at
 * 0xffffffff. Mismatches are reported through iot_printf.
 *
 * 64-bit golden/output values are printed as two 32-bit hex words (high
 * then low) because a 64-bit argument does not match a "%d" conversion.
 * If any buffer cannot be allocated the test reports it and exits early.
 */
void vector_mul_test_uint32() {
    uint32_t length = 100;
    uint32_t *a = (uint32_t *)os_mem_malloc(1, length * 4);
    uint32_t *b = (uint32_t *)os_mem_malloc(1, length * 4);
    uint32_t *l = (uint32_t *)os_mem_malloc(1, length * 4);
    uint32_t *h = (uint32_t *)os_mem_malloc(1, length * 4);
    uint64_t *w = (uint64_t *)os_mem_malloc(1, length * 4 * 2);

    // Do not dereference a failed allocation.
    if (!a || !b || !l || !h || !w) {
        iot_printf("vector_mul_test_uint32: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = 201 * (103 - i);
        b[i] = 301 * (127 - i);
    }

    // Full 64-bit product, vector by vector.
    vector_multiply_uint32(a, b, w, length);
    for (uint32_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)a[i] * (uint64_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden(hi:lo) %x:%x, output(hi:lo) %x:%x\n",
                       i, a[i], b[i],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(w[i] >> 32), (uint32_t)w[i]);
        }
    }
    iot_printf("vector_multiply_uint32 finished\n");

    // Full 64-bit product, vector by the constant b[1].
    vector_multiply_const_uint32(a, b[1], w, length);
    for (uint32_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)a[i] * (uint64_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden(hi:lo) %x:%x, output(hi:lo) %x:%x\n",
                       i, a[i], b[1],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(w[i] >> 32), (uint32_t)w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32 finished\n");

    // Upper 32 bits of the product, vector by vector.
    vector_multiply_uint32_high_32bit(a, b, h, length);
    for (uint32_t i = 0; i < length; i++) {
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[i]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint32_high finished\n");

    // Upper 32 bits of the product, vector by constant.
    vector_multiply_const_uint32_high_32bit(a, b[1], h, length);
    for (uint32_t i = 0; i < length; i++) {
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[1]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32_high finished\n");

    // Lower 32 bits of the product, vector by vector.
    vector_multiply_uint32_low_32bit(a, b, l, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
#else
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[i]) & 0xffffffff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint32_low finished\n");

    // Lower 32 bits of the product, vector by constant.
    vector_multiply_const_uint32_low_32bit(a, b[1], l, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[1];
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
#else
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[1]) & 0xffffffff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32_low finished\n");

cleanup:
    // Only release buffers that were actually obtained.
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (h) {
        os_mem_free(h);
    }
    if (l) {
        os_mem_free(l);
    }
    if (w) {
        os_mem_free(w);
    }
}
|
|
|
|
/*
 * Self-test for vector_mul_add_uint32 and vector_mul_sub_uint32.
 *
 * On 100 heap-allocated elements, compares the kernel output with the
 * scalar reference j[i] + a[i] * b[i] and j[i] - a[i] * b[i]. With
 * SATURATION defined the product and the final result are each clamped to
 * the uint32 range; otherwise ordinary wrapping unsigned arithmetic is
 * used. Mismatches are reported through iot_printf.
 *
 * If any buffer cannot be allocated the test reports it and exits early
 * instead of writing through a null pointer.
 */
void vector_madd_msub_test_uint32() {
    uint32_t length = 100;
    uint32_t *a = os_mem_malloc(1, length * 4);
    uint32_t *b = os_mem_malloc(1, length * 4);
    uint32_t *j = os_mem_malloc(1, length * 4);
    uint32_t *o = os_mem_malloc(1, length * 4);

    if (!a || !b || !j || !o) {
        iot_printf("vector_madd_msub_test_uint32: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = 222 * (103 - i);
        b[i] = 333 * (127 - i);
        j[i] = i + 120000000;
    }

    // Multiply-add: j + a * b.
    vector_mul_add_uint32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        gg = gg > 0xffffffff ? 0xffffffff : gg;
        gg = (uint64_t)j[i] + gg;
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
#else
        uint32_t g = j[i] + a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint32 finished\n");

    // Multiply-subtract: j - a * b.
    vector_mul_sub_uint32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        gg = gg > 0xffffffff ? 0xffffffff : gg;
        uint32_t g = j[i] < gg ? 0 : j[i] - gg;
#else
        uint32_t g = j[i] - a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint32 finished\n");

cleanup:
    // Only release buffers that were actually obtained.
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (j) {
        os_mem_free(j);
    }
    if (o) {
        os_mem_free(o);
    }
}
|
|
|
|
/*
 * Self-test for vector_min_uint32 and vector_min_const_uint32.
 *
 * Compares the element-wise minimum of two 25-element uint32 vectors, and of
 * a vector against the constant rhs[0], with a scalar reference. Mismatches
 * are reported through iot_printf.
 */
void vector_min_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector against vector.
    vector_min_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[k];
        if (lhs[k] < rhs[k]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_min_const_uint32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[0];
        if (lhs[k] < rhs[0]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("min of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_max_uint32 and vector_max_const_uint32.
 *
 * Compares the element-wise maximum of two 25-element uint32 vectors, and of
 * a vector against the constant rhs[0], with a scalar reference. Mismatches
 * are reported through iot_printf.
 */
void vector_max_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector against vector.
    vector_max_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[k];
        if (lhs[k] > rhs[k]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_max_const_uint32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[0];
        if (lhs[k] > rhs[0]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("max of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_equal_uint32 and vector_equal_const_uint32.
 *
 * The expected output is 1 where the operands are equal and 0 otherwise,
 * checked on 25 elements for the vector/vector form and for comparison
 * against the constant rhs[11]. Mismatches are reported through iot_printf.
 */
void vector_equal_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector against vector.
    vector_equal_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] == rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_equal_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] == rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("equal of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_not_equal_uint32 and vector_not_equal_const_uint32.
 *
 * The expected output is 1 where the operands differ and 0 otherwise,
 * checked on 25 elements for the vector/vector form and for comparison
 * against the constant rhs[11]. Mismatches are reported through iot_printf.
 */
void vector_not_equal_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector against vector.
    vector_not_equal_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] != rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_not_equal_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] != rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("not equal of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 less-than kernels.
 *
 * Covers vector_less_than_uint32 (a[k] < b[k]), vector_less_than_const_uint32
 * (a[k] < c) and vector_const_less_than_uint32 (c < a[k]) on 25 elements,
 * with c = rhs[11]. The expected output is 1 when the relation holds and 0
 * otherwise; mismatches are reported through iot_printf.
 */
void vector_less_than_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector < vector.
    vector_less_than_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] < rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector < constant.
    vector_less_than_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] < rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant < vector.
    vector_const_less_than_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (rhs[11] < lhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("less than of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 greater-or-equal kernels.
 *
 * Covers vector_greater_or_equal_uint32 (a[k] >= b[k]),
 * vector_greater_or_equal_const_uint32 (a[k] >= c) and
 * vector_const_greater_or_equal_uint32 (c >= a[k]) on 25 elements, with
 * c = rhs[11]. The expected output is 1 when the relation holds and 0
 * otherwise; mismatches are reported through iot_printf.
 */
void vector_greater_or_equal_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector >= vector.
    vector_greater_or_equal_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] >= rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector >= constant.
    vector_greater_or_equal_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] >= rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant >= vector.
    vector_const_greater_or_equal_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (rhs[11] >= lhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("greater or equal of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_logic_and_uint32 and vector_logic_and_const_uint32.
 *
 * Compares the bitwise AND of two 25-element uint32 vectors, and of a vector
 * with the constant rhs[11], against the C `&` operator. Mismatches are
 * reported through iot_printf.
 */
void vector_logic_and_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector AND vector.
    vector_logic_and_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] & rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector AND constant.
    vector_logic_and_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] & rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic and of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_logic_or_uint32 and vector_logic_or_const_uint32.
 *
 * Compares the bitwise OR of two 25-element uint32 vectors, and of a vector
 * with the constant rhs[11], against the C `|` operator. Mismatches are
 * reported through iot_printf.
 */
void vector_logic_or_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200 * k;
        rhs[k] = 300000000 * k + 23;
    }

    // Vector OR vector.
    vector_logic_or_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] | rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector OR constant.
    vector_logic_or_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] | rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic or of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_logic_xor_uint32 and vector_logic_xor_const_uint32.
 *
 * Compares the bitwise XOR of two 25-element uint32 vectors, and of a vector
 * with the constant rhs[11], against the C `^` operator. Mismatches are
 * reported through iot_printf.
 */
void vector_logic_xor_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector XOR vector.
    vector_logic_xor_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] ^ rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector XOR constant.
    vector_logic_xor_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] ^ rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic xor of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_logic_xnor_uint32 and vector_logic_xnor_const_uint32.
 *
 * The reference is the bitwise complement of the XOR of the operands,
 * checked on 25 uint32 elements for the vector/vector form and against the
 * constant rhs[11]. Mismatches are reported through iot_printf.
 */
void vector_logic_xnor_test_uint32() {
    enum { n_elems = 25 };
    uint32_t lhs[n_elems];
    uint32_t rhs[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector XNOR vector.
    vector_logic_xnor_uint32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] ^ rhs[k];
        expect = ~expect;
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector XNOR constant.
    vector_logic_xnor_const_uint32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = lhs[k] ^ rhs[11];
        expect = ~expect;
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic xnor of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 left-shift kernels.
 *
 * Covers vector_left_shift_uint32 (a[k] << n[k]),
 * vector_left_shift_const_uint32 (a[k] << n[2]) and
 * vector_const_left_shift_uint32 (a[6] << n[k]) on 25 elements, with values
 * and shift amounts both running from 1 to 25. Mismatches are reported
 * through iot_printf.
 */
void vector_left_shift_test_uint32() {
    enum { n_elems = 25 };
    uint32_t value[n_elems];
    uint32_t amount[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        value[k] = k + 1;
        amount[k] = k + 1;
    }

    // Vector shifted by vector.
    vector_left_shift_uint32(value, amount, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = value[k] << amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector shifted by constant.
    vector_left_shift_const_uint32(value, amount[2], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = value[k] << amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant shifted by each amount.
    vector_const_left_shift_uint32(amount, value[6], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = value[6] << amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("left shift of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 right-shift kernels.
 *
 * Covers vector_right_shift_uint32 (a[k] >> n[k]),
 * vector_right_shift_const_uint32 (a[k] >> n[2]) and
 * vector_const_right_shift_uint32 (a[6] >> n[k]) on 25 elements, with shift
 * amounts 1..25. Mismatches are reported through iot_printf.
 */
void vector_right_shift_test_uint32() {
    enum { n_elems = 25 };
    uint32_t value[n_elems];
    uint32_t amount[n_elems];
    uint32_t out[n_elems];
    uint32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        value[k] = k * 1000000000 - 25;
        amount[k] = k + 1;
    }

    // Vector shifted by vector.
    vector_right_shift_uint32(value, amount, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = value[k] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector shifted by constant.
    vector_right_shift_const_uint32(value, amount[2], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = value[k] >> amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant shifted by each amount.
    vector_const_right_shift_uint32(amount, value[6], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = value[6] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic right shift of uint32 test end\n");
}
|
|
|
|
/*
 * Self-test for the uint32 half-word extraction kernels.
 *
 * Checks vector_high_half_bits_uint32 (expected `x >> 16`),
 * vector_low_half_bits_uint32 (expected `x & 0xffff`) and the combined
 * vector_high_and_low_half_bits_uint32 on 25 elements. Mismatches are
 * reported through iot_printf.
 */
void vector_half_bits_test_uint32() {
    enum { n_elems = 25 };
    uint32_t src[n_elems];
    uint16_t high[n_elems];
    uint16_t low[n_elems];
    uint16_t expect;

    // Low half holds the index, high half holds index + 80.
    for (uint32_t k = 0; k < n_elems; k++) {
        src[k] = k + (k + 80) * 65536;
    }

    // High half only.
    vector_high_half_bits_uint32(src, high, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = src[k] >> 16;
        if (expect == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, high[k]);
    }
    iot_printf("vector_high_half_bits_uint32 finished\n");

    // Low half only.
    vector_low_half_bits_uint32(src, low, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = src[k] & 0xffff;
        if (expect == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, low[k]);
    }
    iot_printf("vector_low_half_bits_uint32 finished\n");

    // Both halves produced by a single call.
    vector_high_and_low_half_bits_uint32(src, high, low, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = src[k] >> 16;
        if (expect != high[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expect, high[k]);
        }
        expect = src[k] & 0xffff;
        if (expect != low[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expect, low[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_uint32 finished\n");
}
|
|
|
|
|
|
/*
 * Self-test and cycle-count comparison for the int32 add kernels.
 *
 * 1. vector_add_const_int32 is checked against a scalar reference.
 * 2. vector_add_int32 is run 1000 times between two cpu_get_mcycle() reads
 *    and the cycle difference is printed.
 * 3. The scalar reference loop is run 1000 times and timed the same way,
 *    then its result is compared with the kernel output from step 2.
 *
 * With SATURATION defined the reference clamps to the int32 range. Otherwise
 * the reference is a two's-complement wraparound sum; it is computed in
 * uint32_t because several input pairs (e.g. index 5) overflow int32, and
 * signed overflow is undefined behaviour in C.
 */
void vector_add_test_int32() {
    int32_t a[25];
    int32_t b[25];
    int32_t o[25];
    int32_t g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    // Vector plus the constant b[0].
    vector_add_const_int32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] + (int64_t)b[0];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
        // Wrap in unsigned arithmetic to avoid signed-overflow UB.
        g[i] = (int32_t)((uint32_t)a[i] + (uint32_t)b[0]);
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }

    // Time the vector kernel.
    uint64_t begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1000; repeat++) {
        vector_add_int32(a, b, o, 25);
    }
    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Time the scalar reference; g[] ends up holding the golden values.
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1000; repeat++) {
        for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
            int64_t gg = (int64_t)a[i] + (int64_t)b[i];
            g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
            // Wrap in unsigned arithmetic to avoid signed-overflow UB.
            g[i] = (int32_t)((uint32_t)a[i] + (uint32_t)b[i]);
#endif
        }
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    for (uint32_t i = 0; i < 25; i++) {
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }

    iot_printf("add const of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 subtraction kernels.
 *
 * Covers vector_sub_const_int32 (a[i] - c), vector_const_sub_int32
 * (c - a[i]) and vector_sub_int32 (a[i] - b[i]) on 25 elements, with
 * c = b[0]. With SATURATION defined the reference clamps to the int32
 * range. Otherwise the reference is a two's-complement wraparound
 * difference, computed in uint32_t because some input pairs (e.g. index 10)
 * overflow int32 and signed overflow is undefined behaviour in C.
 *
 * Diagnostics for the two constant-operand variants print b[0], the operand
 * that was actually used.
 */
void vector_sub_test_int32() {
    int32_t a[25];
    int32_t b[25];
    int32_t o[25];
    int32_t g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    // Vector minus constant.
    vector_sub_const_int32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] - (int64_t)b[0];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
        g[i] = (int32_t)((uint32_t)a[i] - (uint32_t)b[0]);
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("sub const of int32 test end\n");

    // Constant minus vector.
    vector_const_sub_int32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)b[0] - (int64_t)a[i];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
        g[i] = (int32_t)((uint32_t)b[0] - (uint32_t)a[i]);
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("const sub of int32 test end\n");

    // Vector minus vector.
    vector_sub_int32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] - (int64_t)b[i];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
        g[i] = (int32_t)((uint32_t)a[i] - (uint32_t)b[i]);
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("sub of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 multiply kernels.
 *
 * On 100 heap-allocated elements this checks, in order: the full 64-bit
 * product (vector and constant multiplier), the high-32-bit variants and
 * the low-32-bit variants. Each output is compared with a scalar 64-bit
 * reference; with SATURATION defined the low-32-bit reference clamps to the
 * int32 range. Mismatches are reported through iot_printf.
 *
 * 64-bit golden/output values are printed as two 32-bit hex words (high
 * then low) because a 64-bit argument does not match a "%d" conversion.
 * Diagnostics for the constant-multiplier variants print b[1], the operand
 * that was actually used. If any buffer cannot be allocated the test
 * reports it and exits early.
 */
void vector_mul_test_int32() {
    uint32_t length = 100;
    int32_t *a = (int32_t *)os_mem_malloc(1, length * 4);
    int32_t *b = (int32_t *)os_mem_malloc(1, length * 4);
    int32_t *l = (int32_t *)os_mem_malloc(1, length * 4);
    int32_t *h = (int32_t *)os_mem_malloc(1, length * 4);
    int64_t *w = (int64_t *)os_mem_malloc(1, length * 4 * 2);

    // Do not dereference a failed allocation.
    if (!a || !b || !l || !h || !w) {
        iot_printf("vector_mul_test_int32: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = -201 * (i - 14);
        b[i] = 301 * (i - 6);
    }

    // Full 64-bit product, vector by vector.
    vector_multiply_int32(a, b, w, length);
    for (uint32_t i = 0; i < length; i++) {
        int64_t g = (int64_t)a[i] * (int64_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden(hi:lo) %x:%x, output(hi:lo) %x:%x\n",
                       i, a[i], b[i],
                       (uint32_t)((uint64_t)g >> 32), (uint32_t)(uint64_t)g,
                       (uint32_t)((uint64_t)w[i] >> 32), (uint32_t)(uint64_t)w[i]);
        }
    }
    iot_printf("vector_multiply_int32 finished\n");

    // Full 64-bit product, vector by the constant b[1].
    vector_multiply_const_int32(a, b[1], w, length);
    for (uint32_t i = 0; i < length; i++) {
        int64_t g = (int64_t)a[i] * (int64_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden(hi:lo) %x:%x, output(hi:lo) %x:%x\n",
                       i, a[i], b[1],
                       (uint32_t)((uint64_t)g >> 32), (uint32_t)(uint64_t)g,
                       (uint32_t)((uint64_t)w[i] >> 32), (uint32_t)(uint64_t)w[i]);
        }
    }
    iot_printf("vector_multiply_const_int32 finished\n");

    // Upper 32 bits of the product, vector by vector.
    vector_multiply_int32_high_32bit(a, b, h, length);
    for (uint32_t i = 0; i < length; i++) {
        int32_t g = ((int64_t)a[i] * (int64_t)b[i]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_int32_high finished\n");

    // Upper 32 bits of the product, vector by constant.
    vector_multiply_const_int32_high_32bit(a, b[1], h, length);
    for (uint32_t i = 0; i < length; i++) {
        int32_t g = ((int64_t)a[i] * (int64_t)b[1]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_int32_high finished\n");

    // Lower 32 bits of the product, vector by vector.
    vector_multiply_int32_low_32bit(a, b, l, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[i];
        int32_t g = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
        int32_t g = ((int64_t)a[i] * (int64_t)b[i]) & 0xffffffff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_int32_low finished\n");

    // Lower 32 bits of the product, vector by constant.
    vector_multiply_const_int32_low_32bit(a, b[1], l, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[1];
        int32_t g = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
#else
        int32_t g = ((int64_t)a[i] * (int64_t)b[1]) & 0xffffffff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_int32_low finished\n");

cleanup:
    // Only release buffers that were actually obtained.
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (h) {
        os_mem_free(h);
    }
    if (l) {
        os_mem_free(l);
    }
    if (w) {
        os_mem_free(w);
    }
}
|
|
|
|
/*
 * Self-test for vector_mul_add_int32 and vector_mul_sub_int32.
 *
 * On 100 heap-allocated elements, compares the kernel output with the
 * scalar reference j[i] + a[i] * b[i] and j[i] - a[i] * b[i]. With
 * SATURATION defined the product and the final result are each clamped to
 * the int32 range; otherwise the reference wraps, and is computed in
 * uint32_t so that it stays well defined for any operand values.
 *
 * The golden value is always narrowed to int32_t before being compared and
 * printed, so the "%d" conversion matches in both build configurations.
 * If any buffer cannot be allocated the test reports it and exits early.
 */
void vector_madd_msub_test_int32() {
    uint32_t length = 100;
    int32_t *a = os_mem_malloc(1, length * 4);
    int32_t *b = os_mem_malloc(1, length * 4);
    int32_t *j = os_mem_malloc(1, length * 4);
    int32_t *o = os_mem_malloc(1, length * 4);

    if (!a || !b || !j || !o) {
        iot_printf("vector_madd_msub_test_int32: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = -222 * (i - 25);
        b[i] = 333 * (i - 33);
        j[i] = i - 1200000000;
    }

    // Multiply-add: j + a * b.
    vector_mul_add_int32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[i];
        gg = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        gg = (int64_t)j[i] + gg;
        gg = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        int32_t g = (int32_t)gg;  // already clamped, conversion is exact
#else
        int32_t g = (int32_t)((uint32_t)j[i] + (uint32_t)a[i] * (uint32_t)b[i]);
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_add_int32 finished\n");

    // Multiply-subtract: j - a * b.
    vector_mul_sub_int32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[i];
        gg = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        gg = (int64_t)j[i] - gg;
        gg = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        int32_t g = (int32_t)gg;  // already clamped, conversion is exact
#else
        int32_t g = (int32_t)((uint32_t)j[i] - (uint32_t)a[i] * (uint32_t)b[i]);
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_int32 finished\n");

cleanup:
    // Only release buffers that were actually obtained.
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (j) {
        os_mem_free(j);
    }
    if (o) {
        os_mem_free(o);
    }
}
|
|
|
|
/*
 * Self-test for vector_min_int32 and vector_min_const_int32.
 *
 * Compares the element-wise minimum of two 25-element int32 vectors, and of
 * a vector against the constant rhs[0], with a scalar reference. Mismatches
 * are reported through iot_printf.
 */
void vector_min_test_int32() {
    enum { n_elems = 25 };
    int32_t lhs[n_elems];
    int32_t rhs[n_elems];
    int32_t out[n_elems];
    int32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Vector against vector.
    vector_min_int32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[k];
        if (lhs[k] < rhs[k]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_min_const_int32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[0];
        if (lhs[k] < rhs[0]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("min of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_max_int32 and vector_max_const_int32.
 *
 * Compares the element-wise maximum of two 25-element int32 vectors, and of
 * a vector against the constant rhs[0], with a scalar reference. Mismatches
 * are reported through iot_printf.
 */
void vector_max_test_int32() {
    enum { n_elems = 25 };
    int32_t lhs[n_elems];
    int32_t rhs[n_elems];
    int32_t out[n_elems];
    int32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Vector against vector.
    vector_max_int32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[k];
        if (lhs[k] > rhs[k]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_max_const_int32(lhs, rhs[0], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = rhs[0];
        if (lhs[k] > rhs[0]) {
            expect = lhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("max of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_equal_int32 and vector_equal_const_int32.
 *
 * The expected output is 1 where the operands are equal and 0 otherwise,
 * checked on 25 elements for the vector/vector form and for comparison
 * against the constant rhs[11]. Mismatches are reported through iot_printf.
 */
void vector_equal_test_int32() {
    enum { n_elems = 25 };
    int32_t lhs[n_elems];
    int32_t rhs[n_elems];
    int32_t out[n_elems];
    int32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    // Vector against vector.
    vector_equal_int32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] == rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_equal_const_int32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] == rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("equal of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for vector_not_equal_int32 and vector_not_equal_const_int32.
 *
 * The expected output is 1 where the operands differ and 0 otherwise,
 * checked on 25 elements for the vector/vector form and for comparison
 * against the constant rhs[11]. Mismatches are reported through iot_printf.
 */
void vector_not_equal_test_int32() {
    enum { n_elems = 25 };
    int32_t lhs[n_elems];
    int32_t rhs[n_elems];
    int32_t out[n_elems];
    int32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    // Vector against vector.
    vector_not_equal_int32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] != rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector against constant.
    vector_not_equal_const_int32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] != rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("not equal of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 less-than kernels.
 *
 * Covers vector_less_than_int32 (a[k] < b[k]), vector_less_than_const_int32
 * (a[k] < c) and vector_const_less_than_int32 (c < a[k]) on 25 elements,
 * with c = rhs[11]. The expected output is 1 when the relation holds and 0
 * otherwise; mismatches are reported through iot_printf.
 *
 * The input expressions exceed the int32 range and are narrowed on
 * assignment, which spreads the operands over both signs.
 */
void vector_less_than_test_int32() {
    enum { n_elems = 25 };
    int32_t lhs[n_elems];
    int32_t rhs[n_elems];
    int32_t out[n_elems];
    int32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 2000000000 * k;
        rhs[k] = 3000000000 * k - 23;
    }

    // Vector < vector.
    vector_less_than_int32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] < rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector < constant.
    vector_less_than_const_int32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] < rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant < vector.
    vector_const_less_than_int32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (rhs[11] < lhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("less than of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 greater-or-equal kernels.
 *
 * Covers vector_greater_or_equal_int32 (a[k] >= b[k]),
 * vector_greater_or_equal_const_int32 (a[k] >= c) and
 * vector_const_greater_or_equal_int32 (c >= a[k]) on 25 elements, with
 * c = rhs[11]. The expected output is 1 when the relation holds and 0
 * otherwise; mismatches are reported through iot_printf.
 */
void vector_greater_or_equal_test_int32() {
    enum { n_elems = 25 };
    int32_t lhs[n_elems];
    int32_t rhs[n_elems];
    int32_t out[n_elems];
    int32_t expect;

    for (uint32_t k = 0; k < n_elems; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    // Vector >= vector.
    vector_greater_or_equal_int32(lhs, rhs, out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] >= rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Vector >= constant.
    vector_greater_or_equal_const_int32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (lhs[k] >= rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    // Constant >= vector.
    vector_const_greater_or_equal_int32(lhs, rhs[11], out, n_elems);
    for (uint32_t k = 0; k < n_elems; k++) {
        expect = (rhs[11] >= lhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("greater or equal of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 bitwise AND vector kernels (vector/vector and
 * vector/constant). Each output element is compared with the scalar `&`
 * result and mismatches are printed.
 */
void vector_logic_and_test_int32() {
    enum { N_ELEM = 25 };
    int32_t left[N_ELEM];
    int32_t right[N_ELEM];
    int32_t actual[N_ELEM];
    int32_t golden[N_ELEM];
    uint32_t n;

    for (n = 0; n < N_ELEM; n++) {
        left[n] = 200000000 * n;
        right[n] = 300000000 * n - 23;
    }

    vector_logic_and_int32(left, right, actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = left[n] & right[n];
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    vector_logic_and_const_int32(left, right[11], actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = left[n] & right[11];
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    iot_printf("logic and of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 bitwise OR vector kernels (vector/vector and
 * vector/constant). Each output element is compared with the scalar `|`
 * result and mismatches are printed.
 */
void vector_logic_or_test_int32() {
    enum { N_ELEM = 25 };
    int32_t left[N_ELEM];
    int32_t right[N_ELEM];
    int32_t actual[N_ELEM];
    int32_t golden[N_ELEM];
    uint32_t n;

    for (n = 0; n < N_ELEM; n++) {
        left[n] = 200000000 * n;
        right[n] = 300000000 * n - 23;
    }

    vector_logic_or_int32(left, right, actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = left[n] | right[n];
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    vector_logic_or_const_int32(left, right[11], actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = left[n] | right[11];
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    iot_printf("logic or of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 bitwise XOR vector kernels (vector/vector and
 * vector/constant). Each output element is compared with the scalar `^`
 * result and mismatches are printed.
 */
void vector_logic_xor_test_int32() {
    enum { N_ELEM = 25 };
    int32_t left[N_ELEM];
    int32_t right[N_ELEM];
    int32_t actual[N_ELEM];
    int32_t golden[N_ELEM];
    uint32_t n;

    for (n = 0; n < N_ELEM; n++) {
        left[n] = 200000000 * n;
        right[n] = 300000000 * n - 23;
    }

    vector_logic_xor_int32(left, right, actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = left[n] ^ right[n];
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    vector_logic_xor_const_int32(left, right[11], actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = left[n] ^ right[11];
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    iot_printf("logic xor of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 bitwise XNOR vector kernels (vector/vector and
 * vector/constant). The scalar reference is ~(a ^ b); mismatches are printed.
 */
void vector_logic_xnor_test_int32() {
    enum { N_ELEM = 25 };
    int32_t left[N_ELEM];
    int32_t right[N_ELEM];
    int32_t actual[N_ELEM];
    int32_t golden[N_ELEM];
    uint32_t n;

    for (n = 0; n < N_ELEM; n++) {
        left[n] = 200000000 * n;
        right[n] = 300000000 * n - 23;
    }

    vector_logic_xnor_int32(left, right, actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = ~(left[n] ^ right[n]);
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    vector_logic_xnor_const_int32(left, right[11], actual, N_ELEM);
    for (n = 0; n < N_ELEM; n++) {
        golden[n] = ~(left[n] ^ right[11]);
        if (golden[n] != actual[n]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", n, golden[n], actual[n]);
        }
    }

    iot_printf("logic xnor of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 left-shift vector kernels.
 *
 * Covers three variants: per-element shift amounts, a constant shift amount,
 * and a constant value shifted by per-element amounts. Each output element is
 * compared with a scalar reference and mismatches are printed.
 *
 * The reference shift is done on uint32_t and converted back to int32_t:
 * the test values include negative numbers and shifts that overflow, and
 * left-shifting such signed values is undefined behaviour in C.
 */
void vector_left_shift_test_int32() {
    enum { COUNT = 25 };
    int32_t value[COUNT];
    uint32_t amount[COUNT];
    int32_t result[COUNT];
    int32_t expected[COUNT];

    for (uint32_t i = 0; i < COUNT; i++) {
        value[i] = i * 500000000 + 1;   // unsigned product wraps, then narrows
        amount[i] = i;                  // shift counts 0..24
    }

    // value[i] << amount[i]
    vector_left_shift_int32(value, amount, result, COUNT);
    for (uint32_t i = 0; i < COUNT; i++) {
        expected[i] = (int32_t)((uint32_t)value[i] << amount[i]);
        if (expected[i] != result[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, expected[i], result[i]);
        }
    }

    // value[i] << constant
    vector_left_shift_const_int32(value, amount[2], result, COUNT);
    for (uint32_t i = 0; i < COUNT; i++) {
        expected[i] = (int32_t)((uint32_t)value[i] << amount[2]);
        if (expected[i] != result[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, expected[i], result[i]);
        }
    }

    // constant << amount[i]
    vector_const_left_shift_int32(amount, value[6], result, COUNT);
    for (uint32_t i = 0; i < COUNT; i++) {
        expected[i] = (int32_t)((uint32_t)value[6] << amount[i]);
        if (expected[i] != result[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, expected[i], result[i]);
        }
    }

    iot_printf("left shift of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 logical (zero-filling) right-shift vector kernels.
 *
 * Covers per-element shift amounts, a constant shift amount, and a constant
 * value shifted by per-element amounts. The scalar reference shifts the
 * operand as uint32_t; mismatches are printed.
 */
void vector_logic_right_shift_test_int32() {
    enum { COUNT = 25 };
    int32_t value[COUNT];
    uint32_t amount[COUNT];
    int32_t result[COUNT];
    int32_t expected[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        value[k] = k - 25;          // stored as -25..-1
        amount[k] = k % 7 + 1;      // shift counts 1..7
    }

    vector_logic_right_shift_int32(value, amount, result, COUNT);
    for (k = 0; k < COUNT; k++) {
        expected[k] = (uint32_t)value[k] >> amount[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expected[k], result[k]);
        }
    }

    vector_logic_right_shift_const_int32(value, amount[2], result, COUNT);
    for (k = 0; k < COUNT; k++) {
        expected[k] = (uint32_t)value[k] >> amount[2];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expected[k], result[k]);
        }
    }

    vector_const_logic_right_shift_int32(amount, value[6], result, COUNT);
    for (k = 0; k < COUNT; k++) {
        expected[k] = (uint32_t)value[6] >> amount[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expected[k], result[k]);
        }
    }

    iot_printf("logic right shift of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the int32 arithmetic right-shift vector kernels.
 *
 * Covers per-element shift amounts, a constant shift amount, and a constant
 * value shifted by per-element amounts. The scalar reference is the signed
 * `>>` of the compiler; mismatches are printed.
 */
void vector_arithmatic_right_shift_test_int32() {
    enum { COUNT = 25 };
    int32_t value[COUNT];
    uint32_t amount[COUNT];
    int32_t result[COUNT];
    int32_t expected[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        value[k] = k - 25;          // stored as -25..-1
        amount[k] = k % 7 + 1;      // shift counts 1..7
    }

    vector_arithmatic_right_shift_int32(value, amount, result, COUNT);
    for (k = 0; k < COUNT; k++) {
        expected[k] = value[k] >> amount[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expected[k], result[k]);
        }
    }

    vector_arithmatic_right_shift_const_int32(value, amount[2], result, COUNT);
    for (k = 0; k < COUNT; k++) {
        expected[k] = value[k] >> amount[2];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expected[k], result[k]);
        }
    }

    vector_const_arithmatic_right_shift_int32(amount, value[6], result, COUNT);
    for (k = 0; k < COUNT; k++) {
        expected[k] = value[6] >> amount[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expected[k], result[k]);
        }
    }

    iot_printf("arithmatic right shift of int32 test end\n");
}
|
|
|
|
/*
 * Self-test for the kernels that split int32 elements into 16-bit halves.
 *
 * Checks three entry points: high half only, low half only, and both halves
 * in a single call. The reference is `src >> 16` for the high half and
 * `src & 0xffff` for the low half; mismatches are printed.
 */
void vector_half_bits_test_int32() {
    enum { COUNT = 25 };
    int32_t src[COUNT];
    int16_t high[COUNT];
    int16_t low[COUNT];
    int16_t golden[COUNT];

    // Low half holds i, high half holds i + 80.
    for (int32_t i = 0; i < COUNT; i++) {
        src[i] = i + (i + 80) * 65536;
    }

    vector_high_half_bits_int32(src, high, COUNT);
    for (int32_t i = 0; i < COUNT; i++) {
        golden[i] = src[i] >> 16;
        if (golden[i] != high[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, golden[i], high[i]);
        }
    }
    iot_printf("vector_high_half_bits_int32 finished\n");

    vector_low_half_bits_int32(src, low, COUNT);
    for (int32_t i = 0; i < COUNT; i++) {
        golden[i] = src[i] & 0xffff;
        if (golden[i] != low[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, golden[i], low[i]);
        }
    }
    iot_printf("vector_low_half_bits_int32 finished\n");

    vector_high_and_low_half_bits_int32(src, high, low, COUNT);
    for (int32_t i = 0; i < COUNT; i++) {
        golden[i] = src[i] >> 16;
        if (golden[i] != high[i]) {
            // Message previously read "higt".
            iot_printf("incorrect high at %d, golden %d, output %d\n", i, golden[i], high[i]);
        }
        golden[i] = src[i] & 0xffff;
        if (golden[i] != low[i]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", i, golden[i], low[i]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_int32 finished\n");
}
|
|
|
|
|
|
/*
 * Self-test for the float addition vector kernels: vector + constant first,
 * then vector + vector. Outputs must match the scalar sum exactly;
 * mismatches are printed.
 */
void vector_add_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    vector_add_const_float(lhs, rhs[0], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = lhs[k] + rhs[0];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    vector_add_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = lhs[k] + rhs[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("add const of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float subtraction vector kernels: vector - constant,
 * constant - vector, and vector - vector. Outputs must match the scalar
 * result exactly; a mismatch prints both values as floats and as raw
 * 32-bit words.
 */
void vector_sub_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];
    // Word views of the float buffers, used only for the hex dump.
    uint32_t *result_bits = (uint32_t *)result;
    uint32_t *expected_bits = (uint32_t *)expected;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    // lhs[k] - constant
    vector_sub_const_float(lhs, rhs[0], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = lhs[k] - rhs[0];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", k, expected[k], expected_bits[k], result[k], result_bits[k]);
        }
    }

    // constant - lhs[k]
    vector_const_sub_float(lhs, rhs[0], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = rhs[0] - lhs[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", k, expected[k], expected_bits[k], result[k], result_bits[k]);
        }
    }

    // lhs[k] - rhs[k]
    vector_sub_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = lhs[k] - rhs[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", k, expected[k], expected_bits[k], result[k], result_bits[k]);
        }
    }

    iot_printf("sub const of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float multiplication vector kernels (vector * vector and
 * vector * constant). Outputs must match the scalar product exactly.
 *
 * On a vector*vector mismatch the raw golden/output words are printed nine
 * times (the last one followed by an empty line) before the detailed report.
 */
void vector_mul_test_float() {
    uint8_t count = 25;
    float lhs[25];
    float rhs[25];
    float expected[25];
    float result[25];
    // Word views of the float buffers, used only for hex dumps.
    uint32_t *lhs_bits = (uint32_t *)&lhs[0];
    uint32_t *rhs_bits = (uint32_t *)&rhs[0];
    uint32_t *expected_bits = (uint32_t *)&expected[0];
    uint32_t *result_bits = (uint32_t *)&result[0];
    uint8_t k;

    for (k = 0; k < count; k++) {
        lhs[k] = -0.2 * (k - 2.5);
        rhs[k] = 0.3 * (k - 3.3);
    }

    vector_multiply_float(lhs, rhs, result, count);
    for (k = 0; k < count; k++) {
        expected[k] = lhs[k] * rhs[k];
        if (expected[k] != result[k]) {
            for (uint8_t rep = 0; rep < 8; rep++) {
                iot_printf("golden %08x, output %08x\n", expected_bits[k], result_bits[k]);
            }
            iot_printf("golden %08x, output %08x\n\n", expected_bits[k], result_bits[k]);

            iot_printf("incorrect at %d, a = 0x%08x, b = 0x%08x, golden 0x%08x, output 0x%08x\n", k, lhs_bits[k], rhs_bits[k], expected_bits[k], result_bits[k]);
            iot_printf("golden %f, output %f\n", expected[k], result[k]);
        }
    }

    vector_multiply_const_float(lhs, rhs[1], result, count);
    for (k = 0; k < count; k++) {
        expected[k] = lhs[k] * rhs[1];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, a = %f(0x%08x), b = %f(0x%08x), golden %f(0x%08x), output %f(0x%08x)\n", k, lhs[k], lhs_bits[k], rhs[1], rhs_bits[1], expected[k], expected_bits[k], result[k], result_bits[k]);
        }
    }
}
|
|
|
|
/*
 * Self-test for the float multiply-add and multiply-subtract vector kernels.
 *
 * Reference for mul_add: a*b + j. Reference for mul_sub: j - a*b.
 * The reference stores the product to a float before the add/subtract.
 * Mismatches print both values as floats and as raw 32-bit words.
 */
void vector_madd_msub_test_float() {
    uint8_t count = 25;
    float mul_a[25];
    float mul_b[25];
    float addend[25];
    float result[25];
    float expected[25];
    // Word views of the float buffers, used only for the hex dump.
    uint32_t *result_bits = (uint32_t *)result;
    uint32_t *expected_bits = (uint32_t *)expected;
    uint8_t k;

    for (k = 0; k < count; k++) {
        mul_a[k] = -0.2 * (k - 25);
        mul_b[k] = 0.3 * (k - 33);
        addend[k] = k - 50;
    }

    vector_mul_add_float(mul_a, mul_b, addend, result, count);
    for (k = 0; k < count; k++) {
        expected[k] = mul_a[k] * mul_b[k];
        expected[k] += addend[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", k, expected[k], expected_bits[k], result[k], result_bits[k]);
        }
    }
    iot_printf("vector_mul_add_float finished\n");

    vector_mul_sub_float(mul_a, mul_b, addend, result, count);
    for (k = 0; k < count; k++) {
        expected[k] = mul_a[k] * mul_b[k];
        expected[k] = addend[k] - expected[k];
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", k, expected[k], expected_bits[k], result[k], result_bits[k]);
        }
    }
    iot_printf("vector_mul_sub_float finished\n");
}
|
|
/*
 * Self-test for the float element-wise minimum kernels (vector/vector and
 * vector/constant). Outputs are compared with a scalar minimum; mismatches
 * are printed.
 */
void vector_min_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    vector_min_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        if (lhs[k] < rhs[k]) {
            expected[k] = lhs[k];
        } else {
            expected[k] = rhs[k];
        }
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    vector_min_const_float(lhs, rhs[0], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        if (lhs[k] < rhs[0]) {
            expected[k] = lhs[k];
        } else {
            expected[k] = rhs[0];
        }
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("min of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float element-wise maximum kernels (vector/vector and
 * vector/constant). Outputs are compared with a scalar maximum; mismatches
 * are printed.
 */
void vector_max_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    vector_max_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        if (lhs[k] > rhs[k]) {
            expected[k] = lhs[k];
        } else {
            expected[k] = rhs[k];
        }
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    vector_max_const_float(lhs, rhs[0], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        if (lhs[k] > rhs[0]) {
            expected[k] = lhs[k];
        } else {
            expected[k] = rhs[0];
        }
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("max of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float equality kernels (vector/vector and
 * vector/constant). The reference is 1.0 where the operands compare equal
 * and 0.0 otherwise; mismatches are printed.
 */
void vector_equal_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    // n is unsigned, so 3 * n - 23 wraps to a large positive value for n < 8.
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    vector_equal_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] == rhs[k]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    vector_equal_const_float(lhs, rhs[11], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] == rhs[11]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("equal of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float inequality kernels (vector/vector and
 * vector/constant). The reference is 1.0 where the operands differ and 0.0
 * otherwise; mismatches are printed.
 */
void vector_not_equal_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    // n is unsigned, so 3 * n - 23 wraps to a large positive value for n < 8.
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    vector_not_equal_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] != rhs[k]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    vector_not_equal_const_float(lhs, rhs[11], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] != rhs[11]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("not equal of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float "less than" kernels: vector/vector,
 * vector/constant and constant/vector. The reference is 1.0 where the
 * relation holds and 0.0 otherwise; mismatches are printed (the
 * vector/constant case also dumps the raw 32-bit words).
 */
void vector_less_than_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    // n is unsigned, so 3 * n - 23 wraps to a large positive value for n < 8.
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // lhs[k] < rhs[k]
    vector_less_than_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] < rhs[k]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    // lhs[k] < constant
    vector_less_than_const_float(lhs, rhs[11], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        uint32_t *expected_word = (uint32_t *)(expected + k);
        uint32_t *result_word = (uint32_t *)(result + k);
        expected[k] = (lhs[k] < rhs[11]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(%08x), output %f(%08x)\n", k, expected[k], *expected_word, result[k], *result_word);
        }
    }

    // constant < lhs[k], i.e. lhs[k] > constant
    vector_const_less_than_float(lhs, rhs[11], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] > rhs[11]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("less than of float test end\n");
}
|
|
|
|
/*
 * Self-test for the float "greater or equal" kernels: vector/vector,
 * vector/constant and constant/vector. The reference is 1.0 where the
 * relation holds and 0.0 otherwise; mismatches are printed (the
 * vector/vector case also dumps the raw 32-bit words).
 */
void vector_greater_or_equal_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float result[COUNT];
    float expected[COUNT];

    // n is unsigned, so 3 * n - 23 wraps to a large positive value for n < 8.
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    // lhs[k] >= rhs[k]
    vector_greater_or_equal_float(lhs, rhs, result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        uint32_t *expected_word = (uint32_t *)(expected + k);
        uint32_t *result_word = (uint32_t *)(result + k);
        expected[k] = (lhs[k] >= rhs[k]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f(%08x), output %f(%08x)\n", k, expected[k], *expected_word, result[k], *result_word);
        }
    }

    // lhs[k] >= constant
    vector_greater_or_equal_const_float(lhs, rhs[11], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] >= rhs[11]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    // constant >= lhs[k], i.e. lhs[k] <= constant
    vector_const_greater_or_equal_float(lhs, rhs[11], result, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        expected[k] = (lhs[k] <= rhs[11]);
        if (expected[k] != result[k]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", k, expected[k], result[k]);
        }
    }

    iot_printf("greater or equal of float test end\n");
}
|
|
|
|
|
|
void float_greater_equal_test(float *a, float *b, float *o, float *g, uint32_t length) {
|
|
uint32_t length_tail = length & (SIMD_W - 1);
|
|
uint32_t length_body = length - length_tail;
|
|
uint32_t *ua = (uint32_t *)a;
|
|
uint32_t *ub = (uint32_t *)b;
|
|
float *pa = a;
|
|
float *pb = b;
|
|
float *po = o;
|
|
for (uint32_t i = 0; i < length; i++) {
|
|
iot_printf("a[%d] = %f(0x%08x), b[%d] = %f(0x%08x)\n", i, a[i], ua[i], i, b[i], ub[i]);
|
|
g[i] = a[i] >= b[i] ? 1.0 : 0.0;
|
|
}
|
|
for (uint32_t i = 0; i < length_body; i += SIMD_W) {
|
|
vld_fpw(0, pa, 1);
|
|
vld_fpw(1, pb, 1);
|
|
vsge(2, 0, 1);
|
|
vst_fpw(po, 1, 2);
|
|
pa += SIMD_W;
|
|
pb += SIMD_W;
|
|
po += SIMD_W;
|
|
asm("fence");
|
|
}
|
|
for (uint32_t i = 0; i < length; i++) {
|
|
if (g[i] != o[i]) {
|
|
iot_printf("g[%d] = %f, o[%d] = %f\n", i, g[i], i, o[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Feeds all 256 int8 values (-128..127) through sigmoid_int8, prints every
 * output value on its own line, and then prints the elapsed cycle count of
 * the call (truncated to 32 bits).
 */
void sigmoid_int8_test() {
    enum { COUNT = 256 };
    int8_t samples[COUNT];
    int8_t result[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        samples[k] = k - 128;
    }

    uint64_t start_cycle = cpu_get_mcycle();
    sigmoid_int8(samples, result, COUNT);
    uint64_t stop_cycle = cpu_get_mcycle();

    for (k = 0; k < COUNT; k++) {
        iot_printf("%d\n", result[k]);
    }
    iot_printf("time = %d\n", (uint32_t)(stop_cycle - start_cycle));
}
|
|
|
|
/*
 * Feeds all 256 int8 values (-128..127) through tanh_int8, prints every
 * output value on its own line, and then prints the elapsed cycle count of
 * the call (truncated to 32 bits).
 */
void tanh_int8_test() {
    enum { COUNT = 256 };
    int8_t samples[COUNT];
    int8_t result[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        samples[k] = k - 128;
    }

    uint64_t start_cycle = cpu_get_mcycle();
    tanh_int8(samples, result, COUNT);
    uint64_t stop_cycle = cpu_get_mcycle();

    for (k = 0; k < COUNT; k++) {
        iot_printf("%d\n", result[k]);
    }
    iot_printf("time = %d\n", (uint32_t)(stop_cycle - start_cycle));
}
|
|
|
|
void half_bits_test_16_to_8() {
|
|
//uint16_t a[256] = {44641,49129,26256,16088,58270,14244,30327,37995,6931,56640,5774,49320,7276,17402,32136,45320,36031,19644,19435,37152,49723,63428,61882,14223,23149,54102,831,36226,55644,19419,36099,7483,30896,18941,25805,23607,48792,282,48783,54897,49426,6762,20780,53309,24469,8539,61695,19415,59186,50222,32014,25999,41713,64471,33742,23461,4465,38142,60141,23549,24896,15021,3281,41924,38058,7016,24388,14059,4248,54782,40738,39345,58949,38653,25051,58363,19180,3206,3344,40722,33240,16872,34385,65101,62755,61266,28302,12659,40628,21913,4935,63079,31480,25878,21467,21352,64551,55641,38244,30774,5504,48349,57711,14148,54166,25793,3399,23600,39731,57112,45684,38384,31193,33232,63416,7174,52058,22787,60669,44254,32499,36905,43035,60968,26860,60485,3310,48805,45049,37903,143,41464,60646,4187,63860,55819,30387,40709,64641,39538,23352,21361,36043,63552,2583,54181,12272,18361,50277,20570,23835,46469,61349,53900,63130,34784,6593,11239,11974,52307,41069,23393,25954,22119,63136,7816,55537,44185,14606,36887,48995,63029,48235,43904,43939,3273,291,54180,40612,64664,16495,24803,47522,45992,9694,37811,49634,30017,6245,33255,32350,65516,31063,3328,36368,56021,23334,61044,770,43954,56915,8246,59888,19817,44941,61740,51312,30965,54138,54642,4226,32393,48326,25609,54200,32173,49050,14796,54710,51153,28930,54744,56167,47714,31678,64915,61073,64506,30663,17403,37980,12405,8087,58728,65146,45550,56799,38387,38227,44411,18989,61993,48043,54784,40966,25067,16106,29127,19598,19525,43595,36213,65384,26661,39739,41330};
|
|
uint16_t a[256];
|
|
for (uint32_t i = 0; i < 256; i++) {
|
|
a[i] = ((i - 128) << 8) | i;
|
|
//iot_printf("a[%d] = 0x%04x\n", i, a[i]);
|
|
}
|
|
uint8_t h[256];
|
|
uint8_t l[256];
|
|
uint16_t *pa = a;
|
|
uint8_t *ph = h;
|
|
uint8_t *pl = l;
|
|
for (uint16_t i = 0; i < 256; i += SIMD_W << 2) {
|
|
vld_uih(0, pa, 1);
|
|
pa += SIMD_W << 1;
|
|
vld_uih(1, pa, 1);
|
|
pa += SIMD_W << 1;
|
|
vnwh(2, 0, 1);
|
|
vnwl(3, 0, 1);
|
|
vst_uib(ph, 1, 2);
|
|
vst_uib(pl, 1, 3);
|
|
ph += SIMD_W << 2;
|
|
pl += SIMD_W << 2;
|
|
asm("fence");
|
|
}
|
|
for (uint16_t i = 0; i < 16; i++) {
|
|
uint8_t hi = a[i] >> 8;
|
|
if (hi != h[i]) {
|
|
iot_printf("golden_h[%d] = %d, output_h[%d] = %d\n", i, hi, i, h[i]);
|
|
}
|
|
}
|
|
for (uint16_t i = 0; i < 16; i++) {
|
|
uint8_t lo = a[i] & 0xff;
|
|
if (lo != l[i]) {
|
|
iot_printf("golden_l[%d] = %d, output_l[%d] = %d\n", i, lo, i, l[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
void half_bits_test_32_to_16() {
|
|
uint32_t number_elem = 256;
|
|
int32_t *a = (int32_t *)os_mem_malloc(1, number_elem * sizeof(int32_t));
|
|
int16_t *h = (int16_t *)os_mem_malloc(1, number_elem * sizeof(int16_t));
|
|
int16_t *l = (int16_t *)os_mem_malloc(1, number_elem * sizeof(int16_t));
|
|
int32_t *pa = a;
|
|
int16_t *ph = h;
|
|
int16_t *pl = l;
|
|
for (uint32_t i = 0; i < number_elem; i++) {
|
|
a[i] = ((i - 128) << 16) | i;
|
|
//iot_printf("a[%d] = 0x%08x\n", i, a[i]);
|
|
}
|
|
for (uint16_t i = 0; i < number_elem; i += SIMD_WH) {
|
|
vld_siw(0, pa, 1);
|
|
pa += SIMD_W;
|
|
vld_siw(1, pa, 1);
|
|
pa += SIMD_W;
|
|
vnwh(2, 0, 1);
|
|
vnwl(3, 0, 1);
|
|
vst_sih(ph, 1, 2);
|
|
vst_sih(pl, 1, 3);
|
|
ph += SIMD_WH;
|
|
pl += SIMD_WH;
|
|
asm("fence");
|
|
}
|
|
for (uint16_t i = 0; i < number_elem; i++) {
|
|
int16_t hi = a[i] >> 16;
|
|
if (hi != h[i]) {
|
|
iot_printf("golden_h[%d] = 0x%04x, output_h[%d] = 0x%04x\n", i, hi, i, h[i]);
|
|
}
|
|
}
|
|
for (uint16_t i = 0; i < number_elem; i++) {
|
|
int16_t lo = a[i] & 0xffff;
|
|
if (lo != l[i]) {
|
|
iot_printf("golden_l[%d] = 0x%04x, output_l[%d] = 0x%04x\n", i, lo, i, l[i]);
|
|
}
|
|
}
|
|
os_mem_free(a);
|
|
os_mem_free(h);
|
|
os_mem_free(l);
|
|
}
|
|
|
|
void half_bits_test_32_to_16u() {
|
|
uint32_t number_elem = 256;
|
|
uint32_t *a = (uint32_t *)os_mem_malloc(1, number_elem * sizeof(uint32_t));
|
|
uint16_t *h = (uint16_t *)os_mem_malloc(1, number_elem * sizeof(uint16_t));
|
|
uint16_t *l = (uint16_t *)os_mem_malloc(1, number_elem * sizeof(uint16_t));
|
|
uint32_t *pa = a;
|
|
uint16_t *ph = h;
|
|
uint16_t *pl = l;
|
|
for (uint32_t i = 0; i < number_elem; i++) {
|
|
a[i] = ((i - 128) << 16) | i;
|
|
//iot_printf("a[%d] = 0x%08x\n", i, a[i]);
|
|
}
|
|
for (uint16_t i = 0; i < number_elem; i += SIMD_WH) {
|
|
vld_uiw(0, pa, 1);
|
|
pa += SIMD_W;
|
|
vld_uiw(1, pa, 1);
|
|
pa += SIMD_W;
|
|
vnwh(2, 0, 1);
|
|
vnwl(3, 0, 1);
|
|
vst_uih(ph, 1, 2);
|
|
vst_uih(pl, 1, 3);
|
|
ph += SIMD_WH;
|
|
pl += SIMD_WH;
|
|
asm("fence");
|
|
}
|
|
for (uint16_t i = 0; i < number_elem; i++) {
|
|
int16_t hi = a[i] >> 16;
|
|
if (hi != h[i]) {
|
|
iot_printf("golden_h[%d] = 0x%04x, output_h[%d] = 0x%04x\n", i, hi, i, h[i]);
|
|
}
|
|
}
|
|
for (uint16_t i = 0; i < number_elem; i++) {
|
|
int16_t lo = a[i] & 0xffff;
|
|
if (lo != l[i]) {
|
|
iot_printf("golden_l[%d] = 0x%04x, output_l[%d] = 0x%04x\n", i, lo, i, l[i]);
|
|
}
|
|
}
|
|
os_mem_free(a);
|
|
os_mem_free(h);
|
|
os_mem_free(l);
|
|
}
|
|
|
|
/*
 * Runs one fully-connected-layer test case received via
 * read_case_from_python and checks the result with verify_8bits.
 *
 * The payload is placed at 0x10200000 and laid out back to back as
 * weight | bias | input | golden | output, with the first four segment sizes
 * taken from config_get[52..55]. config_get[4] selects the element width in
 * bytes, config_get[5] is the batch count and config_get[6] selects the
 * bias-shift kernel variant; config_get[0..3] are forwarded to the kernel
 * unchanged. Unsupported width/variant combinations run no kernel. The
 * elapsed cycle count (truncated to 32 bits) is printed before verification.
 */
void fc_test_from_pc() {
    uint8_t raw_config[120];
    uint32_t cfg[60];
    uint32_t base = 0x10200000;

    read_case_from_python(raw_config, cfg, (uint8_t *)base);

    uint32_t weight_at = base;
    uint32_t bias_at = weight_at + cfg[52];
    uint32_t input_at = bias_at + cfg[53];
    uint32_t golden_at = input_at + cfg[54];
    uint32_t output_at = golden_at + cfg[55];
    uint32_t width_bytes = cfg[4];
    uint32_t batch_count = cfg[5];
    uint32_t use_bias_shift = cfg[6];

    uint64_t start_cycle = cpu_get_mcycle();
    if (width_bytes == 1) {             // 8-bit elements
        if (use_bias_shift == 0) {
            fc_int8_to_int8_weight_8bit_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int32_t *)bias_at, cfg[0], cfg[1], cfg[2], batch_count);
        } else if (use_bias_shift == 1) {
            fc_int8_to_int8_weight_8bit_bias_shift_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int8_t *)bias_at, cfg[0], cfg[1], cfg[2], cfg[3], batch_count);
        }
    } else if (width_bytes == 2) {      // 16-bit elements
        if (use_bias_shift == 0) {
            fc_int16_to_int16_weight_16bit_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int64_t *)bias_at, cfg[0], cfg[1], cfg[2], batch_count);
        } else if (use_bias_shift == 1) {
            fc_int16_to_int16_weight_16bit_bias_shift_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int16_t *)bias_at, cfg[0], cfg[1], cfg[2], cfg[3], batch_count);
        }
    }
    uint64_t stop_cycle = cpu_get_mcycle();

    iot_printf("time = %d\n", (uint32_t)(stop_cycle - start_cycle));
    verify_8bits(output_at, golden_at, cfg[55]);
}
|
|
|
|
/*
 * Runs one depth-wise fully-connected test case received via
 * read_case_from_python and checks the result with verify_8bits.
 *
 * The payload is placed at 0x10200000 and laid out back to back as
 * weight | bias | input | golden | output, with the first four segment sizes
 * taken from config_get[52..55]. config_get[4] selects the element width in
 * bytes, config_get[5] is the batch count and config_get[6] selects the
 * bias-shift kernel variant; config_get[1..3] are forwarded to the kernel
 * unchanged. Unsupported width/variant combinations run no kernel. The
 * elapsed cycle count (truncated to 32 bits) is printed before verification.
 */
void depth_fc_test_from_pc() {
    uint8_t raw_config[120];
    uint32_t cfg[60];
    uint32_t base = 0x10200000;

    read_case_from_python(raw_config, cfg, (uint8_t *)base);

    uint32_t weight_at = base;
    uint32_t bias_at = weight_at + cfg[52];
    uint32_t input_at = bias_at + cfg[53];
    uint32_t golden_at = input_at + cfg[54];
    uint32_t output_at = golden_at + cfg[55];
    uint32_t width_bytes = cfg[4];
    uint32_t batch_count = cfg[5];
    uint32_t use_bias_shift = cfg[6];

    uint64_t start_cycle = cpu_get_mcycle();
    if (width_bytes == 1) {             // 8-bit elements
        if (use_bias_shift == 0) {
            fc_depth_int8_to_int8_weight_8bit_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int32_t *)bias_at, cfg[1], cfg[2], batch_count);
        } else if (use_bias_shift == 1) {
            fc_depth_int8_to_int8_weight_8bit_bias_shift_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int8_t *)bias_at, cfg[1], cfg[2], cfg[3], batch_count);
        }
    } else if (width_bytes == 2) {      // 16-bit elements
        if (use_bias_shift == 0) {
            fc_depth_int16_to_int16_weight_16bit_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int64_t *)bias_at, cfg[1], cfg[2], batch_count);
        } else if (use_bias_shift == 1) {
            fc_depth_int16_to_int16_weight_16bit_bias_shift_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int16_t *)bias_at, cfg[1], cfg[2], cfg[3], batch_count);
        }
    }
    uint64_t stop_cycle = cpu_get_mcycle();

    iot_printf("time = %d\n", (uint32_t)(stop_cycle - start_cycle));

    // NOTE: a disabled guard used to skip verification when
    // output_at + cfg[55] exceeded 0x10008000.
    verify_8bits(output_at, golden_at, cfg[55]);
}
|
|
|
|
void dsp_init(){
|
|
asm("csrs mstatus,%0"::"r"(0x18000));
|
|
int32_t *p_reg = (int32_t *)0x02000148;
|
|
*p_reg = 0x3;
|
|
}
|
|
|
|
void vmaddw_test() {
|
|
int8_t a[16];
|
|
int8_t b[16];
|
|
int32_t in[16];
|
|
int32_t out[16];
|
|
int32_t *pi = in;
|
|
int32_t *po = out;
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
a[i] = i - 8;
|
|
b[i] = i - 8;
|
|
in[i] = 0;
|
|
}
|
|
vld_sib(0, a, 1);
|
|
vld_sib(1, b, 1);
|
|
vld_siw(4, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_siw(5, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_siw(6, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_siw(7, pi, 1);
|
|
pi += SIMD_WW;
|
|
vmaddw(4, 0, 1);
|
|
vst_siw(po, 1, 4);
|
|
po += SIMD_WW;
|
|
vst_siw(po, 1, 5);
|
|
po += SIMD_WW;
|
|
vst_siw(po, 1, 6);
|
|
po += SIMD_WW;
|
|
vst_siw(po, 1, 7);
|
|
po += SIMD_WW;
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
int32_t g = in[i] + a[i] * b[i];
|
|
iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]);
|
|
}
|
|
}
|
|
|
|
void vmsubw_testu() {
|
|
uint8_t a[16];
|
|
uint8_t b[16];
|
|
uint32_t in[16];
|
|
uint32_t out[16];
|
|
uint32_t *pi = in;
|
|
uint32_t *po = out;
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
a[i] = 16 - i;
|
|
b[i] = 17 - i;
|
|
in[i] = 500;
|
|
}
|
|
vld_uib(0, a, 1);
|
|
vld_uib(1, b, 1);
|
|
vld_uiw(4, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_uiw(5, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_uiw(6, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_uiw(7, pi, 1);
|
|
pi += SIMD_WW;
|
|
vmsubw(4, 0, 1);
|
|
vst_uiw(po, 1, 4);
|
|
po += SIMD_WW;
|
|
vst_uiw(po, 1, 5);
|
|
po += SIMD_WW;
|
|
vst_uiw(po, 1, 6);
|
|
po += SIMD_WW;
|
|
vst_uiw(po, 1, 7);
|
|
po += SIMD_WW;
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
uint32_t g = in[i] - a[i] * b[i];
|
|
iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]);
|
|
}
|
|
}
|
|
|
|
|
|
void vmsubw_test() {
|
|
int8_t a[16];
|
|
int8_t b[16];
|
|
int32_t in[16];
|
|
int32_t out[16];
|
|
int32_t *pi = in;
|
|
int32_t *po = out;
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
a[i] = 16 - i;
|
|
b[i] = 17 - i;
|
|
in[i] = 500;
|
|
}
|
|
vld_sib(0, a, 1);
|
|
vld_sib(1, b, 1);
|
|
vld_siw(4, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_siw(5, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_siw(6, pi, 1);
|
|
pi += SIMD_WW;
|
|
vld_siw(7, pi, 1);
|
|
pi += SIMD_WW;
|
|
vmsubw(4, 0, 1);
|
|
vst_siw(po, 1, 4);
|
|
po += SIMD_WW;
|
|
vst_siw(po, 1, 5);
|
|
po += SIMD_WW;
|
|
vst_siw(po, 1, 6);
|
|
po += SIMD_WW;
|
|
vst_siw(po, 1, 7);
|
|
po += SIMD_WW;
|
|
asm("fence");
|
|
for (uint8_t i = 0; i < 16; i++) {
|
|
int32_t g = in[i] - a[i] * b[i];
|
|
iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]);
|
|
}
|
|
}
|
|
|
|
/*
 * Exercises the vmax / vmin SIMD instructions on one vector of four floats
 * and prints the stored results (no automatic pass/fail check). The output
 * buffers are pre-filled with 1.0 / 1.1 so an unwritten slot is visible.
 */
void max_min_test_float() {
    float lhs[4] = {-0.1, 0.0, 0.1, 0.2};
    float rhs[4] = {0.2, 0.1, 0.0, -0.1};
    float largest[4] = {1.0, 1.0, 1.0, 1.0};
    float smallest[4] = {1.1, 1.1, 1.1, 1.1};
    uint8_t lane;

    vld_fpw(0, lhs, 1);
    vld_fpw(1, rhs, 1);
    vmax(2, 0, 1);
    vmin(3, 0, 1);
    vst_fpw(largest, 1, 2);
    vst_fpw(smallest, 1, 3);
    asm("fence");

    for (lane = 0; lane < 4; lane++) {
        iot_printf("max[%d] = %f, min[%d] = %f\n", lane, largest[lane], lane, smallest[lane]);
    }
}
|
|
|
|
/*
 * Exercises the float compare instructions vseq / vsne / vslt / vsge on one
 * vector of four floats and prints the raw 32-bit result words of each
 * comparison (no automatic pass/fail check).
 */
void compare_test_float() {
    float lhs[4] = {1.0, 2.0, 3.0, 4.0};
    float rhs[4] = {5.0, 4.0, 3.0, 2.0};
    uint32_t not_equal[4];
    uint32_t equal[4];
    uint32_t less[4];
    uint32_t greater_equal[4];
    uint8_t lane;

    vld_fpw(0, lhs, 1);
    vld_fpw(1, rhs, 1);
    vseq(2, 0, 1);
    vsne(3, 0, 1);
    vslt(4, 0, 1);
    vsge(5, 0, 1);
    vst_fpw(equal, 1, 2);
    vst_fpw(not_equal, 1, 3);
    vst_fpw(less, 1, 4);
    vst_fpw(greater_equal, 1, 5);
    asm("fence");

    for (lane = 0; lane < 4; lane++) {
        iot_printf("eq[%d] = %08x, ne[%d] = %08x, lt[%d] = %08x, ge[%d] = %08x\n", lane, equal[lane], lane, not_equal[lane], lane, less[lane], lane, greater_equal[lane]);
    }
}
|
|
|
|
/*
 * Exercises the vmull SIMD instruction on one vector of four floats and
 * prints, per lane, both inputs, the scalar product and the hardware output,
 * each as a float and as its raw 32-bit word (no automatic pass/fail check).
 */
void mul_test_float() {
    float lhs[4];
    float rhs[4];
    float expected[4];
    float result[4];
    // Word views of the float buffers, used only for the hex dump.
    uint32_t *lhs_bits = (uint32_t *)lhs;
    uint32_t *rhs_bits = (uint32_t *)rhs;
    uint32_t *expected_bits = (uint32_t *)expected;
    uint32_t *result_bits = (uint32_t *)result;
    uint8_t lane;

    for (lane = 0; lane < 4; lane++) {
        lhs[lane] = -0.2 * (lane - 2.5);
        rhs[lane] = 0.3 * (lane - 3.3);
    }

    vld_fpw(0, lhs, 1);
    vld_fpw(1, rhs, 1);
    vmull(2, 0, 1);
    vst_fpw(result, 1, 2);
    asm("fence");

    for (lane = 0; lane < 4; lane++) {
        expected[lane] = lhs[lane] * rhs[lane];
        iot_printf("a[%d] = %f(0x%08x), b[%d] = %f(0x%08x), g[%d] = %f(0x%08x), o[%d] = %f(0x%08x)\n", lane, lhs[lane], lhs_bits[lane], lane, rhs[lane], rhs_bits[lane], lane, expected[lane], expected_bits[lane], lane, result[lane], result_bits[lane]);
    }
}
|
|
|
|
/*
 * Exercises the float vmadd / vmsub SIMD instructions on one vector of four
 * floats. Registers 2 and 3 are both preloaded with j, then updated by
 * vmadd(2, 0, 1) and vmsub(3, 0, 1) using a (reg 0) and b (reg 1). The
 * stored results are printed as floats and raw 32-bit words (no automatic
 * pass/fail check).
 *
 * A fence is issued after the vector stores and before the CPU reads the
 * result arrays, matching the other raw SIMD tests in this file.
 */
void vmadd_test_float() {
    float a[4] = {0.1, 0.2, 0.3, 0.4};
    float b[4] = {1.0, 2.0, 3.0, 4.0};
    float j[4] = {1.0, 2.0, 3.0, 4.0};
    float p[4] = {0.0, 0.0, 0.0, 0.0};
    float s[4] = {0.0, 0.0, 0.0, 0.0};
    // Word views of the result buffers, used only for the hex dump.
    uint32_t *up = (uint32_t *)p;
    uint32_t *us = (uint32_t *)s;

    vld_fpw(0, a, 1);
    vld_fpw(1, b, 1);
    vld_fpw(2, j, 1);
    vld_fpw(3, j, 1);
    vmadd(2, 0, 1);
    vmsub(3, 0, 1);
    vst_fpw(p, 1, 2);
    vst_fpw(s, 1, 3);
    asm("fence");

    for (uint32_t i = 0; i < 4; i++) {
        iot_printf("add result[%d] = %f(0x%08x), sub result[%d] = %f(0x%08x)\n", i, p[i], up[i], i, s[i], us[i]);
    }
}
|
|
|
|
// Sweeps sigmoid_int16() over every int16 input value (-32768..32767),
// 16 values per call, and prints each output on its own line.
void sigmoid_int16_test() {
    int16_t in[16];
    int16_t ou[16];

    for (uint32_t base = 0; base < 65536; base += 16) {
        // Fill one batch: base + k - 32768 maps 0..65535 onto the int16 range.
        for (uint8_t k = 0; k < 16; k++) {
            in[k] = k + base - 32768;
        }

        sigmoid_int16(in, ou, 16);

        for (uint8_t k = 0; k < 16; k++) {
            iot_printf("%d\n", ou[k]);
        }
    }
}
|
|
|
|
// Sweeps tanh_int16() over every int16 input value (-32768..32767),
// 16 values per call, and prints each output on its own line.
void tanh_int16_test() {
    int16_t in[16];
    int16_t ou[16];

    for (uint32_t base = 0; base < 65536; base += 16) {
        // Fill one batch: base + k - 32768 maps 0..65535 onto the int16 range.
        for (uint8_t k = 0; k < 16; k++) {
            in[k] = k + base - 32768;
        }

        tanh_int16(in, ou, 16);

        for (uint8_t k = 0; k < 16; k++) {
            iot_printf("%d\n", ou[k]);
        }
    }
}
|
|
|
|
void vld_stride_test() {
|
|
uint32_t data_len = 2048;
|
|
uint32_t *data = (uint32_t *)os_mem_malloc(1, data_len * 4);
|
|
uint8_t *data8 = (uint8_t *)data;
|
|
for (uint32_t i = 0; i < data_len * 4; i++) {
|
|
data8[i] = i;
|
|
}
|
|
uint32_t load[4];
|
|
uint8_t *load8 = (uint8_t *)load;
|
|
for (uint32_t s = 0; s < 512; s++) {
|
|
iot_printf("stride = %d\n", s);
|
|
vlds_uib(0, data, (s * 4))
|
|
vst_uib(load8, 1, 0)
|
|
asm("fence");
|
|
for (uint32_t i = 0; i < SIMD_WB; i++) {
|
|
iot_printf("load[%d] = %d\n", i, load8[i]);
|
|
}
|
|
}
|
|
os_mem_free(data);
|
|
}
|
|
|
|
// Exercises the strided word store vsts_uiw for stride arguments 0, 4, ..., 2044.
// For every stride the destination buffer is cleared, register 0 (preloaded
// with `load`) is stored with that stride, and the buffer is scanned:
//   store[0] must equal load[0],
//   store[s] must equal load[1],
//   every other word must still be 0.
// Mismatches are printed; nothing is printed on success.
//
// Changes from the original:
//   - the os_mem_malloc() result is checked before use;
//   - a fence is issued between the vector store and the CPU read-back, as
//     the other vector tests in this file do after vst_*;
//   - loop bounds use store_len instead of repeating the literal 2048.
//
// NOTE(review): the original carried commented-out checks for load[2] at
// index 2*s and load[3] at index 3*s. They remain disabled here, so those
// positions are still checked against 0 - confirm the intended lane count.
void vst_stride_test() {
    uint32_t store_len = 2048;
    uint32_t *store = (uint32_t *)os_mem_malloc(1, store_len * 4);
    uint32_t load[4] = {0x12345678, 0x23456789, 0x34567890, 0x45678901};

    if (store == NULL) {
        iot_printf("vst_stride_test: os_mem_malloc failed\n");
        return;
    }

    vld_uiw(0, load, 1);

    for (uint32_t s = 0; s < 512; s++) {
        for (uint32_t i = 0; i < store_len; i++) {
            store[i] = 0;
        }

        vsts_uiw(store, (s * 4), 0);

        // Fence before the CPU reads the buffer written by the vector store.
        asm("fence");

        for (uint32_t i = 0; i < store_len; i++) {
            if (i == 0) {
                if (store[i] != load[0]) {
                    iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[0]);
                }
            } else if (i == s) {
                if (store[i] != load[1]) {
                    iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[1]);
                }
            } else {
                if (store[i] != 0) {
                    iot_printf("store incorrect at stride %d, store[%d] = %x, should be 0\n", s, i, store[i]);
                }
            }
        }
    }

    os_mem_free(store);
}
|
|
|
|
// Checks vector_uint8_to_float(in, out, 1, 2, 256) on the inputs 0..255 and
// times it against an 8x unrolled scalar conversion loop.
// The reference value used for comparison is (in[i] + 1) * 2; any mismatch
// is printed as "golden, out, index". Cycle counts come from cpu_get_mcycle().
//
// Change from the original: the three os_mem_malloc() results are checked
// before use.
void uint8_to_float_test() {
    uint8_t *in = (uint8_t *)os_mem_malloc(1, 256);
    float *out = (float *)os_mem_malloc(1, 256 * 4);
    float *golden = (float *)os_mem_malloc(1, 256 * 4);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("uint8_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    for (uint32_t i = 0; i < 256; i++) {
        in[i] = i;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_uint8_to_float(in, out, 1, 2, 256);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 256; i++) {
        golden[i] = (i + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            iot_printf("%f, %f, %d\n", golden[i], out[i], i);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast, unrolled by 8 (kept unrolled so the cycle
    // count stays comparable with earlier runs).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 256; i += 8) {
        out[i + 0] = (float)in[i + 0];
        out[i + 1] = (float)in[i + 1];
        out[i + 2] = (float)in[i + 2];
        out[i + 3] = (float)in[i + 3];
        out[i + 4] = (float)in[i + 4];
        out[i + 5] = (float)in[i + 5];
        out[i + 6] = (float)in[i + 6];
        out[i + 7] = (float)in[i + 7];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);

    iot_printf("uint8_to_float_test_end\n");
}
|
|
|
|
// Checks vector_int8_to_float(in, out, 1.0, 2.0, 256) on the inputs
// -128..127 and times it against a scalar conversion loop.
// The reference value used for comparison is (in[i] + 1) * 2; any mismatch
// is printed as the output value plus the raw bit patterns of output and
// reference. Cycle counts come from cpu_get_mcycle().
//
// Changes from the original:
//   - the three os_mem_malloc() results are checked before use;
//   - the scalar baseline now converts in[i]. The original converted the
//     loop index, so it never touched the int8 input it is supposed to be
//     the baseline for (the uint8 variant of this test converts in[i]).
void int8_to_float_test() {
    int8_t *in = (int8_t *)os_mem_malloc(1, 256);
    float *out = (float *)os_mem_malloc(1, 256 * 4);
    float *golden = (float *)os_mem_malloc(1, 256 * 4);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("int8_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    // Word views used only to print the bit patterns on a mismatch.
    uint32_t *uo = (uint32_t *)out;
    uint32_t *ug = (uint32_t *)golden;

    for (int32_t i = 0; i < 256; i++) {
        in[i] = i - 128;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_int8_to_float(in, out, 1.0, 2.0, 256);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 256; i++) {
        golden[i] = (i - 128.0 + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            iot_printf("%f, 0x%08x, 0x%08x\n", out[i], uo[i], ug[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast of every input element.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 256; i++) {
        out[i] = (float)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(golden);
    os_mem_free(out);

    iot_printf("int8_to_float_test_end\n");
}
|
|
|
|
|
|
// Checks vector_uint16_to_float(in, out, 1, 2, 4096) on the inputs 0..4095
// and times it against an 8x unrolled scalar conversion loop.
// The reference value used for comparison is (in[i] + 1) * 2; any mismatch
// is printed as "golden, out, index". Cycle counts come from cpu_get_mcycle().
//
// Change from the original: the three os_mem_malloc() results are checked
// before use.
void uint16_to_float_test() {
    uint16_t *in = (uint16_t *)os_mem_malloc(1, 4096 * 2);
    float *out = (float *)os_mem_malloc(1, 4096 * 4);
    float *golden = (float *)os_mem_malloc(1, 4096 * 4);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("uint16_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    for (uint32_t i = 0; i < 4096; i++) {
        in[i] = i;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_uint16_to_float(in, out, 1, 2, 4096);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 4096; i++) {
        golden[i] = (i + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            iot_printf("%f, %f, %d\n", golden[i], out[i], i);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast, unrolled by 8 (kept unrolled so the cycle
    // count stays comparable with earlier runs).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 4096; i += 8) {
        out[i + 0] = (float)in[i + 0];
        out[i + 1] = (float)in[i + 1];
        out[i + 2] = (float)in[i + 2];
        out[i + 3] = (float)in[i + 3];
        out[i + 4] = (float)in[i + 4];
        out[i + 5] = (float)in[i + 5];
        out[i + 6] = (float)in[i + 6];
        out[i + 7] = (float)in[i + 7];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(golden);
    os_mem_free(out);

    iot_printf("uint16_to_float_test_end\n");
}
|
|
|
|
// Checks vector_int16_to_float(in, out, 1, 2, 4096) on the inputs
// -32768..-28673 and times it against a scalar conversion loop.
// The reference value used for comparison is (in[i] + 1) * 2; any mismatch
// is printed as the output value plus the raw bit patterns of output and
// reference. Cycle counts come from cpu_get_mcycle().
//
// Changes from the original:
//   - the three os_mem_malloc() results are checked before use;
//   - the scalar baseline now converts in[i]. The original converted the
//     loop index, so it never touched the int16 input it is supposed to be
//     the baseline for (the uint16 variant of this test converts in[i]).
void int16_to_float_test() {
    int16_t *in = (int16_t *)os_mem_malloc(1, 4096 * 2);
    float *out = (float *)os_mem_malloc(1, 4096 * 4);
    float *golden = (float *)os_mem_malloc(1, 4096 * 4);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("int16_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    // Word views used only to print the bit patterns on a mismatch.
    uint32_t *uo = (uint32_t *)out;
    uint32_t *ug = (uint32_t *)golden;

    for (int32_t i = 0; i < 4096; i++) {
        in[i] = i - 32768;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_int16_to_float(in, out, 1, 2, 4096);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 4096; i++) {
        golden[i] = (i - 32768.0 + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            iot_printf("%f, 0x%08x, 0x%08x\n", out[i], uo[i], ug[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast of every input element.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 4096; i++) {
        out[i] = (float)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(golden);
    os_mem_free(out);

    iot_printf("int16_to_float_test_end\n");
}
|
|
|
|
// Checks vector_float_to_int8(in, out, 1.0, 2.0, 2560) on the inputs
// (i - 1280) * 0.1 and times it against a scalar cast loop.
// The reference used for comparison is 2 * (in[i] - 1) shifted by +128,
// clamped to [0, 255], truncated to uint8 and shifted back by -128. Any
// mismatch is printed as "input, output". Cycle counts come from
// cpu_get_mcycle().
//
// Change from the original: the three os_mem_malloc() results are checked
// before use.
void float_to_int8_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    int8_t *out = (int8_t *)os_mem_malloc(1, 2560);
    int8_t *golden = (int8_t *)os_mem_malloc(1, 2560);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_int8_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    for (int32_t i = 0; i < 2560; i++) {
        in[i] = (i - 1280) * 0.1f;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_float_to_int8(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 2560; i++) {
        // Saturate in the offset (unsigned) domain, then shift back.
        float gf = 2.0 * (in[i] - 1.0) + 128.0;
        gf = (gf > 255.0) ? 255.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint8_t)gf - 128;
        if (out[i] != golden[i]) {
            iot_printf("%f, %d\n", in[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast of every input element.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        out[i] = (int8_t)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);

    iot_printf("float_to_int8_test end\n");
}
|
|
|
|
// Checks vector_float_to_uint8(in, out, 1.0, 2.0, 2560) on the inputs
// i * 0.1 and times it against an 8x unrolled scalar cast loop.
// The reference used for comparison is 2 * (in[i] - 1) clamped to [0, 255]
// and truncated to uint8. Any mismatch is printed as
// "index, input, golden, output". Cycle counts come from cpu_get_mcycle().
//
// Change from the original: the three os_mem_malloc() results are checked
// before use.
void float_to_uint8_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    uint8_t *out = (uint8_t *)os_mem_malloc(1, 2560);
    uint8_t *golden = (uint8_t *)os_mem_malloc(1, 2560);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_uint8_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    for (int32_t i = 0; i < 2560; i++) {
        in[i] = i * 0.1f;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_float_to_uint8(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 2560; i++) {
        float gf = 2.0 * (in[i] - 1.0);
        gf = (gf > 255.0) ? 255.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint8_t)gf;
        if (out[i] != golden[i]) {
            iot_printf("at %d, %f, %d, %d\n", i, in[i], golden[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast, unrolled by 8 (kept unrolled so the cycle
    // count stays comparable with earlier runs).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i += 8) {
        out[i] = (uint8_t)in[i];
        out[i + 1] = (uint8_t)in[i + 1];
        out[i + 2] = (uint8_t)in[i + 2];
        out[i + 3] = (uint8_t)in[i + 3];
        out[i + 4] = (uint8_t)in[i + 4];
        out[i + 5] = (uint8_t)in[i + 5];
        out[i + 6] = (uint8_t)in[i + 6];
        out[i + 7] = (uint8_t)in[i + 7];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);

    iot_printf("float_to_uint8_test end\n");
}
|
|
|
|
// Checks vector_float_to_int16(in, out, 1.0, 2.0, 2560) on the inputs
// (i - 1280) * 10.125 and times it against a scalar cast loop.
// The reference used for comparison is 2 * (in[i] - 1) shifted by +32768,
// clamped to [0, 65535], truncated to uint16 and shifted back by -32768.
// Any mismatch is printed as "input, golden, output". Cycle counts come
// from cpu_get_mcycle().
//
// Change from the original: the three os_mem_malloc() results are checked
// before use.
void float_to_int16_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    int16_t *out = (int16_t *)os_mem_malloc(1, 2560 * 2);
    int16_t *golden = (int16_t *)os_mem_malloc(1, 2560 * 2);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_int16_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    for (int32_t i = 0; i < 2560; i++) {
        in[i] = (i - 1280) * 10.125f;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_float_to_int16(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 2560; i++) {
        // Saturate in the offset (unsigned) domain, then shift back.
        float gf = 2.0 * (in[i] - 1.0) + 32768.0;
        gf = (gf > 65535.0) ? 65535.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint16_t)gf - 32768;
        if (out[i] != golden[i]) {
            iot_printf("%f, %d, %d\n", in[i], golden[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast of every input element.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        out[i] = (int16_t)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);

    iot_printf("float_to_int16_test end\n");
}
|
|
|
|
// Checks vector_float_to_uint16(in, out, 1.0, 2.0, 2560) on the inputs
// i * 10.1 and times it against a scalar cast loop.
// The reference used for comparison is 2 * (in[i] - 1) clamped to
// [0, 65535] and truncated to uint16. Any mismatch is printed as
// "input, golden, output". Cycle counts come from cpu_get_mcycle().
//
// Change from the original: the three os_mem_malloc() results are checked
// before use.
void float_to_uint16_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    uint16_t *out = (uint16_t *)os_mem_malloc(1, 2560 * 2);
    uint16_t *golden = (uint16_t *)os_mem_malloc(1, 2560 * 2);

    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_uint16_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }

    for (int32_t i = 0; i < 2560; i++) {
        in[i] = i * 10.1f;
    }

    uint64_t begin = cpu_get_mcycle();
    vector_float_to_uint16(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();

    for (uint32_t i = 0; i < 2560; i++) {
        float gf = 2.0 * (in[i] - 1.0);
        gf = (gf > 65535.0) ? 65535.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint16_t)gf;
        if (out[i] != golden[i]) {
            iot_printf("%f, %d, %d\n", in[i], golden[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar baseline: plain cast of every input element.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        out[i] = (uint16_t)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);

    iot_printf("float_to_uint16_test end\n");
}
|
|
|
|
// Compares vector_inner_product_uint8() on two 100-element uint8 vectors
// with a scalar dot product, printing the cycle count of each and a
// message if the two results differ.
void vector_inner_product_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];

    // Values are stored modulo 256 (b wraps for larger indices).
    for (uint32_t k = 0; k < 100; k++) {
        a[k] = k + 50;
        b[k] = k * 7 + 1;
    }

    uint64_t t_start = cpu_get_mcycle();
    uint32_t simd_sum = vector_inner_product_uint8(a, b, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Scalar reference, timed the same way.
    uint32_t ref_sum = 0;
    t_start = cpu_get_mcycle();
    for (uint8_t k = 0; k < 100; k++) {
        ref_sum += a[k] * b[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    if (ref_sum != simd_sum) {
        iot_printf("incorrect, golden = %d, output = %d\n", ref_sum, simd_sum);
    }

    iot_printf("inner product of uint8 test end\n");
}
|
|
|
|
// Compares vector_inner_product_int8() on two 100-element int8 vectors
// with a scalar dot product, printing the cycle count of each and a
// message if the two results differ.
void vector_inner_product_test_int8() {
    int8_t a[100];
    int8_t b[100];

    for (uint32_t k = 0; k < 100; k++) {
        a[k] = k;
        b[k] = k + 1;
    }

    uint64_t t_start = cpu_get_mcycle();
    int32_t simd_sum = vector_inner_product_int8(a, b, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Scalar reference, timed the same way.
    int32_t ref_sum = 0;
    t_start = cpu_get_mcycle();
    for (uint8_t k = 0; k < 100; k++) {
        ref_sum += a[k] * b[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    if (ref_sum != simd_sum) {
        iot_printf("incorrect, golden = %d, output = %d\n", ref_sum, simd_sum);
    }

    iot_printf("inner product of int8 test end\n");
}
|
|
|
|
|
|
// Compares vector_inner_product_uint16() on two 100-element uint16 vectors
// with a scalar dot product accumulated in 64 bits, printing the cycle
// count of each and a message if the two results differ.
void vector_inner_product_test_uint16() {
    uint16_t a[100];
    uint16_t b[100];

    for (uint64_t k = 0; k < 100; k++) {
        a[k] = k + 50;
        b[k] = k * 7 + 1;
    }

    uint64_t t_start = cpu_get_mcycle();
    uint64_t simd_sum = vector_inner_product_uint16(a, b, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Scalar reference, timed the same way.
    uint64_t ref_sum = 0;
    t_start = cpu_get_mcycle();
    for (uint16_t k = 0; k < 100; k++) {
        ref_sum += a[k] * b[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    if (ref_sum != simd_sum) {
        // Only the low 32 bits of each sum are printed.
        iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)ref_sum, (uint32_t)simd_sum);
    }

    iot_printf("inner product of uint16 test end\n");
}
|
|
|
|
// Compares vector_inner_product_int16() on two 100-element int16 vectors
// with a scalar dot product accumulated in 64 bits, printing the cycle
// count of each and a message if the two results differ.
void vector_inner_product_test_int16() {
    int16_t a[100];
    int16_t b[100];

    // b exceeds the int16 range for larger indices; the stored value is
    // whatever the conversion to int16_t yields.
    for (uint64_t k = 0; k < 100; k++) {
        a[k] = k * 100 + 1;
        b[k] = k * 700 - 200;
    }

    uint64_t t_start = cpu_get_mcycle();
    int64_t simd_sum = vector_inner_product_int16(a, b, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Scalar reference, timed the same way.
    int64_t ref_sum = 0;
    t_start = cpu_get_mcycle();
    for (uint16_t k = 0; k < 100; k++) {
        ref_sum += a[k] * b[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    if (ref_sum != simd_sum) {
        // Only the low 32 bits of each sum are printed.
        iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)ref_sum, (uint32_t)simd_sum);
    }

    iot_printf("inner product of int16 test end\n");
}
|
|
|
|
// Compares vector_inner_product_uint32() on two 100-element uint32 vectors
// with a scalar dot product, printing the cycle count of each and a
// message if the two results differ.
void vector_inner_product_test_uint32() {
    uint32_t a[100];
    uint32_t b[100];

    for (uint64_t k = 0; k < 100; k++) {
        a[k] = k + 50;
        b[k] = k * 7 + 1;
    }

    uint64_t t_start = cpu_get_mcycle();
    uint32_t simd_sum = vector_inner_product_uint32(a, b, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Scalar reference, timed the same way.
    uint32_t ref_sum = 0;
    t_start = cpu_get_mcycle();
    for (uint16_t k = 0; k < 100; k++) {
        ref_sum += a[k] * b[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    if (ref_sum != simd_sum) {
        iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)ref_sum, (uint32_t)simd_sum);
    }

    iot_printf("inner product of uint32 test end\n");
}
|
|
|
|
// Compares vector_inner_product_int32() on two 100-element int32 vectors
// with a scalar dot product, printing the cycle count of each and a
// message if the two results differ.
//
// Change from the original: the scalar reference used to accumulate in an
// int32_t. With these inputs the true sum is about 2.3e10, so that
// accumulation overflowed a signed int, which is undefined behaviour. The
// reference now accumulates in uint32_t (well-defined modulo-2^32
// wrap-around) and is converted to int32_t once at the end, so it is
// compared against the low 32 bits of the result.
void vector_inner_product_test_int32() {
    int32_t a[100];
    int32_t b[100];

    for (uint64_t i = 0; i < 100; i++) {
        a[i] = i * 100 + 1;
        b[i] = i * 700 - 200;
    }

    int32_t o = 0;
    int32_t g = 0;

    uint64_t begin = cpu_get_mcycle();
    o = vector_inner_product_int32(a, b, 100);
    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Scalar reference, timed the same way. Unsigned arithmetic gives the
    // same low 32 bits as two's-complement signed arithmetic, without UB.
    uint32_t wrapped = 0;
    begin = cpu_get_mcycle();
    for (uint16_t i = 0; i < 100; i++) {
        wrapped += (uint32_t)a[i] * (uint32_t)b[i];
    }
    g = (int32_t)wrapped;
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    if (g != o) {
        iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)g, (uint32_t)o);
    }

    iot_printf("inner product of int32 test end\n");
}
|
|
|
|
// Compares vector_inner_product_float() on two 100-element float vectors
// with a scalar dot product, printing the cycle count of each and a
// message if the two results differ (exact comparison).
void vector_inner_product_test_float() {
    float a[100];
    float b[100];

    // All inputs are small whole numbers.
    for (uint64_t k = 0; k < 100; k++) {
        a[k] = k + 50.0;
        b[k] = k * 7.0 + 1.0;
    }

    uint64_t t_start = cpu_get_mcycle();
    float simd_sum = vector_inner_product_float(a, b, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    // Scalar reference, timed the same way.
    float ref_sum = 0;
    t_start = cpu_get_mcycle();
    for (uint16_t k = 0; k < 100; k++) {
        ref_sum += a[k] * b[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    if (ref_sum != simd_sum) {
        // The sums are printed after truncation to uint32_t.
        iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)ref_sum, (uint32_t)simd_sum);
    }

    iot_printf("inner product of float test end\n");
}
|
|
|
|
void max_min_test_int64() {
|
|
int32_t init_a[8] = {0xe0000000, 0xe0000001, 0xe0000002, 0xe0000003, 0xe0000004, 0xe0000005, 0xe0000006, 0xe0000007};
|
|
int32_t init_b[8] = {0x7fffffff, 0x7ffffffe, 0x7ffffffd, 0x7ffffffc, 0x7ffffffb, 0x7ffffffa, 0x7ffffff9, 0x7ffffff8};
|
|
int32_t one[] = {4};
|
|
int64_t curr_a[8];
|
|
int64_t curr_b[8];
|
|
int32_t *in32_a = (int32_t *)curr_a;
|
|
int32_t *in32_b = (int32_t *)curr_b;
|
|
int64_t gold_a[8];
|
|
int64_t gold_b[8];
|
|
int32_t *in32ga = (int32_t *)gold_a;
|
|
int32_t *in32gb = (int32_t *)gold_b;
|
|
int16_t sub1[] = {1, 1};
|
|
int16_t sub2[] = {2, 2};
|
|
int16_t sub3[] = {1, 1};
|
|
int16_t sub4[] = {3, 3};
|
|
int64_t gold_g[8];
|
|
int64_t gold_l[8];
|
|
int64_t curr_g[8];
|
|
int64_t curr_l[8];
|
|
int32_t *in32_g = (int32_t *)curr_g;
|
|
int32_t *in32_l = (int32_t *)curr_l;
|
|
int32_t *in32gg = (int32_t *)gold_g;
|
|
int32_t *in32gl = (int32_t *)gold_l;
|
|
vld_siw(10, one, 0)
|
|
vld_siw(8, init_a, 1)
|
|
vld_siw(9, init_b, 1)
|
|
vmulw(0, 10, 8)
|
|
vmulw(4, 10, 9)
|
|
vld_siw(8, (init_a + SIMD_WW), 1)
|
|
vld_siw(9, (init_b + SIMD_WW), 1)
|
|
vmulw(2, 10, 8)
|
|
vmulw(6, 10, 9)
|
|
vld_sih(12, sub1, 0)
|
|
vld_sih(13, sub2, 0)
|
|
vld_sih(14, sub3, 0)
|
|
vld_sih(15, sub4, 0)
|
|
for (uint32_t i = 0; i < 8; i++) {
|
|
gold_a[i] = (int64_t)init_a[i] * 4;
|
|
gold_b[i] = (int64_t)init_b[i] * 4;
|
|
}
|
|
for (uint32_t t = 0; t < 100000; t++) {
|
|
iot_printf("add and sub round %d\n", t);
|
|
in32ga = (int32_t *)gold_a;
|
|
in32gb = (int32_t *)gold_b;
|
|
for (uint32_t i = 0; i < 8; i++) {
|
|
gold_a[i] = gold_a[i] + sub1[0] * sub2[0];
|
|
gold_b[i] = gold_b[i] - sub3[0] * sub4[0];
|
|
gold_g[i] = gold_a[i] > gold_b[i] ? gold_a[i] : gold_b[i];
|
|
gold_l[i] = gold_a[i] < gold_b[i] ? gold_a[i] : gold_b[i];
|
|
}
|
|
vmaddw(0, 12, 13)
|
|
vmsubw(4, 14, 15)
|
|
|
|
in32_a = (int32_t *)curr_a;
|
|
in32_b = (int32_t *)curr_b;
|
|
in32_g = (int32_t *)curr_g;
|
|
in32_l = (int32_t *)curr_l;
|
|
|
|
vst_siw(in32_a, 1, 0)
|
|
vst_siw(in32_b, 1, 4)
|
|
vmax(11, 0, 4)
|
|
vst_siw(in32_g, 1, 11)
|
|
vmin(11, 0, 4)
|
|
vst_siw(in32_l, 1, 11)
|
|
in32_a += SIMD_WW;
|
|
in32_b += SIMD_WW;
|
|
in32_g += SIMD_WW;
|
|
in32_l += SIMD_WW;
|
|
|
|
vst_siw(in32_a, 1, 1)
|
|
vst_siw(in32_b, 1, 5)
|
|
vmax(11, 1, 5)
|
|
vst_siw(in32_g, 1, 11)
|
|
vmin(11, 1, 5)
|
|
vst_siw(in32_l, 1, 11)
|
|
in32_a += SIMD_WW;
|
|
in32_b += SIMD_WW;
|
|
in32_g += SIMD_WW;
|
|
in32_l += SIMD_WW;
|
|
|
|
vst_siw(in32_a, 1, 2)
|
|
vst_siw(in32_b, 1, 6)
|
|
vmax(11, 2, 6)
|
|
vst_siw(in32_g, 1, 11)
|
|
vmin(11, 2, 6)
|
|
vst_siw(in32_l, 1, 11)
|
|
in32_a += SIMD_WW;
|
|
in32_b += SIMD_WW;
|
|
in32_g += SIMD_WW;
|
|
in32_l += SIMD_WW;
|
|
|
|
vst_siw(in32_a, 1, 3)
|
|
vst_siw(in32_b, 1, 7)
|
|
vmax(11, 3, 7)
|
|
vst_siw(in32_g, 1, 11)
|
|
vmin(11, 3, 7)
|
|
vst_siw(in32_l, 1, 11)
|
|
in32_a += SIMD_WW;
|
|
in32_b += SIMD_WW;
|
|
in32_g += SIMD_WW;
|
|
in32_l += SIMD_WW;
|
|
|
|
in32_a = (int32_t *)curr_a;
|
|
in32_b = (int32_t *)curr_b;
|
|
in32_g = (int32_t *)curr_g;
|
|
in32_l = (int32_t *)curr_l;
|
|
|
|
for (uint32_t i = 0; i < 8; i++) {
|
|
if(curr_a[i] != gold_a[i]) {
|
|
iot_printf("incorrect at %d, gold_a[%d] = 0x%08x%08x, curr_a[%d] = 0x%08x%08x\n", i, i, in32ga[2 * i + 1], in32ga[2 * i], i, in32_a[2 * i + 1], in32_a[2 * i]);
|
|
}
|
|
if(curr_b[i] != gold_b[i]) {
|
|
iot_printf("incorrect at %d, gold_b[%d] = 0x%08x%08x, curr_b[%d] = 0x%08x%08x\n", i, i, in32gb[2 * i + 1], in32gb[2 * i], i, in32_b[2 * i + 1], in32_b[2 * i]);
|
|
}
|
|
if(curr_g[i] != gold_g[i]) {
|
|
iot_printf("incorrect at %d, gold_g[%d] = 0x%08x%08x, curr_g[%d] = 0x%08x%08x\n", i, i, in32gg[2 * i + 1], in32gg[2 * i], i, in32_g[2 * i + 1], in32_g[2 * i]);
|
|
}
|
|
if(curr_l[i] != gold_l[i]) {
|
|
iot_printf("incorrect at %d, gold_l[%d] = 0x%08x%08x, curr_l[%d] = 0x%08x%08x\n", i, i, in32gl[2 * i + 1], in32gl[2 * i], i, in32_l[2 * i + 1], in32_l[2 * i]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Runs one host-supplied matrix transpose case.
//
// read_case_from_python() fills config_get and places the case data at the
// fixed address 0x10200000. Layout used here: input first, then golden
// (offset config_get[54]), then output (a further offset of config_get[55]).
// config_get[3] selects unsigned (0) or signed (1) element types and
// config_get[2] the element size in bytes (1, 2 or 4); config_get[0] and
// config_get[1] are passed through as the last two arguments of the
// transpose routine. Unknown selector values run no transpose. The output is
// always compared with the golden data over config_get[55] bytes.
void matrix_transpose_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t input_addr = data_begin;
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint32_t sign_sel = config_get[3];
    uint32_t elem_bytes = config_get[2];

    if (sign_sel == 0) {
        // Unsigned element types.
        if (elem_bytes == 1) {
            matrix_transpose_uint8((uint8_t *)input_addr, (uint8_t *)output_addr, config_get[0], config_get[1]);
        } else if (elem_bytes == 2) {
            matrix_transpose_uint16((uint16_t *)input_addr, (uint16_t *)output_addr, config_get[0], config_get[1]);
        } else if (elem_bytes == 4) {
            matrix_transpose_uint32((uint32_t *)input_addr, (uint32_t *)output_addr, config_get[0], config_get[1]);
        }
    } else if (sign_sel == 1) {
        // Signed element types.
        if (elem_bytes == 1) {
            matrix_transpose_int8((int8_t *)input_addr, (int8_t *)output_addr, config_get[0], config_get[1]);
        } else if (elem_bytes == 2) {
            matrix_transpose_int16((int16_t *)input_addr, (int16_t *)output_addr, config_get[0], config_get[1]);
        } else if (elem_bytes == 4) {
            matrix_transpose_int32((int32_t *)input_addr, (int32_t *)output_addr, config_get[0], config_get[1]);
        }
    }

    verify_8bits(output_addr, golden_addr, config_get[55]);
}
|
|
|
|
void vector_test_from_pc() {
|
|
uint8_t config_read[120];
|
|
uint32_t config_get[60];
|
|
uint32_t data_begin = 0x10200000;
|
|
//uint8_t *data = os_mem_malloc(1, 512 * 1024);
|
|
//uint32_t data_begin = (uint32_t)data;
|
|
read_case_from_python(config_read, config_get, (uint8_t *)data_begin);
|
|
uint32_t weight_addr = data_begin;
|
|
uint32_t bias_addr = weight_addr + config_get[52];
|
|
uint32_t input_addr = bias_addr + config_get[53];
|
|
uint32_t golden_addr = input_addr + config_get[54];
|
|
uint32_t output_addr = golden_addr + config_get[55];
|
|
if (config_get[4]) {
|
|
uint32_t data = 0x1071f;
|
|
vcsrw(data,data,0)
|
|
} else {
|
|
uint32_t data = 0x1061f;
|
|
vcsrw(data,data,0)
|
|
}
|
|
switch (config_get[3])
|
|
{
|
|
case 0: // unsigned
|
|
switch (config_get[2])
|
|
{
|
|
case 1: // 8bit
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 3:
|
|
vector_multiply_uint8_high_8bit((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 4:
|
|
vector_multiply_uint8_low_8bit((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)weight_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 6:
|
|
vector_mul_add_uint8_to_uint32((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)weight_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 8:
|
|
vector_mul_sub_uint8_to_uint32((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 15:
|
|
vector_logic_and_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 16:
|
|
vector_logic_or_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 17:
|
|
vector_logic_xor_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 18:
|
|
vector_logic_xnor_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 19:
|
|
vector_left_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 20:
|
|
vector_right_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 21:
|
|
vector_right_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 2: // 16 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 3:
|
|
vector_multiply_uint16_high_16bit((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 4:
|
|
vector_multiply_uint16_low_16bit((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)weight_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 6:
|
|
vector_mul_add_uint16_to_uint64((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint64_t *)weight_addr, (uint64_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)weight_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 8:
|
|
vector_mul_sub_uint16_to_uint64((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint64_t *)weight_addr, (uint64_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 15:
|
|
vector_logic_and_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 16:
|
|
vector_logic_or_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 17:
|
|
vector_logic_xor_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 18:
|
|
vector_logic_xnor_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 19:
|
|
vector_left_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 20:
|
|
vector_right_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 21:
|
|
vector_right_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 22:
|
|
vector_high_half_bits_uint16((uint16_t *)input_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 23:
|
|
vector_low_half_bits_uint16((uint16_t *)input_addr, (uint8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 3: // float
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_float((float *)input_addr, (float *)bias_addr, (float *)weight_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_float((float *)input_addr, (float *)bias_addr, (float *)weight_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case 4: // 32 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint64_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 3:
|
|
vector_multiply_uint32_high_32bit((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 4:
|
|
vector_multiply_uint32_low_32bit((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 15:
|
|
vector_logic_and_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 16:
|
|
vector_logic_or_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 17:
|
|
vector_logic_xor_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 18:
|
|
vector_logic_xnor_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 19:
|
|
vector_left_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 20:
|
|
vector_right_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 21:
|
|
vector_right_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 22:
|
|
vector_high_half_bits_uint32((uint32_t *)input_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 23:
|
|
vector_low_half_bits_uint32((uint32_t *)input_addr, (uint16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 1: // signed
|
|
switch (config_get[2])
|
|
{
|
|
case 1: // 8bit
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 3:
|
|
vector_multiply_int8_high_8bit((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 4:
|
|
vector_multiply_int8_low_8bit((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)weight_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 6:
|
|
vector_mul_add_int8_to_int32((int8_t *)input_addr, (int8_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)weight_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 8:
|
|
vector_mul_sub_int8_to_int32((int8_t *)input_addr, (int8_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 15:
|
|
vector_logic_and_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 16:
|
|
vector_logic_or_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 17:
|
|
vector_logic_xor_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 18:
|
|
vector_logic_xnor_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 19:
|
|
vector_left_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 20:
|
|
vector_arithmatic_right_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 21:
|
|
vector_logic_right_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 2: // 16 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 3:
|
|
vector_multiply_int16_high_16bit((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 4:
|
|
vector_multiply_int16_low_16bit((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)weight_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 6:
|
|
vector_mul_add_int16_to_int64((int16_t *)input_addr, (int16_t *)bias_addr, (int64_t *)weight_addr, (int64_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)weight_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 8:
|
|
vector_mul_sub_int16_to_int64((int16_t *)input_addr, (int16_t *)bias_addr, (int64_t *)weight_addr, (int64_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 15:
|
|
vector_logic_and_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 16:
|
|
vector_logic_or_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 17:
|
|
vector_logic_xor_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 18:
|
|
vector_logic_xnor_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 19:
|
|
vector_left_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 20:
|
|
vector_arithmatic_right_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 21:
|
|
vector_logic_right_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 22:
|
|
vector_high_half_bits_int16((int16_t *)input_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 23:
|
|
vector_low_half_bits_int16((int16_t *)input_addr, (int8_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 4: // 32 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 0:
|
|
vector_add_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 1:
|
|
vector_sub_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 2:
|
|
vector_multiply_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int64_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 3:
|
|
vector_multiply_int32_high_32bit((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 4:
|
|
vector_multiply_int32_low_32bit((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 5:
|
|
vector_mul_add_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 7:
|
|
vector_mul_sub_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 9:
|
|
vector_max_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 10:
|
|
vector_min_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 11:
|
|
vector_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 12:
|
|
vector_not_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 13:
|
|
vector_less_than_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 14:
|
|
vector_greater_or_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 15:
|
|
vector_logic_and_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 16:
|
|
vector_logic_or_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 17:
|
|
vector_logic_xor_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 18:
|
|
vector_logic_xnor_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 19:
|
|
vector_left_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 20:
|
|
vector_arithmatic_right_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 21:
|
|
vector_logic_right_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 22:
|
|
vector_high_half_bits_int32((int32_t *)input_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
case 23:
|
|
vector_low_half_bits_int32((int32_t *)input_addr, (int16_t *)output_addr, config_get[0]);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
uint32_t conf;
|
|
vcsrr(conf, 0)
|
|
iot_printf("conf = %x\n", conf);
|
|
//if (config_get[52] + config_get[53] + config_get[54] + config_get[55] * 2 <= 0x400000) {
|
|
if (config_get[2] != 3) {
|
|
verify_8bits(output_addr, golden_addr, config_get[55]);
|
|
} else {
|
|
verify_float(output_addr, golden_addr, config_get[55] / 4);
|
|
}
|
|
//os_mem_free(data);
|
|
}
|
|
|
|
|
|
void vector_saturation_test_from_pc() {
|
|
uint8_t config_read[120];
|
|
uint32_t config_get[60];
|
|
uint32_t data_begin = 0x10200000;
|
|
read_case_from_python(config_read, config_get, (uint8_t *)data_begin);
|
|
uint32_t weight_addr = data_begin;
|
|
uint32_t bias_addr = weight_addr + config_get[52];
|
|
uint32_t input_addr = bias_addr + config_get[53];
|
|
uint32_t golden_addr = input_addr + config_get[54];
|
|
uint32_t output_addr = golden_addr + config_get[55];
|
|
uint32_t with_const = config_get[4];
|
|
uint32_t right_shift = config_get[5];
|
|
switch (config_get[3])
|
|
{
|
|
case 0: // unsigned
|
|
switch (config_get[2])
|
|
{
|
|
case 1: // 8bit
|
|
switch (config_get[1])
|
|
{
|
|
case 2:
|
|
if (with_const) {
|
|
vector_multiply_const_uint8_right_shift_saturation((uint8_t *)input_addr, *((uint8_t *)bias_addr), (uint8_t *)output_addr, right_shift, config_get[0]);
|
|
} else {
|
|
vector_multiply_uint8_right_shift_saturation((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, right_shift, config_get[0]);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 2: // 16 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 2:
|
|
if (with_const) {
|
|
vector_multiply_const_uint16_right_shift_saturation((uint16_t *)input_addr, *((uint16_t *)bias_addr), (uint16_t *)output_addr, right_shift, config_get[0]);
|
|
} else {
|
|
vector_multiply_uint16_right_shift_saturation((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, right_shift, config_get[0]);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 4: // 32 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 2:
|
|
if (with_const) {
|
|
vector_multiply_const_uint32_right_shift_saturation((uint32_t *)input_addr, *((uint32_t *)bias_addr), (uint32_t *)output_addr, right_shift, config_get[0]);
|
|
} else {
|
|
vector_multiply_uint32_right_shift_saturation((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, right_shift, config_get[0]);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 1: // signed
|
|
switch (config_get[2])
|
|
{
|
|
case 1: // 8bit
|
|
switch (config_get[1])
|
|
{
|
|
case 2:
|
|
if (with_const) {
|
|
vector_multiply_const_int8_right_shift_saturation((int8_t *)input_addr, *((int8_t *)bias_addr), (int8_t *)output_addr, right_shift, config_get[0]);
|
|
} else {
|
|
vector_multiply_int8_right_shift_saturation((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, right_shift, config_get[0]);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 2: // 16 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 2:
|
|
if (with_const) {
|
|
vector_multiply_const_int16_right_shift_saturation((int16_t *)input_addr, *((int16_t *)bias_addr), (int16_t *)output_addr, right_shift, config_get[0]);
|
|
} else {
|
|
vector_multiply_int16_right_shift_saturation((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, right_shift, config_get[0]);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 4: // 32 bit
|
|
switch (config_get[1])
|
|
{
|
|
case 2:
|
|
if (with_const) {
|
|
vector_multiply_const_int32_right_shift_saturation((int32_t *)input_addr, *((int32_t *)bias_addr), (int32_t *)output_addr, right_shift, config_get[0]);
|
|
} else {
|
|
vector_multiply_int32_right_shift_saturation((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, right_shift, config_get[0]);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
uint32_t conf;
|
|
vcsrr(conf, 0)
|
|
iot_printf("conf = %x\n", conf);
|
|
if (config_get[52] + config_get[53] + config_get[54] + config_get[55] * 2 <= 0x8000) {
|
|
verify_8bits(output_addr, golden_addr, config_get[55]);
|
|
} else {
|
|
iot_printf("too large\nfinished~");
|
|
}
|
|
}
|
|
|
|
/*
 * Host-driven test for the matrix multiply kernels.
 *
 * The test case is fetched with read_case_from_python() into the buffer at
 * 0x10200000.  Matrix A starts at the buffer base, matrix B follows after
 * config_get[53] bytes, the golden result after a further config_get[54]
 * bytes, and the kernel output is written config_get[55] bytes behind the
 * golden data.
 *
 * Selector words read here:
 *   [0], [1], [2]  forwarded unchanged to every kernel
 *   [3]            element size in bytes: 1, 2 or 4
 *   [4]            0 = unsigned, 1 = signed
 *   [5]            1 = result keeps the element width ("vmadd"),
 *                  4 = widened result ("vmaddw"); ignored for 32-bit elements
 *   [6]            extra trailing argument of the same-width 8/16-bit kernels
 *
 * The cycle count around the dispatch is printed, then the output is compared
 * byte-wise against the golden data.  Unknown selector values run no kernel.
 */
void matrix_multi_test_from_pc() {
    uint8_t raw_cfg[120];
    uint32_t cfg[60];
    uint8_t *payload = (uint8_t *)0x10200000;

    read_case_from_python(raw_cfg, cfg, payload);

    const uint32_t a_addr = (uint32_t)payload;
    const uint32_t b_addr = a_addr + cfg[53];
    const uint32_t golden_addr = b_addr + cfg[54];
    const uint32_t output_addr = golden_addr + cfg[55];

    const uint64_t t_start = cpu_get_mcycle();

    const uint32_t width = cfg[3];
    const uint32_t sign = cfg[4];
    const uint32_t out_mode = cfg[5];

    if (width == 1) {
        if (sign == 0) {
            if (out_mode == 1) {
                matrix_multi_uint8_to_uint8((uint8_t *)a_addr, (uint8_t *)b_addr,
                                            (uint8_t *)output_addr,
                                            cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_multi_uint8_to_uint32((uint8_t *)a_addr, (uint8_t *)b_addr,
                                             (uint32_t *)output_addr,
                                             cfg[0], cfg[1], cfg[2]);
            }
        } else if (sign == 1) {
            if (out_mode == 1) {
                matrix_multi_int8_to_int8((int8_t *)a_addr, (int8_t *)b_addr,
                                          (int8_t *)output_addr,
                                          cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_multi_int8_to_int32((int8_t *)a_addr, (int8_t *)b_addr,
                                           (int32_t *)output_addr,
                                           cfg[0], cfg[1], cfg[2]);
            }
        }
    } else if (width == 2) {
        if (sign == 0) {
            if (out_mode == 1) {
                matrix_multi_uint16_to_uint16((uint16_t *)a_addr, (uint16_t *)b_addr,
                                              (uint16_t *)output_addr,
                                              cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_multi_uint16_to_uint64((uint16_t *)a_addr, (uint16_t *)b_addr,
                                              (uint64_t *)output_addr,
                                              cfg[0], cfg[1], cfg[2]);
            }
        } else if (sign == 1) {
            if (out_mode == 1) {
                matrix_multi_int16_to_int16((int16_t *)a_addr, (int16_t *)b_addr,
                                            (int16_t *)output_addr,
                                            cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_multi_int16_to_int64((int16_t *)a_addr, (int16_t *)b_addr,
                                            (int64_t *)output_addr,
                                            cfg[0], cfg[1], cfg[2]);
            }
        }
    } else if (width == 4) {
        // 32-bit elements have a single kernel per signedness.
        if (sign == 0) {
            // unsigned
            matrix_multi_uint32_to_uint32((uint32_t *)a_addr, (uint32_t *)b_addr,
                                          (uint32_t *)output_addr,
                                          cfg[0], cfg[1], cfg[2]);
        } else if (sign == 1) {
            // signed
            matrix_multi_int32_to_int32((int32_t *)a_addr, (int32_t *)b_addr,
                                        (int32_t *)output_addr,
                                        cfg[0], cfg[1], cfg[2]);
        }
    }

    const uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    verify_8bits(output_addr, golden_addr, cfg[55]);
}
|
|
|
|
/*
 * Host-driven test for the "transpose multiply" matrix kernels.
 *
 * The test case is fetched with read_case_from_python() into the buffer at
 * 0x10200000.  Matrix A starts at the buffer base, matrix B follows after
 * config_get[53] bytes, the golden result after a further config_get[54]
 * bytes, and the kernel output is written config_get[55] bytes behind the
 * golden data.
 *
 * Selector words read here:
 *   [0], [1], [2]  forwarded unchanged to every kernel
 *   [3]            element size in bytes: 1, 2 or 4; ANY other value selects
 *                  the float kernel
 *   [4]            0 = unsigned, 1 = signed (integer kernels only)
 *   [5]            1 = result keeps the element width ("vmadd"),
 *                  4 = widened result ("vmaddw"); ignored for 32-bit elements
 *   [6]            extra trailing argument of the same-width 8/16-bit kernels
 *
 * The cycle count around the dispatch is printed, then the output is compared
 * byte-wise against the golden data (also for the float kernel).
 */
void matrix_transpose_multi_test_from_pc() {
    uint8_t raw_cfg[120];
    uint32_t cfg[60];
    const uint32_t payload_base = 0x10200000;

    read_case_from_python(raw_cfg, cfg, (uint8_t *)payload_base);

    const uint32_t a_addr = payload_base;
    const uint32_t b_addr = a_addr + cfg[53];
    const uint32_t golden_addr = b_addr + cfg[54];
    const uint32_t output_addr = golden_addr + cfg[55];

    const uint64_t t_start = cpu_get_mcycle();

    const uint32_t width = cfg[3];
    const uint32_t sign = cfg[4];
    const uint32_t out_mode = cfg[5];

    if (width == 1) {
        if (sign == 0) {
            if (out_mode == 1) {
                matrix_transpose_multi_uint8_to_uint8((uint8_t *)a_addr, (uint8_t *)b_addr,
                                                      (uint8_t *)output_addr,
                                                      cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_transpose_multi_uint8_to_uint32((uint8_t *)a_addr, (uint8_t *)b_addr,
                                                       (uint32_t *)output_addr,
                                                       cfg[0], cfg[1], cfg[2]);
            }
        } else if (sign == 1) {
            if (out_mode == 1) {
                matrix_transpose_multi_int8_to_int8((int8_t *)a_addr, (int8_t *)b_addr,
                                                    (int8_t *)output_addr,
                                                    cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_transpose_multi_int8_to_int32((int8_t *)a_addr, (int8_t *)b_addr,
                                                     (int32_t *)output_addr,
                                                     cfg[0], cfg[1], cfg[2]);
            }
        }
    } else if (width == 2) {
        if (sign == 0) {
            if (out_mode == 1) {
                matrix_transpose_multi_uint16_to_uint16((uint16_t *)a_addr, (uint16_t *)b_addr,
                                                        (uint16_t *)output_addr,
                                                        cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_transpose_multi_uint16_to_uint64((uint16_t *)a_addr, (uint16_t *)b_addr,
                                                        (uint64_t *)output_addr,
                                                        cfg[0], cfg[1], cfg[2]);
            }
        } else if (sign == 1) {
            if (out_mode == 1) {
                matrix_transpose_multi_int16_to_int16((int16_t *)a_addr, (int16_t *)b_addr,
                                                      (int16_t *)output_addr,
                                                      cfg[0], cfg[1], cfg[2], cfg[6]);
            } else if (out_mode == 4) {
                matrix_transpose_multi_int16_to_int64((int16_t *)a_addr, (int16_t *)b_addr,
                                                      (int64_t *)output_addr,
                                                      cfg[0], cfg[1], cfg[2]);
            }
        }
    } else if (width == 4) {
        // 32-bit elements have a single kernel per signedness.
        if (sign == 0) {
            // unsigned
            matrix_transpose_multi_uint32_to_uint32((uint32_t *)a_addr, (uint32_t *)b_addr,
                                                    (uint32_t *)output_addr,
                                                    cfg[0], cfg[1], cfg[2]);
        } else if (sign == 1) {
            // signed
            matrix_transpose_multi_int32_to_int32((int32_t *)a_addr, (int32_t *)b_addr,
                                                  (int32_t *)output_addr,
                                                  cfg[0], cfg[1], cfg[2]);
        }
    } else {
        // Fallback for every element-size selector other than 1, 2 or 4.
        matrix_transpose_multi_float((float *)a_addr, (float *)b_addr,
                                     (float *)output_addr,
                                     cfg[0], cfg[1], cfg[2]);
    }

    const uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    verify_8bits(output_addr, golden_addr, cfg[55]);
}
|
|
|
|
/*
 * Stand-alone timing check for the "maximum element" vector kernels.
 *
 * Three measurements are taken with cpu_get_mcycle() and printed:
 *   1. vector_maximum_element_int8_small() on a 256-entry stack array filled
 *      with 3 * i (stored as int8_t), followed by the index/value it reported;
 *   2. a plain scalar maximum search over the same array (cycle count only);
 *   3. vector_maximum_element_int8() on 128 * 256 bytes generated in the
 *      buffer at 0x10200000, followed by the index/value it reported.
 */
void maximum_test() {
    int8_t samples[256];

    // Fill and dump the small test vector; the value is narrowed to int8_t
    // on the store.
    for (uint32_t k = 0; k < 256; k++) {
        samples[k] = 3 * k;
    }
    for (uint32_t k = 0; k < 256; k++) {
        iot_printf("v[%d] = %d\n", k, samples[k]);
    }

    uint8_t max_index;
    int8_t max_value;
    uint64_t t_start;
    uint64_t t_stop;

    // NOTE(review): the prototype takes a uint8_t length and 0 is passed
    // here; presumably 0 stands for all 256 entries -- confirm.
    t_start = cpu_get_mcycle();
    vector_maximum_element_int8_small(samples, &max_index, &max_value, 0);
    t_stop = cpu_get_mcycle();

    iot_printf("cycle = %d\n", (uint32_t)(t_stop - t_start));
    iot_printf("max_index = %d\n", max_index);
    iot_printf("max_value = %d\n", max_value);

    // Scalar reference search; only its cycle count is reported.
    // NOTE(review): the result found here is never read afterwards.
    t_start = cpu_get_mcycle();
    max_index = 0;
    max_value = samples[0];
    {
        uint32_t k = 1;
        while (k < 256) {
            if (samples[k] > max_value) {
                max_value = samples[k];
                max_index = k;
            }
            k++;
        }
    }
    t_stop = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(t_stop - t_start));

    // Generate 128 rows of 256 bytes: row r holds (3 * c) modulo (5 * r),
    // with modulus 1 for row 0, narrowed to int8_t and capped at 126.
    int8_t *big = (int8_t *)0x10200000;
    for (uint32_t r = 0; r < 128; r++) {
        const uint32_t modulus = (5 * r != 0) ? 5 * r : 1;
        int8_t *row = big + r * 256;

        for (uint32_t c = 0; c < 256; c++) {
            int8_t val = (3 * c) % modulus;
            if (val > 126) {
                val = 126;
            }
            row[c] = val;
        }
    }

    uint16_t max_index16;

    t_start = cpu_get_mcycle();
    vector_maximum_element_int8(big, &max_index16, &max_value, 128 * 256);
    t_stop = cpu_get_mcycle();

    iot_printf("cycle = %d\n", (uint32_t)(t_stop - t_start));
    iot_printf("max_index = %d\n", max_index16);
    iot_printf("max_value = %d\n", max_value);
}
|
|
|
|
/*
 * Expands to one complete min/max check for a single element type.
 *
 * Uses the locals input_addr, output_addr, golden_addr and length of
 * vector_maxi_mini_test_from_pc(). The kernel is handed output_addr for the
 * selected element and output_addr + 4 for its index. The element is compared
 * with the golden value first; only when it matches is the reported index
 * validated by reading the input vector at that index.
 *
 *   fn        kernel under test, called as fn(input, index, output, length)
 *   elem_t    element type of the vector
 *   index_t   integer type passed to the kernel for the index result
 *   elem_fmt  iot_printf conversion used to print one element ("%d" or "%f")
 */
#define MAXI_MINI_CHECK(fn, elem_t, index_t, elem_fmt)                              \
    do {                                                                            \
        elem_t *in_vec = (elem_t *)input_addr;                                      \
        index_t *found_index = (index_t *)(output_addr + 4);                        \
        elem_t *found_elem = (elem_t *)output_addr;                                 \
        elem_t *golden_elem = (elem_t *)golden_addr;                                \
                                                                                    \
        fn(in_vec, found_index, found_elem, length);                                \
                                                                                    \
        if (*found_elem != *golden_elem) {                                          \
            iot_printf("elem incorrent, golden = " elem_fmt ", output = " elem_fmt "\n", \
                       *golden_elem, *found_elem);                                  \
            iot_printf("not all correct!\n");                                       \
        } else if (in_vec[*found_index] != *found_elem) {                           \
            /* Always report the vector under test (the float paths used to  */     \
            /* print an unrelated identifier here).                          */     \
            iot_printf("elem index incorrent, input at index %d is " elem_fmt "\n", \
                       *found_index, in_vec[*found_index]);                         \
            iot_printf("not all correct!\n");                                       \
        }                                                                           \
        iot_printf("finished~");                                                    \
    } while (0)

/*
 * Runs one vector minimum/maximum test case received from the PC.
 *
 * The case is read into memory starting at 0x10200000. The data is treated as
 * consecutive regions (weight, bias, input, golden, output) whose sizes are
 * taken from config_get[52..55]. The remaining configuration words are:
 *   config_get[0]  vector length
 *   config_get[1]  operation: 0 = min, 1 = max
 *   config_get[2]  element kind: 1 = 8 bits, 2 = 16 bits, 3 = float, 4 = 32 bits
 *   config_get[3]  0 = unsigned, 1 = signed (ignored for float)
 *
 * Unknown combinations are ignored without printing anything.
 */
void vector_maxi_mini_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weight_addr = data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint32_t length = config_get[0];
    uint32_t to_do = config_get[1];
    uint32_t elem_bytes = config_get[2];
    uint32_t signed_ = config_get[3];

    switch (elem_bytes) {
    case 1: /* 8 bits, 16-bit index */
        switch (to_do) {
        case 0: /* min */
            switch (signed_) {
            case 0:
                MAXI_MINI_CHECK(vector_minimum_element_uint8, uint8_t, uint16_t, "%d");
                break;
            case 1:
                MAXI_MINI_CHECK(vector_minimum_element_int8, int8_t, uint16_t, "%d");
                break;
            default:
                break;
            }
            break;
        case 1: /* max */
            switch (signed_) {
            case 0:
                MAXI_MINI_CHECK(vector_maximum_element_uint8, uint8_t, uint16_t, "%d");
                break;
            case 1:
                MAXI_MINI_CHECK(vector_maximum_element_int8, int8_t, uint16_t, "%d");
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;

    case 2: /* 16 bits, 16-bit index */
        switch (to_do) {
        case 0: /* min */
            switch (signed_) {
            case 0:
                MAXI_MINI_CHECK(vector_minimum_element_uint16, uint16_t, uint16_t, "%d");
                break;
            case 1:
                MAXI_MINI_CHECK(vector_minimum_element_int16, int16_t, uint16_t, "%d");
                break;
            default:
                break;
            }
            break;
        case 1: /* max */
            switch (signed_) {
            case 0:
                MAXI_MINI_CHECK(vector_maximum_element_uint16, uint16_t, uint16_t, "%d");
                break;
            case 1:
                MAXI_MINI_CHECK(vector_maximum_element_int16, int16_t, uint16_t, "%d");
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;

    case 3: /* float, 32-bit index; signedness is not consulted */
        switch (to_do) {
        case 0: /* min */
            MAXI_MINI_CHECK(vector_minimum_element_float, float, uint32_t, "%f");
            break;
        case 1: /* max */
            MAXI_MINI_CHECK(vector_maximum_element_float, float, uint32_t, "%f");
            break;
        default:
            break;
        }
        break;

    case 4: /* 32 bits, 32-bit index */
        switch (to_do) {
        case 0: /* min */
            switch (signed_) {
            case 0:
                MAXI_MINI_CHECK(vector_minimum_element_uint32, uint32_t, uint32_t, "%d");
                break;
            case 1:
                MAXI_MINI_CHECK(vector_minimum_element_int32, int32_t, uint32_t, "%d");
                break;
            default:
                break;
            }
            break;
        case 1: /* max */
            switch (signed_) {
            case 0:
                MAXI_MINI_CHECK(vector_maximum_element_uint32, uint32_t, uint32_t, "%d");
                break;
            case 1:
                MAXI_MINI_CHECK(vector_maximum_element_int32, int32_t, uint32_t, "%d");
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;

    default:
        break;
    }
}

#undef MAXI_MINI_CHECK
|
|
|
|
/*
 * Runs one log-softmax test case received from the PC.
 *
 * The case is read into memory starting at 0x10200000 and treated as
 * consecutive regions (weight, bias, input, golden, output) sized by
 * cfg[52..55]. cfg[0] is the vector length, cfg[1] selects the element width
 * (1 = int8 kernel, anything else = int16 kernel) and cfg[2] is the batch
 * count. The output is compared byte-wise against the golden data over
 * cfg[55] bytes.
 */
void logsoftmax_test_from_pc() {
    uint8_t raw_config[120];
    uint32_t cfg[60];
    const uint32_t base_addr = 0x10200000;

    read_case_from_python(raw_config, cfg, (uint8_t *)base_addr);

    /* Skip the weight and bias regions to reach the input vector. */
    const uint32_t in_addr = base_addr + cfg[52] + cfg[53];
    const uint32_t gold_addr = in_addr + cfg[54];
    const uint32_t out_addr = gold_addr + cfg[55];

    const uint32_t vec_len = cfg[0];
    const uint32_t batch_count = cfg[2];

    /* Two buffers handed to the kernel, placed cfg[55] bytes past the output start. */
    const uint32_t scratch_addr = out_addr + cfg[55];
    uint16_t *max_index_buf = (uint16_t *)scratch_addr;
    int8_t *max_value_buf = (int8_t *)(scratch_addr + 4 * vec_len);

    if (cfg[1] == 1) {
        logsoftmax_int8((int8_t *)in_addr, (int8_t *)out_addr,
                        max_index_buf, max_value_buf, vec_len, batch_count);
    } else {
        logsoftmax_int16((int16_t *)in_addr, (int16_t *)out_addr,
                         max_index_buf, (int16_t *)max_value_buf, vec_len, batch_count);
    }

    verify_8bits(out_addr, gold_addr, cfg[55]);
}
|
|
|
|
/*
 * Runs one softmax test case received from the PC.
 *
 * With AI_USING_PSRAM the case is received into a 131072-byte buffer obtained
 * from os_mem_malloc(); otherwise it is received at the fixed address
 * 0x10200000. The data is treated as consecutive regions (weight, bias,
 * input, golden, output) sized by config_get[52..55]. config_get[0] is the
 * vector length and config_get[1] selects the kernel (1 = int8, anything
 * else = int16). The output is compared byte-wise against the golden data
 * over config_get[55] bytes.
 */
void softmax_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];

#ifdef AI_USING_PSRAM
    uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072);
    if (data_begin == NULL) {
        /* Receiving the case into a null buffer would corrupt memory. */
        iot_printf("softmax test: data buffer allocation failed\n");
        return;
    }
#else
    uint32_t data_begin = 0x10200000;
#endif

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weight_addr = (uint32_t)data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];

    switch (elem_bytes) {
    case 1:
        softmax_int8((int8_t *)input_addr, (int8_t *)output_addr, length);
        break;

    default:
        softmax_int16((int16_t *)input_addr, (int16_t *)output_addr, length);
        break;
    }

    verify_8bits(output_addr, golden_addr, config_get[55]);

#ifdef AI_USING_PSRAM
    os_mem_free(data_begin);
#endif
}
|
|
|
|
/*
 * Runs one dequantize (integer -> float) test case received from the PC.
 *
 * With AI_USING_PSRAM the case is received into a 131072-byte buffer obtained
 * from os_mem_malloc(); otherwise it is received at the fixed address
 * 0x10200000. The data is treated as consecutive regions (weight, bias,
 * input, golden, output) sized by config_get[52..55].
 *
 * Configuration words:
 *   config_get[0]  vector length
 *   config_get[1]  element width: 1 = 8-bit kernels, anything else = 16-bit
 *   config_get[2]  0 = unsigned input, 1 = signed input
 *   config_get[3]  center; the signed kernels receive it reduced by 128
 *                  (8-bit) or 32768 (16-bit)
 *   config_get[4]  scale base, used as (value - 128)
 *   config_get[5]  number of times the scale is doubled
 * The scale is finally divided by 256 (8-bit) or 65536 (16-bit) before being
 * passed to the kernel. The float output is checked with verify_float().
 */
void dequantize_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];

#ifdef AI_USING_PSRAM
    uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072);
    if (data_begin == NULL) {
        /* Receiving the case into a null buffer would corrupt memory. */
        iot_printf("dequantize test: data buffer allocation failed\n");
        return;
    }
#else
    uint32_t data_begin = 0x10200000;
#endif

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weight_addr = (uint32_t)data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];
    uint32_t signed_ = config_get[2];
    int32_t center = config_get[3];
    float scale = (float)((int32_t)config_get[4] - 128);
    uint32_t scale_power = config_get[5];

    /* scale *= 2^scale_power */
    for (uint32_t i = 0; i < scale_power; i++) {
        scale = scale * 2.0;
    }

    switch (elem_bytes) {
    case 1:
        scale = scale / 256.0;
        switch (signed_) {
        case 0:
            vector_uint8_to_float((uint8_t *)input_addr, (float *)output_addr,
                                  (float)(center), scale, length);
            break;

        case 1:
            vector_int8_to_float((int8_t *)input_addr, (float *)output_addr,
                                 (float)(center - 128), scale, length);
            break;

        default:
            break;
        }
        break;

    default:
        scale = scale / 65536.0;
        switch (signed_) {
        case 0:
            vector_uint16_to_float((uint16_t *)input_addr, (float *)output_addr,
                                   (float)(center), scale, length);
            break;

        case 1:
            vector_int16_to_float((int16_t *)input_addr, (float *)output_addr,
                                  (float)(center - 32768), scale, length);
            break;

        default:
            break;
        }
        break;
    }

    verify_float(output_addr, golden_addr, length);

#ifdef AI_USING_PSRAM
    os_mem_free(data_begin);
#endif
}
|
|
|
|
/*
 * Runs one quantize (float -> integer) test case received from the PC.
 *
 * With AI_USING_PSRAM the case is received into a 131072-byte buffer obtained
 * from os_mem_malloc(); otherwise it is received at the fixed address
 * 0x10200000. The data is treated as consecutive regions (weight, bias,
 * input, golden, output) sized by config_get[52..55]. For this test the
 * first float of the weight region is used as the scale and the first float
 * of the bias region as the center.
 *
 * Configuration words:
 *   config_get[0]  vector length
 *   config_get[1]  element width: 1 = 8-bit kernels, anything else = 16-bit
 *   config_get[2]  0 = unsigned output, 1 = signed output
 * The output is compared byte-wise against the golden data over
 * config_get[55] bytes.
 */
void quantize_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];

#ifdef AI_USING_PSRAM
    uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072);
    if (data_begin == NULL) {
        /* Receiving the case into a null buffer would corrupt memory. */
        iot_printf("quantize test: data buffer allocation failed\n");
        return;
    }
#else
    uint32_t data_begin = 0x10200000;
#endif

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weight_addr = (uint32_t)data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];
    uint32_t signed_ = config_get[2];

    float *center = (float *)bias_addr;
    float *scale = (float *)weight_addr;

    switch (elem_bytes) {
    case 1:
        switch (signed_) {
        case 0:
            vector_float_to_uint8((float *)input_addr, (uint8_t *)output_addr,
                                  *center, *scale, length);
            break;

        case 1:
            vector_float_to_int8((float *)input_addr, (int8_t *)output_addr,
                                 *center, *scale, length);
            break;

        default:
            break;
        }
        break;

    default:
        switch (signed_) {
        case 0:
            vector_float_to_uint16((float *)input_addr, (uint16_t *)output_addr,
                                   *center, *scale, length);
            break;

        case 1:
            vector_float_to_int16((float *)input_addr, (int16_t *)output_addr,
                                  *center, *scale, length);
            break;

        default:
            break;
        }
        break;
    }

    verify_8bits(output_addr, golden_addr, config_get[55]);

#ifdef AI_USING_PSRAM
    os_mem_free(data_begin);
#endif
}
|
|
|
|
/*
 * Write/read-back test over 1024 * 512 32-bit words starting at 0x10200000.
 *
 * Word i is written with the pattern i * 3; a second pass reads every word
 * back and prints one line per mismatch (address, expected, observed).
 * Nothing is printed when all words match.
 */
void psram_test() {
    /*
     * volatile: every store and load must really reach the memory under
     * test; a plain pointer would allow the compiler to satisfy the
     * read-back from the values it has just stored.
     */
    volatile uint32_t *writer = (volatile uint32_t *)0x10200000;

    for (uint32_t i = 0; i < 1024 * 512; i++) {
        writer[i] = i * 3;
    }

    for (uint32_t i = 0; i < 1024 * 512; i++) {
        /* Read once so the reported value is the one that failed the compare. */
        uint32_t observed = writer[i];

        if (observed != i * 3) {
            iot_printf("incorrect at %08x, should be %08x, in fact %08x\n", 0x10200000 + 4 * i, i * 3, observed);
        }
    }
}
|
|
|
|
/*
 * Cycle-count measurement of a 64x64 by 64x64 int8 matrix multiply.
 *
 * Three phases are timed with cpu_get_mcycle() and printed in order:
 * allocating the two operands and the result, the multiply itself, and
 * freeing the three buffers. The operand buffers are never written before
 * the multiply, so only the cycle counts are meaningful, not the result.
 */
void temp_test() {
    uint32_t h_a = 64;
    uint32_t w_b = 64;
    uint32_t w_a = 64;
    uint64_t begin;
    uint64_t end;

    begin = cpu_get_mcycle();
    int8_t *a = (int8_t *)os_mem_malloc(1, h_a * w_a);
    int8_t *b = (int8_t *)os_mem_malloc(1, w_a * w_b);
    int8_t *o = (int8_t *)os_mem_malloc(1, h_a * w_b);
    end = cpu_get_mcycle();
    iot_printf("cycle~~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin));

    if (a == NULL || b == NULL || o == NULL) {
        /* Never hand a null buffer to the kernel; release what was obtained. */
        iot_printf("temp test: buffer allocation failed\n");
        if (a != NULL) {
            os_mem_free(a);
        }
        if (b != NULL) {
            os_mem_free(b);
        }
        if (o != NULL) {
            os_mem_free(o);
        }
        return;
    }

    begin = cpu_get_mcycle();
    matrix_multi_int8_to_int8(a, b, o, h_a, w_a, w_b, 0);
    end = cpu_get_mcycle();
    iot_printf("cycle~~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin));

    begin = cpu_get_mcycle();
    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(o);
    end = cpu_get_mcycle();
    iot_printf("cycle~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin));

    iot_printf("temp test finished~~~~~~~~~~~~~~~~~\n");
}
|
|
|
|
#ifndef AI_OS_TASK
|
|
int main(void) {
|
|
dbg_uart_init();
|
|
test_uart_init();
|
|
iot_dbg_uart_set_port(0, 115200 * 1, 0, 8, 1);
|
|
iot_printf("main start!\n");
|
|
dsp_init();
|
|
//while(1){
|
|
//test_uart0_getc();}
|
|
#ifdef SATURATION
|
|
uint32_t data = 0x1071f;
|
|
vcsrw(data,data,0)
|
|
#else
|
|
uint32_t data = 0x1061f;
|
|
vcsrw(data,data,0)
|
|
#endif
|
|
//psram_test();
|
|
//maximum_test();
|
|
//uint64_t begin;
|
|
//uint64_t end;
|
|
//begin = cpu_get_mcycle();
|
|
//end = cpu_get_mcycle();
|
|
//iot_printf("cycle = %d\n", (uint32_t)(end - begin));
|
|
//begin = cpu_get_mcycle();
|
|
//end = cpu_get_mcycle();
|
|
//iot_printf("cycle = %d\n", (uint32_t)(end - begin));
|
|
//vector_add_test_float();
|
|
//vector_sub_test_float();
|
|
//vector_mul_test_float();
|
|
//vector_madd_msub_test_float();
|
|
//vector_max_test_float();
|
|
//vector_min_test_float();
|
|
//vector_equal_test_float();
|
|
//vector_not_equal_test_float();
|
|
//vector_less_than_test_float();
|
|
//vector_greater_or_equal_test_float();
|
|
//vector_inner_product_test_float();
|
|
//vector_inner_product_test_int32();
|
|
//vector_inner_product_test_uint32();
|
|
//float_to_int8_test();
|
|
//float_to_uint8_test();
|
|
//float_to_int16_test();
|
|
//float_to_uint16_test();
|
|
//int8_to_float_test();
|
|
//uint8_to_float_test();
|
|
//int16_to_float_test();
|
|
//uint16_to_float_test();
|
|
while(1) {
|
|
//temp_test();
|
|
//softmax_test_from_pc();
|
|
//vector_maxi_mini_test_from_pc();
|
|
//matrix_multi_test_from_pc();
|
|
//matrix_transpose_multi_test_from_pc();
|
|
//vector_test_from_pc();
|
|
//logsoftmax_test_from_pc();
|
|
//fc_test_from_pc();
|
|
dequantize_test_from_pc();
|
|
//vector_saturation_test_from_pc();
|
|
//fc_16bit_test();
|
|
//matrix_transpose_test_from_pc();
|
|
//depth_fc_test_from_pc();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#else
|
|
|
|
/*
 * Body of the test task used in the AI_OS_TASK build: sets up the test UART
 * and the DSP, then runs the PC-driven matrix multiply test forever.
 */
void user_task_1(){
    test_uart_init();
    iot_dbg_uart_set_port(0, 115200, 0, 8, 1);
    iot_printf("main start!\n");
    dsp_init();

    for (;;) {
        matrix_multi_test_from_pc();
    }
}
|
|
|
|
int32_t iot__task_init()
|
|
{
|
|
os_task_h handle;
|
|
|
|
handle = os_create_task(user_task_1, NULL, 9);
|
|
|
|
//create the tasks;
|
|
if(handle != NULL) {
|
|
iot_printf("task 1 init successfully...\n");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Module bring-up for the AI_OS_TASK build: platform initialisation, task
 * creation, RTC initialisation, then a start-up banner. Always returns 0;
 * the result of iot__task_init() is not inspected.
 */
int32_t iot__module_init(void)
{
    iot__platform_init();   /* platform initialisation */
    iot__task_init();       /* create all the tasks */
    iot_rtc_init();

    iot_printf("starting...\n");
    return 0;
}
|
|
|
|
/*
 * Starts the OS kernel so the created tasks begin to run.
 * Returns 0 if os_start_kernel() ever returns.
 */
int32_t iot__task_start()
{
    const int32_t status = 0;

    os_start_kernel();
    return status;
}
|
|
|
|
/*
 * Starts the module by starting its tasks; forwards the result of
 * iot__task_start() unchanged.
 */
int32_t iot__module_start(void)
{
    return iot__task_start();
}
|
|
|
|
|
|
/*
 * Entry point for the AI_OS_TASK build: initialise the module, then start
 * it. The return values of both steps are ignored and 0 is returned.
 */
int main(void)
{
    (void)iot__module_init();    /* module init */
    (void)iot__module_start();   /* module start */
    return 0;
}
|
|
|
|
#endif |