//-------------------------------------------------------------------------- #include #include #include #include "bee_simd_custom.h" //#include "uart.h" //#include "hw_reg_api.h" #include "os_types.h" //#include "dbg_io.h" //#include "cpl_types.h" #include "iot_diag.h" #include "iot_io.h" #include "simd_vector_function.h" #include "simd_matrix_function.h" #include "simd_nn_function.h" //#include "clk.h" //#include "chip_reg_base.h" #include "os_mem.h" //#include "cpu.h" #include "simd_config.h" //#define SATURATION 1 //#define AI_USING_PSRAM 1 //#define AI_OS_TASK 1 #ifdef AI_OS_TASK #include "os_task.h" #endif void dbg_uart_init(); void read_case_from_python(uint8_t *config_read, uint32_t *config_get, uint8_t *data_get); void read_nn_case_from_python(uint8_t *config_read, uint32_t *config_get); void read_case_from_python_matrix(uint8_t *config_read, uint32_t *config_get); void read_case_from_python_matrix_multi(uint8_t *config_read, uint32_t *config_get); void verify_8bits(uint32_t out_addr, uint32_t golden_addr, uint32_t length); extern void uart_dma_init(int port, int br); extern void uart_dma_read(uint8_t *bufptr, uint32_t size, void (*callback) (void*, uint8_t), void* dummy); void vector_maximum_element_int8_small(int8_t *v, uint8_t *max_index, int8_t *max, uint8_t len); void iot_dbg_uart_set_port(uint8_t port, uint32_t baud, uint8_t parity, uint8_t data, uint8_t stop); void verify_float(uint32_t out_addr, uint32_t golden_addr, uint32_t length); int32_t test_uart0_getc(); uint64_t cpu_get_mcycle(); void test_uart_init(); int8_t table8[256]; int16_t table16[256]; int32_t table32[256]; int32_t table32[256]; int8_t weight[] = { 0x02, // w(0,0)2.0 0xcc, // w(1,0)-52.0 0x3e, // w(2,0)62.0 0xfe, // w(3,0)-2.0 0x60, // w(4,0)96.0 0x69, // w(5,0)105.0 0x59, // w(6,0)89.0 0x98, // w(7,0)-104.0 0xbe, // w(8,0)-66.0 0x34, // w(9,0)52.0 0x06, // w(10,0)6.0 0x16, // w(11,0)22.0 0x13, // w(12,0)19.0 0x74, // w(13,0)116.0 0x41, // w(14,0)65.0 0x25, // w(15,0)37.0 0xd8, // 
w(0,1)-40.0 0xeb, // w(1,1)-21.0 0xbc, // w(2,1)-68.0 0x32, // w(3,1)50.0 0x87, // w(4,1)-121.0 0x95, // w(5,1)-107.0 0xa0, // w(6,1)-96.0 0x5a, // w(7,1)90.0 0x0e, // w(8,1)14.0 0xf9, // w(9,1)-7.0 0x60, // w(10,1)96.0 0xeb, // w(11,1)-21.0 0xdd, // w(12,1)-35.0 0x20, // w(13,1)32.0 0x68, // w(14,1)104.0 0x13, // w(15,1)19.0 0x18, // w(0,2)24.0 0x5c, // w(1,2)92.0 0x00, // w(2,2)0.0 0xb1, // w(3,2)-79.0 0xae, // w(4,2)-82.0 0xcb, // w(5,2)-53.0 0x17, // w(6,2)23.0 0x24, // w(7,2)36.0 0x80, // w(8,2)-128.0 0x3a, // w(9,2)58.0 0x32, // w(10,2)50.0 0xec, // w(11,2)-20.0 0x0f, // w(12,2)15.0 0x14, // w(13,2)20.0 0xd9, // w(14,2)-39.0 0xd6, // w(15,2)-42.0 0xdc, // w(0,3)-36.0 0x6f, // w(1,3)111.0 0x3a, // w(2,3)58.0 0x7f, // w(3,3)127.0 0x3a, // w(4,3)58.0 0x7a, // w(5,3)122.0 0x8e, // w(6,3)-114.0 0x95, // w(7,3)-107.0 0x5c, // w(8,3)92.0 0x0a, // w(9,3)10.0 0xa0, // w(10,3)-96.0 0xb5, // w(11,3)-75.0 0xc8, // w(12,3)-56.0 0xf7, // w(13,3)-9.0 0xaf, // w(14,3)-81.0 0x6a, // w(15,3)106.0 0x9a, // w(0,4)-102.0 0xb1, // w(1,4)-79.0 0xd5, // w(2,4)-43.0 0x94, // w(3,4)-108.0 0x68, // w(4,4)104.0 0x6d, // w(5,4)109.0 0xcb, // w(6,4)-53.0 0xb5, // w(7,4)-75.0 0x42, // w(8,4)66.0 0xe3, // w(9,4)-29.0 0xc3, // w(10,4)-61.0 0x1c, // w(11,4)28.0 0xfc, // w(12,4)-4.0 0xd2, // w(13,4)-46.0 0xf0, // w(14,4)-16.0 0xcf, // w(15,4)-49.0 0x13, // w(0,5)19.0 0x05, // w(1,5)5.0 0x37, // w(2,5)55.0 0xee, // w(3,5)-18.0 0xf1, // w(4,5)-15.0 0x26, // w(5,5)38.0 0xd2, // w(6,5)-46.0 0xf7, // w(7,5)-9.0 0x0c, // w(8,5)12.0 0xed, // w(9,5)-19.0 0x2d, // w(10,5)45.0 0x23, // w(11,5)35.0 0x38, // w(12,5)56.0 0xb3, // w(13,5)-77.0 0xec, // w(14,5)-20.0 0x8c, // w(15,5)-116.0 0xc9, // w(0,6)-55.0 0xdf, // w(1,6)-33.0 0xc6, // w(2,6)-58.0 0x24, // w(3,6)36.0 0x99, // w(4,6)-103.0 0xd1, // w(5,6)-47.0 0x23, // w(6,6)35.0 0x2a, // w(7,6)42.0 0x96, // w(8,6)-106.0 0xa9, // w(9,6)-87.0 0x17, // w(10,6)23.0 0x06, // w(11,6)6.0 0xe3, // w(12,6)-29.0 0xf0, // w(13,6)-16.0 0x58, // w(14,6)88.0 0x70, // 
w(15,6)112.0 0x10, // w(0,7)16.0 0xb0, // w(1,7)-80.0 0x74, // w(2,7)116.0 0x3e, // w(3,7)62.0 0x3d, // w(4,7)61.0 0x99, // w(5,7)-103.0 0xa1, // w(6,7)-95.0 0x0c, // w(7,7)12.0 0x4f, // w(8,7)79.0 0x35, // w(9,7)53.0 0xf7, // w(10,7)-9.0 0x8b, // w(11,7)-117.0 0x28, // w(12,7)40.0 0xc7, // w(13,7)-57.0 0xbe, // w(14,7)-66.0 0x13, // w(15,7)19.0 0x85, // w(0,8)-123.0 0x64, // w(1,8)100.0 0xd8, // w(2,8)-40.0 0xf4, // w(3,8)-12.0 0x18, // w(4,8)24.0 0xab, // w(5,8)-85.0 0x37, // w(6,8)55.0 0x87, // w(7,8)-121.0 0x0f, // w(8,8)15.0 0x6a, // w(9,8)106.0 0x67, // w(10,8)103.0 0x8f, // w(11,8)-113.0 0xb5, // w(12,8)-75.0 0x4f, // w(13,8)79.0 0x7a, // w(14,8)122.0 0xc8, // w(15,8)-56.0 0x4d, // w(0,9)77.0 0x5f, // w(1,9)95.0 0x2e, // w(2,9)46.0 0x17, // w(3,9)23.0 0x4b, // w(4,9)75.0 0x0b, // w(5,9)11.0 0x3d, // w(6,9)61.0 0xbf, // w(7,9)-65.0 0x6f, // w(8,9)111.0 0x2f, // w(9,9)47.0 0xa8, // w(10,9)-88.0 0x71, // w(11,9)113.0 0x68, // w(12,9)104.0 0xb9, // w(13,9)-71.0 0xe9, // w(14,9)-23.0 0xc1, // w(15,9)-63.0 0x2d, // w(0,10)45.0 0xb5, // w(1,10)-75.0 0x6b, // w(2,10)107.0 0x66, // w(3,10)102.0 0x05, // w(4,10)5.0 0x76, // w(5,10)118.0 0x00, // w(6,10)0.0 0x29, // w(7,10)41.0 0xe6, // w(8,10)-26.0 0xc9, // w(9,10)-55.0 0x22, // w(10,10)34.0 0xf6, // w(11,10)-10.0 0x2f, // w(12,10)47.0 0xec, // w(13,10)-20.0 0xb9, // w(14,10)-71.0 0x96, // w(15,10)-106.0 0x33, // w(0,11)51.0 0x4c, // w(1,11)76.0 0xeb, // w(2,11)-21.0 0x48, // w(3,11)72.0 0xea, // w(4,11)-22.0 0x0b, // w(5,11)11.0 0xcb, // w(6,11)-53.0 0x0d, // w(7,11)13.0 0x0e, // w(8,11)14.0 0x9b, // w(9,11)-101.0 0xba, // w(10,11)-70.0 0xe0, // w(11,11)-32.0 0x54, // w(12,11)84.0 0xd2, // w(13,11)-46.0 0x43, // w(14,11)67.0 0x1c, // w(15,11)28.0 0x37, // w(0,12)55.0 0xee, // w(1,12)-18.0 0xca, // w(2,12)-54.0 0xe5, // w(3,12)-27.0 0xf0, // w(4,12)-16.0 0x4f, // w(5,12)79.0 0xb5, // w(6,12)-75.0 0x62, // w(7,12)98.0 0x92, // w(8,12)-110.0 0xaa, // w(9,12)-86.0 0x58, // w(10,12)88.0 0xbc, // w(11,12)-68.0 0xc6, // 
w(12,12)-58.0 0xf7, // w(13,12)-9.0 0x7b, // w(14,12)123.0 0xae, // w(15,12)-82.0 0xc1, // w(0,13)-63.0 0x4e, // w(1,13)78.0 0xf9, // w(2,13)-7.0 0x0b, // w(3,13)11.0 0xa5, // w(4,13)-91.0 0xf0, // w(5,13)-16.0 0x6d, // w(6,13)109.0 0xa8, // w(7,13)-88.0 0x80, // w(8,13)-128.0 0x97, // w(9,13)-105.0 0x92, // w(10,13)-110.0 0x68, // w(11,13)104.0 0x92, // w(12,13)-110.0 0xed, // w(13,13)-19.0 0xdd, // w(14,13)-35.0 0x47, // w(15,13)71.0 // end of one group of 16 filters 0x1d, // w(16,0)29.0 0x99, // w(17,0)-103.0 0x18, // w(18,0)24.0 0x4e, // w(19,0)78.0 0x63, // w(20,0)99.0 0x83, // w(21,0)-125.0 0xa9, // w(22,0)-87.0 0xfe, // w(23,0)-2.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x73, // w(16,1)115.0 0x6a, // w(17,1)106.0 0x49, // w(18,1)73.0 0x77, // w(19,1)119.0 0x7a, // w(20,1)122.0 0x1e, // w(21,1)30.0 0xdc, // w(22,1)-36.0 0x0b, // w(23,1)11.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x75, // w(16,2)117.0 0x37, // w(17,2)55.0 0x22, // w(18,2)34.0 0xee, // w(19,2)-18.0 0xe8, // w(20,2)-24.0 0x23, // w(21,2)35.0 0x2b, // w(22,2)43.0 0xc1, // w(23,2)-63.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, // w(16,3)45.0 0xa0, // w(17,3)-96.0 0x53, // w(18,3)83.0 0x6f, // w(19,3)111.0 0x09, // w(20,3)9.0 0xf0, // w(21,3)-16.0 0x66, // w(22,3)102.0 0xa9, // w(23,3)-87.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, // w(16,4)-121.0 0x64, // w(17,4)100.0 0x56, // w(18,4)86.0 0x0a, // w(19,4)10.0 0xee, // w(20,4)-18.0 0xdb, // w(21,4)-37.0 0xad, // w(22,4)-83.0 0xd5, // w(23,4)-43.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc1, // w(16,5)-63.0 0xc1, // w(17,5)-63.0 0x23, // w(18,5)35.0 0x94, // w(19,5)-108.0 0x4b, // w(20,5)75.0 0x4f, // w(21,5)79.0 0x59, // w(22,5)89.0 0xb8, // w(23,5)-72.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, // w(16,6)65.0 0xd8, // w(17,6)-40.0 0xbe, // w(18,6)-66.0 0x9c, // w(19,6)-100.0 0xc1, // w(20,6)-63.0 0x98, // w(21,6)-104.0 0x4a, // w(22,6)74.0 0xab, // w(23,6)-85.0 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0xc2, // w(16,7)-62.0 0x00, // w(17,7)0.0 0xa9, // w(18,7)-87.0 0xc7, // w(19,7)-57.0 0xd8, // w(20,7)-40.0 0x47, // w(21,7)71.0 0x18, // w(22,7)24.0 0x2b, // w(23,7)43.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf5, // w(16,8)-11.0 0x2e, // w(17,8)46.0 0xf5, // w(18,8)-11.0 0x21, // w(19,8)33.0 0x75, // w(20,8)117.0 0x55, // w(21,8)85.0 0x19, // w(22,8)25.0 0x81, // w(23,8)-127.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, // w(16,9)-30.0 0x38, // w(17,9)56.0 0xbb, // w(18,9)-69.0 0x18, // w(19,9)24.0 0xa7, // w(20,9)-89.0 0x29, // w(21,9)41.0 0xa9, // w(22,9)-87.0 0x6d, // w(23,9)109.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // w(16,10)64.0 0x9c, // w(17,10)-100.0 0x5e, // w(18,10)94.0 0xf8, // w(19,10)-8.0 0x2d, // w(20,10)45.0 0x90, // w(21,10)-112.0 0x3a, // w(22,10)58.0 0x8b, // w(23,10)-117.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, // w(16,11)-33.0 0xde, // w(17,11)-34.0 0x0a, // w(18,11)10.0 0xc1, // w(19,11)-63.0 0x7b, // w(20,11)123.0 0xe1, // w(21,11)-31.0 0xc3, // w(22,11)-61.0 0x82, // w(23,11)-126.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x39, // w(16,12)57.0 0x5a, // w(17,12)90.0 0x2d, // w(18,12)45.0 0x87, // w(19,12)-121.0 0x9e, // w(20,12)-98.0 0x0b, // w(21,12)11.0 0x9a, // w(22,12)-102.0 0xee, // w(23,12)-18.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, // w(16,13)106.0 0x26, // w(17,13)38.0 0x34, // w(18,13)52.0 0x5d, // w(19,13)93.0 0xf5, // w(20,13)-11.0 0xdd, // w(21,13)-35.0 0x3f, // w(22,13)63.0 0x61, // w(23,13)97.0 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // end of one group of 16 filters }; int32_t bias[32]; int8_t bias8[] = { 0x5e, // F(0,0)94 0xe1, // F(1,0)-31 0xe9, // F(2,0)-23 0xcc, // F(3,0)-52 0xd2, // F(4,0)-46 0x89, // F(5,0)-119 0xdc, // F(6,0)-36 0x02, // F(7,0)2 0x08, // F(8,0)8 0x4f, // F(9,0)79 0xb1, // F(10,0)-79 0xde, // F(11,0)-34 0x97, // F(12,0)-105 0xfe, // F(13,0)-2 0x1f, // F(14,0)31 0xf1, // F(15,0)-15 0x0c, // F(16,0)12 
0x69, // F(17,0)105 0x9d, // F(18,0)-99 0xbc, // F(19,0)-68 0xe7, // F(20,0)-25 0xd3, // F(21,0)-45 0xae, // F(22,0)-82 0xc7, // F(23,0)-57 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; int8_t output[32]; int8_t input[] = { 0xbc, // F(0,0)-68 0xdb, // F(1,0)-37 0xc2, // F(2,0)-62 0x3a, // F(3,0)58 0x59, // F(4,0)89 0x15, // F(5,0)21 0x47, // F(6,0)71 0xbb, // F(7,0)-69 0x38, // F(8,0)56 0xf1, // F(9,0)-15 0xc3, // F(10,0)-61 0xdf, // F(11,0)-33 0xa3, // F(12,0)-93 0x47, // F(13,0)71 0x00, 0x00 }; void vldx_test_uint8() { for (uint16_t i = 0; i < 256; i++) { table8[i] = i; } uint8_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32}; uint8_t o[16]; vld_uib(0, in, 1) vldx_uib(1, table8, 0) vst_uib(o, 1, 1) asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]); } } void vldx_test_int8() { for (uint16_t i = 0; i < 256; i++) { table8[i] = i; } int8_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32}; int8_t o[16]; vld_sib(0, in, 1) vldx_sib(1, (table8 + 128), 0) vst_sib(o, 1, 1) asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]); } } void vldx_test_uint16() { for (uint16_t i = 0; i < 256; i++) { table16[i] = i | ((i + 1) << 8); } uint16_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32}; uint16_t o[16]; vld_uih(0, in, 1) vadd(0, 0, 0) vldx_uih(1, table16, 0) vst_uih(o, 1, 1) asm("fence"); for (uint16_t i = 0; i < 16; i++) { iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]); } } void vldx_test_int16() { for (uint16_t i = 0; i < 256; i++) { table16[i] = i | ((i + 1) << 8); } int16_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32}; int16_t o[16]; vld_sih(0, in, 1) vadd(0, 0, 0) vldx_sih(1, (table16 + 128), 0) vst_sih(o, 1, 1) asm("fence"); for (uint16_t i = 0; i < 16; i++) { iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]); } } 
void vldx_test_uint32() { for (uint32_t i = 0; i < 256; i++) { table32[i] = i | ((i + 1) << 8) | ((i + 2) << 16) | ((i + 3) << 24); } uint32_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32}; uint32_t o[16]; vld_uiw(0, in, 1) vadd(0, 0, 0) vadd(0, 0, 0) vldx_uiw(1, table32, 0) vst_uiw(o, 1, 1) asm("fence"); for (uint32_t i = 0; i < 4; i++) { iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]); } } void vldx_test_int32() { for (uint32_t i = 0; i < 256; i++) { table32[i] = i | ((i + 1) << 8) | ((i + 2) << 16) | ((i + 3) << 24); } int32_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32}; int32_t o[16]; vld_siw(0, in, 1) vadd(0, 0, 0) vadd(0, 0, 0) vldx_siw(1, (table32 + 128), 0) vst_siw(o, 1, 1) asm("fence"); for (uint32_t i = 0; i < 4; i++) { iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]); } } void vstx_test_uint8() { uint8_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13}; uint8_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45}; uint8_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; vld_uib(0, index, 1) vld_uib(1, in, 1) vstx_uib(out, 0, 1) asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("out[%d] = %d\n", i, out[i]); } } void vstx_test_int8() { int8_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8}; int8_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45}; int8_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; vld_sib(0, index, 1) vld_sib(1, in, 1) vstx_sib((out + 8), 0, 1) asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("out[%d] = %d\n", i, out[i]); } } void vstx_test_uint16() { uint16_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13}; uint16_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45}; uint16_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0}; uint16_t *pi = in; uint16_t *pd = index; vld_uih(0, pd, 1) vadd(0, 0, 0) vld_uih(1, pi, 1) vstx_uih(out, 0, 1) pi += 8; pd += 8; vld_uih(0, pd, 1) vadd(0, 0, 0) vld_uih(1, pi, 1) vstx_uih(out, 0, 1) asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("out[%d] = %d\n", i, out[i]); } } void vstx_test_int16() { int16_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8}; int16_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45}; int16_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int16_t *pi = in; int16_t *pd = index; vld_sih(0, pd, 1) vadd(0, 0, 0) vld_sih(1, pi, 1) vstx_sih((out + 8), 0, 1) pi += 8; pd += 8; vld_sih(0, pd, 1) vadd(0, 0, 0) vld_sih(1, pi, 1) vstx_sih((out + 8), 0, 1) asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("out[%d] = %d\n", i, out[i]); } } void vstx_test_uint32() { uint32_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13}; uint32_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45}; uint32_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; uint32_t *pi = in; uint32_t *pd = index; vld_uiw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_uiw(1, pi, 1) vstx_uiw(out, 0, 1) pi += 4; pd += 4; vld_uiw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_uiw(1, pi, 1) vstx_uiw(out, 0, 1) pi += 4; pd += 4; vld_uiw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_uiw(1, pi, 1) vstx_uiw(out, 0, 1) pi += 4; pd += 4; vld_uiw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_uiw(1, pi, 1) vstx_uiw(out, 0, 1) pi += 4; pd += 4; asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("out[%d] = %d\n", i, out[i]); } } void vstx_test_int32() { int32_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8}; int32_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45}; int32_t 
out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int32_t *pi = in; int32_t *pd = index; vld_siw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_siw(1, pi, 1) vstx_siw((out + 8), 0, 1) pi += 4; pd += 4; vld_siw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_siw(1, pi, 1) vstx_siw((out + 8), 0, 1) pi += 4; pd += 4; vld_siw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_siw(1, pi, 1) vstx_siw((out + 8), 0, 1) pi += 4; pd += 4; vld_siw(0, pd, 1) vadd(0, 0, 0) vadd(0, 0, 0) vld_siw(1, pi, 1) vstx_siw((out + 8), 0, 1) pi += 4; pd += 4; asm("fence"); for (uint8_t i = 0; i < 16; i++) { iot_printf("out[%d] = %d\n", i, out[i]); } } uint8_t context_switch_test(){ uint8_t error = 0; int32_t v_od_00[8] = { 31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729}; int32_t v_od_01[8] = { 1501, -25964, -10420, 10591, 18387, -72, -27290, -17956}; int32_t v_od_02[8] = { 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974}; int32_t v_od_03[8] = { -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961}; int32_t v_od_04[8] = { -618, 660, -28635, 24275, 25812, -16048, -29275, -11623}; int32_t v_od_05[8] = { -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440}; int32_t v_od_06[8] = { 23991, 835, 10023, -24922, 19792, -8055, 505, 13278}; int32_t v_od_07[8] = { -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411}; int32_t v_od_08[8] = { 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956}; int32_t v_od_09[8] = { 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788}; int32_t v_od_10[8] = { 31891, -20165, 20131, 12774, -24314, -9821, 18102, -22719}; int32_t v_od_11[8] = { 1501, -25164, -10120, 10591, 18317, -71, -27190, -17916}; int32_t v_od_12[8] = { 16671, -7164, 23147, -1473, 13213, -12911, 191, -14914}; int32_t v_od_13[8] = { -30005, -8191, -19122, -23435, -19, 4111, 3132, 27911}; int32_t v_od_14[8] = { -618, 160, -28135, 24275, 25812, -16041, -29175, -11613}; int32_t v_od_15[8] = { -4293, -7181, -3172, -12244, 18416, 4401, 22162, 7410}; int32_t v_od_16[8] = { 23991, 135, 
10123, -24922, 19712, -8051, 105, 13218}; int32_t v_od_17[8] = { -31128, 118, -25131, -17336, -18315, 9021, -4172, 29411}; int32_t v_od_18[8] = { 14059, -14191, -7184, -16624, -1014, 20251, 17119, 4916}; int32_t v_od_19[8] = { 21760, -19131, 15116, 32652, -32115, 25701, 1199, 7718}; int32_t v_od_20[8] = { 31881, -20865, 28131, 18774, -24384, -9881, 18802, -28719}; int32_t v_od_21[8] = { 1581, -25864, -18120, 18591, 18387, -81, -27890, -18916}; int32_t v_od_22[8] = { 16681, -7864, 28147, -8473, 13283, -12981, 891, -18914}; int32_t v_od_23[8] = { -30085, -8891, -18122, -28435, -89, 4181, 3832, 28911}; int32_t v_od_24[8] = { -688, 860, -28135, 28275, 25882, -16081, -29875, -18613}; int32_t v_od_25[8] = { -4283, -7881, -8172, -18244, 18486, 4481, 22862, 8410}; int32_t v_od_26[8] = { 23981, 835, 18123, -28922, 19782, -8081, 805, 18218}; int32_t v_od_27[8] = { -31188, 818, -28131, -18336, -18385, 9081, -4872, 28411}; int32_t v_od_28[8] = { 14089, -14891, -8184, -18624, -1084, 20281, 17819, 8916}; int32_t v_od_29[8] = { 21780, -19831, 18116, 38652, -32185, 25781, 1899, 8718}; int32_t v_od_30[8] = { 31891, -29865, 29131, 18974, -29384, -9889, 18902, -29919}; int32_t v_od_31[8] = { 1591, -29864, -19120, 18991, 19387, -89, -27990, -19916}; int32_t v_dd_00[8]; int32_t v_dd_01[8]; int32_t v_dd_02[8]; int32_t v_dd_03[8]; int32_t v_dd_04[8]; int32_t v_dd_05[8]; int32_t v_dd_06[8]; int32_t v_dd_07[8]; int32_t v_dd_08[8]; int32_t v_dd_09[8]; int32_t v_dd_10[8]; int32_t v_dd_11[8]; int32_t v_dd_12[8]; int32_t v_dd_13[8]; int32_t v_dd_14[8]; int32_t v_dd_15[8]; int32_t v_dd_16[8]; int32_t v_dd_17[8]; int32_t v_dd_18[8]; int32_t v_dd_19[8]; int32_t v_dd_20[8]; int32_t v_dd_21[8]; int32_t v_dd_22[8]; int32_t v_dd_23[8]; int32_t v_dd_24[8]; int32_t v_dd_25[8]; int32_t v_dd_26[8]; int32_t v_dd_27[8]; int32_t v_dd_28[8]; int32_t v_dd_29[8]; int32_t v_dd_30[8]; int32_t v_dd_31[8]; int32_t v_dt[5]; vld_fpw( 0, v_od_00, 1) vld_sib( 1, v_od_01, 1) vld_uib( 2, v_od_02, 1) vld_sih( 3, v_od_03, 
1) vld_uih( 4, v_od_04, 1) vld_siw( 5, v_od_05, 1) vld_uiw( 6, v_od_06, 1) vld_fpw( 7, v_od_07, 1) vld_sib( 8, v_od_08, 1) vld_uib( 9, v_od_09, 1) vld_sih(10, v_od_10, 1) vld_uih(11, v_od_11, 1) vld_siw(12, v_od_12, 1) vld_uiw(13, v_od_13, 1) vld_fpw(14, v_od_14, 1) vld_sib(15, v_od_15, 1) // vld_uib(16, v_od_16, 1) // vld_sih(17, v_od_17, 1) // vld_uih(18, v_od_18, 1) // vld_siw(19, v_od_19, 1) // vld_uiw(20, v_od_20, 1) // vld_fpw(21, v_od_21, 1) // vld_sib(22, v_od_22, 1) // vld_uib(23, v_od_23, 1) // vld_sih(24, v_od_24, 1) // vld_uih(25, v_od_25, 1) // vld_siw(26, v_od_26, 1) // vld_uiw(27, v_od_27, 1) // vld_fpw(28, v_od_28, 1) // vld_sib(29, v_od_29, 1) // vld_uib(30, v_od_30, 1) // vld_sih(31, v_od_31, 1) vst_fpw(v_dd_00, 1, 0) vst_sib(v_dd_01, 1, 1) vst_uib(v_dd_02, 1, 2) vst_sih(v_dd_03, 1, 3) vst_uih(v_dd_04, 1, 4) vst_siw(v_dd_05, 1, 5) vst_uiw(v_dd_06, 1, 6) vst_fpw(v_dd_07, 1, 7) vst_sib(v_dd_08, 1, 8) vst_uib(v_dd_09, 1, 9) vst_sih(v_dd_10, 1, 10) vst_uih(v_dd_11, 1, 11) vst_siw(v_dd_12, 1, 12) vst_uiw(v_dd_13, 1, 13) vst_fpw(v_dd_14, 1, 14) vst_sib(v_dd_15, 1, 15) // vst_uib(v_dd_16, 1, 16) // vst_sih(v_dd_17, 1, 17) // vst_uih(v_dd_18, 1, 18) // vst_siw(v_dd_19, 1, 19) // vst_uiw(v_dd_20, 1, 20) // vst_fpw(v_dd_21, 1, 21) // vst_sib(v_dd_22, 1, 22) // vst_uib(v_dd_23, 1, 23) // vst_sih(v_dd_24, 1, 24) // vst_uih(v_dd_25, 1, 25) // vst_siw(v_dd_26, 1, 26) // vst_uiw(v_dd_27, 1, 27) // vst_fpw(v_dd_28, 1, 28) // vst_sib(v_dd_29, 1, 29) // vst_uib(v_dd_30, 1, 30) // vst_sih(v_dd_31, 1, 31) vcsrr(v_dt[0], 0) vcsrr(v_dt[1], 4) vcsrr(v_dt[2], 5) for (uint8_t i = 0; i < 5; i++) { iot_printf("v_dt[%d]=%d\n", i, v_dt[i]); } // vcsrr(v_dt[3], 6) // vcsrr(v_dt[4], 7) for(uint8_t i = 0; i < 4; i++) { error += (v_od_00[i] != v_dd_00[i]); error += (v_od_01[i] != v_dd_01[i]); error += (v_od_02[i] != v_dd_02[i]); error += (v_od_03[i] != v_dd_03[i]); error += (v_od_04[i] != v_dd_04[i]); error += (v_od_05[i] != v_dd_05[i]); error += (v_od_06[i] != v_dd_06[i]); error 
+= (v_od_07[i] != v_dd_07[i]); error += (v_od_08[i] != v_dd_08[i]); error += (v_od_09[i] != v_dd_09[i]); error += (v_od_10[i] != v_dd_10[i]); error += (v_od_11[i] != v_dd_11[i]); error += (v_od_12[i] != v_dd_12[i]); error += (v_od_13[i] != v_dd_13[i]); error += (v_od_14[i] != v_dd_14[i]); error += (v_od_15[i] != v_dd_15[i]); error += (v_od_16[i] != v_dd_16[i]); error += (v_od_17[i] != v_dd_17[i]); error += (v_od_18[i] != v_dd_18[i]); error += (v_od_19[i] != v_dd_19[i]); error += (v_od_20[i] != v_dd_20[i]); error += (v_od_21[i] != v_dd_21[i]); error += (v_od_22[i] != v_dd_22[i]); error += (v_od_23[i] != v_dd_23[i]); error += (v_od_24[i] != v_dd_24[i]); error += (v_od_25[i] != v_dd_25[i]); error += (v_od_26[i] != v_dd_26[i]); error += (v_od_27[i] != v_dd_27[i]); error += (v_od_28[i] != v_dd_28[i]); error += (v_od_29[i] != v_dd_29[i]); error += (v_od_30[i] != v_dd_30[i]); error += (v_od_31[i] != v_dd_31[i]); } return error; } uint8_t fpw_test(){ int i; uint8_t error = 0; int len = 256; float a = 34.75; float b = -44.1876; a = a; b = b; float v_a[256] = {777.07, 4935.03, -2095.22, 12332.16, 5333.75, -10814.01, 6083.75, 9508.28, -19271.97, 16169.42, -17471.97, -7809.87, -8338.85, 6279.29, -7754.14, 3226.75, -11237.26, 16013.69, 8641.08, -11088.85, -19109.87, 7806.68, 4344.90, -7388.85, -4545.22, -245.85, 10789.49, 13392.35, -19191.71, -9114.01, 15573.24, -3244.26, 10161.46, 19411.14, -1048.08, 13182.80, -555.73, 16038.21, 798.40, 6779.61, -14253.50, 1392.99, 12161.14, -13104.14, 13215.60, -10624.84, 12779.29, -7574.52, -15975.15, 15696.17, -11969.10, -8404.14, -2035.35, -9578.02, 17222.29, -19736.62, 12621.97, 16485.03, 4657.96, 3929.61, -135.35, 16592.35, -11447.13, -3710.19, 1707.32, 14610.82, -14592.67, 19698.72, 3263.05, 9526.11, 19942.03, -3376.75, -13132.80, 12350.31, 1144.58, -10321.97, 7638.53, 3089.49, -15864.01, -18837.26, 3166.87, 14204.45, -12628.34, -12936.62, -13019.74, -8274.20, 14179.93, -20016.24, -1608.28, 8886.30, -16579.29, -4057.32, 12142.03, 
7670.06, -16300.0, 9552.86, -1552.54, -12142.35, 17177.70, 17026.75, 6104.77, 17009.55, -3898.40, 3015.28, 20788.21, 11214.01, -9442.99, -7353.50, -357.00, -13278.02, -7142.35, -12297.77, -16668.15, -19056.36, 11109.23, -4065.92, 2818.15, -10530.57, 3775.79, -12004.14, 18415.28, -8501.91, -9812.10, -7161.46, -5634.71, -14551.91, 15949.36, 13456.68}; float v_b[256] = {8438.85, 280.25, -8459.55, -5259.23, 15479.29, 15932.48, -16.87, -10713.05, -6168.15, -10871.01, 12173.56, -15502.54, -9540.76, -12503.82, 13469.10, -4861.14, -15288.53, -13114.01, -9909.87, -13050.0, 4168.47, 18297.45, -10086.94, -8887.89, 2401.59, -16019.10, 10295.22, 9995.22, 16160.82, 9697.77, 426.11, 20664.96, 6679.93, -2966.87, -7217.19, 2012.73, -1391.71, -5016.24, -16111.46, 12742.35, 7115.60, 4667.51, -6773.24, 12151.27, -6780.89, -12656.36, -12919.42, 1100.31, 10725.79, -1694.26, -1034.07, -6064.33, 6150.0, -7362.10, 20117.19, 12178.66, 14416.24, -15065.28, 8656.36, -13225.47, 16494.58, 1716.24, -11512.10, -12388.53, 1336.30, 15338.85, -353.18, -6915.28, -18771.33, -13300.95, -19781.84, -8577.38, -2192.35, 13153.50, -1938.21, 6155.09, -20264.96, -5348.72, -7686.30, -10642.35, -4306.05, 11095.22, 19364.96, 17412.73, 4867.83, -2163.05, -16232.16, 1861.46, 10643.63, -434.39, -17728.34, -7474.84, 17064.33, 7873.88, 19580.89, -17547.13, 13165.28, 3083.12, 18822.61, -17371.01, -1943.63, 6529.61, -17193.94, -10397.45, -1166.56, 2749.04, -16870.38, 1806.05, -3050.95, -8488.21, -14813.69, -7595.54, 19336.30, 19500.95, 4617.19, -9113.37, -17397.45, 9054.77, 13341.71, 20647.77, 5813.69, 13237.57, 12785.66, 18758.59, 18591.08, -2187.26, 1249.04, 13889.49}; float v_d0[256]; float v_d1[256]; float *p_a = v_a; float *p_b = v_b; float *p_d0 = v_d0; vld_fpw(0, p_a, 1) vld_fpw(1, p_b, 1) for(i=0; i> 16; v_d1[i] = v_a[i] * v_b[i]; } for(i=0; i> 16; v_d1[i] = v_a[i] * v_b[i]; } for(i=0; i> 8; } for(i=0; i> 8; } for(i=0; i> 4; } for(i=0; i> 4; } for(i=0; i 255 ? 
255 : gg; #else g[i] = a[i] + b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]); } } iot_printf("add of uint8 test end\n"); vector_add_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { #ifdef SATURATION uint16_t gg = a[i] + b[i]; g[i] = gg > 255 ? 255 : gg; #else g[i] = a[i] + b[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("add const of uint8 test end\n"); } void vector_sub_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_sub_const_uint8(a, b[0], o, 100); for (uint8_t i = 0; i < 100; i++) { #ifdef SATURATION if (a[i] < b[0]) { g[i] = 0; } else { g[i] = a[i] - b[0]; } #else g[i] = a[i] - b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]); } } iot_printf("sub const of uint8 test end\n"); vector_const_sub_uint8(a, b[0], o, 100); for (uint8_t i = 0; i < 100; i++) { #ifdef SATURATION if (a[i] > b[0]) { g[i] = 0; } else { g[i] = b[0] - a[i]; } #else g[i] = b[0] - a[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, b = %d, a = %d, golden %d, output %d\n", i, b[0], a[i], g[i], o[i]); } } iot_printf("sub const of uint8 test end\n"); vector_sub_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { #ifdef SATURATION if (a[i] < b[i]) { g[i] = 0; } else { g[i] = a[i] - b[i]; } #else g[i] = a[i] - b[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("sub of uint8 test end\n"); } void vector_mul_test_uint8() { uint8_t length = 100; uint8_t *a = (uint8_t *)os_mem_malloc(1, length); uint8_t *b = (uint8_t *)os_mem_malloc(1, length); uint8_t *l = (uint8_t *)os_mem_malloc(1, length); uint8_t *h = (uint8_t 
*)os_mem_malloc(1, length); uint16_t *w = (uint16_t *)os_mem_malloc(1, length * 2); for (uint8_t i = 0; i < length; i++) { a[i] = -2 * (i - 25); b[i] = 3 * (i - 33); //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); } vector_multiply_uint8(a, b, w, length); for (uint8_t i = 0; i < length; i++) { uint16_t g = a[i] * b[i]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_uint8 finished\n"); vector_multiply_const_uint8(a, b[1], w, length); for (uint8_t i = 0; i < length; i++) { uint16_t g = a[i] * b[1]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_const_uint8 finished\n"); vector_multiply_uint8_high_8bit(a, b, h, length); for (uint8_t i = 0; i < length; i++) { uint16_t g = (a[i] * b[i]) >> 8; if (g != h[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_uint8_high finished\n"); vector_multiply_const_uint8_high_8bit(a, b[1], h, length); for (uint8_t i = 0; i < length; i++) { uint16_t g = (a[i] * b[1]) >> 8; if (g != h[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]); } } iot_printf("vector_multiply_const_uint8_high finished\n"); vector_multiply_uint8_low_8bit(a, b, l, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION uint16_t g = (a[i] * b[i]); if (g > 255) { g = 255; } #else uint16_t g = (a[i] * b[i]) & 0xff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_uint8_low finished\n"); vector_multiply_const_uint8_low_8bit(a, b[1], l, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION uint16_t g = (a[i] * b[1]); if (g > 255) { g = 255; } #else uint16_t g = (a[i] * b[1]) & 0xff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output 
%d\n", i, a[i], b[1], g, l[i]); } } iot_printf("vector_multiply_const_uint8_low finished\n"); os_mem_free(a); os_mem_free(b); os_mem_free(h); os_mem_free(l); os_mem_free(w); } void vector_madd_msub_test_uint8() { uint8_t length = 100; uint8_t *a = os_mem_malloc(1, length); uint8_t *b = os_mem_malloc(1, length); uint8_t *j = os_mem_malloc(1, length); uint8_t *o = os_mem_malloc(1, length); uint32_t *J = os_mem_malloc(1, length * 4); uint32_t *O = os_mem_malloc(1, length * 4); for (uint8_t i = 0; i < length; i++) { a[i] = 2 * (i + 25); b[i] = 3 * (i + 33); j[i] = i + 50; J[i] = i * 100 + 50000; //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]); } vector_mul_add_uint8(a, b, j, o, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION uint16_t l = a[i] * b[i]; l = l > 255 ? 255 : l; uint16_t g = j[i] + l; g = g > 255 ? 255 : g; #else uint8_t g = j[i] + a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, j = %d, a = %d, b = %d, golden %d, output %d\n", i, j[i], a[i], b[i], g, o[i]); } } iot_printf("vector_mul_add_uint8 finished\n"); vector_mul_sub_uint8(a, b, j, o, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION uint16_t l = a[i] * b[i]; l = l > 255 ? 255 : l; uint8_t g = j[i] > l ? 
j[i] - l : 0; #else uint8_t g = j[i] - a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, j = %d, a = %d, b = %d, golden %d, output %d\n", i, j[i], a[i], b[i], g, o[i]); } } iot_printf("vector_mul_sub_uint8 finished\n"); vector_mul_add_uint8_to_uint32(a, b, J, O, length); for (uint8_t i = 0; i < length; i++) { uint32_t g = J[i] + (uint32_t)a[i] * (uint32_t)b[i]; if (g != O[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]); } } iot_printf("vector_mul_add_uint8_to_uint32 finished\n"); vector_mul_sub_uint8_to_uint32(a, b, J, O, length); for (uint8_t i = 0; i < length; i++) { uint32_t g = J[i] - (uint32_t)a[i] * (uint32_t)b[i]; if (g != O[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]); } } iot_printf("vector_mul_sub_uint8_to_uint32 finished\n"); } void vector_min_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_min_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_min_const_uint8(a, b[0], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("min of uint8 test end\n"); } void vector_max_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_max_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] > b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_max_const_uint8(a, b[0], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] > b[0] ? 
a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("max of uint8 test end\n"); } void vector_equal_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_equal_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] == b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_equal_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] == b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("equal of uint8 test end\n"); } void vector_not_equal_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_not_equal_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] != b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_not_equal_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] != b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("not equal of uint8 test end\n"); } void vector_less_than_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_less_than_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_less_than_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_less_than_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] > b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("less than of uint8 test end\n"); } void vector_greater_or_equal_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_greater_or_equal_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >= b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_greater_or_equal_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_greater_or_equal_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] <= b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("greater or equal of uint8 test end\n"); } void vector_logic_and_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_and_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] & b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_and_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] & b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of uint8 test end\n"); } void vector_logic_or_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_or_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] | b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_or_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] | b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic or of uint8 test end\n"); } void vector_logic_xor_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xor_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] ^ b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xor_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] ^ b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of uint8 test end\n"); } void 
vector_logic_xnor_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xnor_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = ~(a[i] ^ b[i]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xnor_const_uint8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = ~(a[i] ^ b[11]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of uint8 test end\n"); } void vector_left_shift_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i + 1; b[i] = i % 7 + 1; } vector_left_shift_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_left_shift_const_uint8(a, b[2], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] << b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_left_shift_uint8(b, a[6], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[6] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("left shift of uint8 test end\n"); } void vector_right_shift_test_uint8() { uint8_t a[100]; uint8_t b[100]; uint8_t o[100]; uint8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i - 50; b[i] = i % 7 + 1; } vector_right_shift_uint8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_right_shift_const_uint8(a, b[2], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } 
vector_const_right_shift_uint8(b, a[6], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic right shift of uint8 test end\n"); } void vector_add_test_int8() { int8_t *a = (int8_t *)0x10200000; int8_t *b = (int8_t *)0x10101000; int8_t *o = (int8_t *)0x10102000; int8_t *g = (int8_t *)0x10103000; for (uint32_t i = 0; i < 128; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_add_const_int8(a, b[0], o, 128); for (uint8_t i = 0; i < 128; i++) { #ifdef SATURATION int16_t gg = a[i] + b[0]; g[i] = gg > 127 ? 127 : (gg < -128 ? -128 : gg); #else g[i] = a[i] + b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]); } } uint64_t begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1024; repeat++) { vector_add_int8(a, b, o, 128); } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1024; repeat++) { for (uint8_t i = 0; i < 128; i++) { #ifdef SATURATION int16_t gg = a[i] + b[i]; g[i] = gg > 127 ? 127 : (gg < -128 ? -128 : gg); #else g[i] = a[i] + b[i]; #endif } } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); for (uint8_t i = 0; i < 128; i++) { if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("add const of int8 test end\n"); } void vector_sub_test_int8() { int8_t a[128]; int8_t b[128]; int8_t o[128]; int8_t g[128]; for (uint32_t i = 0; i < 128; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_sub_const_int8(a, b[0], o, 128); for (uint8_t i = 0; i < 128; i++) { #ifdef SATURATION int16_t gg = a[i] - b[0]; g[i] = gg > 127 ? 127 : (gg < -128 ? 
-128 : gg); #else g[i] = a[i] - b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_sub_int8(a, b[0], o, 128); for (uint8_t i = 0; i < 128; i++) { #ifdef SATURATION int16_t gg = b[0] - a[i]; g[i] = gg > 127 ? 127 : (gg < -128 ? -128 : gg); #else g[i] = b[0] - a[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } uint64_t begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1024; repeat++) { vector_sub_int8(a, b, o, 128); } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1024; repeat++) { for (uint8_t i = 0; i < 128; i++) { #ifdef SATURATION int16_t gg = a[i] - b[i]; g[i] = gg > 127 ? 127 : (gg < -128 ? -128 : gg); #else g[i] = a[i] - b[i]; #endif } } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); for (uint8_t i = 0; i < 128; i++) { if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("sub const of int8 test end\n"); } void vector_mul_test_int8() { uint8_t length = 128; int8_t *a = (int8_t *)os_mem_malloc(1, length); int8_t *b = (int8_t *)os_mem_malloc(1, length); int8_t *l = (int8_t *)os_mem_malloc(1, length); int8_t *h = (int8_t *)os_mem_malloc(1, length); int16_t *w = (int16_t *)os_mem_malloc(1, length * 2); for (uint8_t i = 0; i < length; i++) { a[i] = -2 * (i - 25); b[i] = 3 * (i - 33); //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); } uint64_t begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1024; repeat++) { vector_multiply_int8(a, b, w, length); } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); for (uint8_t i = 0; i < length; i++) { int16_t g = a[i] * b[i]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } begin = cpu_get_mcycle(); for (uint32_t repeat = 0; 
repeat < 1024; repeat++) { for (uint8_t i = 0; i < length; i++) { w[i] = a[i] * b[i]; } } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); iot_printf("vector_multiply_int8 finished\n"); vector_multiply_const_int8(a, b[1], w, length); for (uint8_t i = 0; i < length; i++) { int16_t g = a[i] * b[1]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_const_int8 finished\n"); vector_multiply_int8_high_8bit(a, b, h, length); for (uint8_t i = 0; i < length; i++) { int16_t g = (a[i] * b[i]) >> 8; if (g != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]); } } iot_printf("vector_multiply_int8_high finished\n"); vector_multiply_const_int8_high_8bit(a, b[1], h, length); for (uint8_t i = 0; i < length; i++) { int16_t g = (a[i] * b[1]) >> 8; if (g != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]); } } iot_printf("vector_multiply_const_int8_high finished\n"); vector_multiply_int8_low_8bit(a, b, l, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION int16_t g = a[i] * b[i]; g = g > 127 ? 127 : (g < -128 ? -128 : g); #else int8_t g = (a[i] * b[i]) & 0xff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_int8_low finished\n"); vector_multiply_const_int8_low_8bit(a, b[1], l, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION int16_t g = a[i] * b[1]; g = g > 127 ? 127 : (g < -128 ? 
-128 : g); #else int8_t g = (a[i] * b[1]) & 0xff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]); } } iot_printf("vector_multiply_const_int8_low finished\n"); os_mem_free(a); os_mem_free(b); os_mem_free(h); os_mem_free(l); os_mem_free(w); } void vector_madd_msub_test_int8() { uint8_t length = 128; int8_t *a = os_mem_malloc(1, length); int8_t *b = os_mem_malloc(1, length); int8_t *j = os_mem_malloc(1, length); int8_t *o = os_mem_malloc(1, length); int32_t *J = os_mem_malloc(1, length * 4); int32_t *O = os_mem_malloc(1, length * 4); for (uint8_t i = 0; i < length; i++) { a[i] = -2 * (i - 25); b[i] = 3 * (i - 33); j[i] = i - 50; J[i] = i * 100 - 500; //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]); } uint64_t begin = cpu_get_mcycle(); vector_mul_add_int8(a, b, j, o, length); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION int16_t m = a[i] * b[i]; m = m > 127 ? 127 : (m < -128 ? -128 : m); int16_t g = (int16_t)j[i] + m; g = g > 127 ? 127 : (g < -128 ? -128 : g); #else int8_t g = j[i] + a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]); } } iot_printf("vector_mul_add_int8 finished\n"); vector_mul_sub_int8(a, b, j, o, length); for (uint8_t i = 0; i < length; i++) { #ifdef SATURATION int16_t m = a[i] * b[i]; m = m > 127 ? 127 : (m < -128 ? -128 : m); int16_t g = (int16_t)j[i] - m; g = g > 127 ? 127 : (g < -128 ? 
-128 : g); #else int8_t g = j[i] - a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]); } } iot_printf("vector_mul_sub_int8 finished\n"); vector_mul_add_int8_to_int32(a, b, J, O, length); for (uint8_t i = 0; i < length; i++) { int32_t g = J[i] + a[i] * b[i]; if (g != O[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]); } } iot_printf("vector_mul_add_int8_to_int32 finished\n"); vector_mul_sub_int8_to_int32(a, b, J, O, length); for (uint8_t i = 0; i < length; i++) { int32_t g = J[i] - a[i] * b[i]; if (g != O[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]); } } iot_printf("vector_mul_sub_int8_to_int32 finished\n"); } void vector_min_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_min_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_min_const_int8(a, b[0], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("min of int8 test end\n"); } void vector_max_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_max_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] > b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_max_const_int8(a, b[0], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] > b[0] ? 
a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("max of int8 test end\n"); } void vector_equal_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_equal_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] == b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_equal_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] == b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("equal of int8 test end\n"); } void vector_not_equal_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_not_equal_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] != b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_not_equal_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] != b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("not equal of int8 test end\n"); } void vector_less_than_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_less_than_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_less_than_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] < b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_less_than_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] > b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("less than of int8 test end\n"); } void vector_greater_or_equal_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); } vector_greater_or_equal_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >= b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_greater_or_equal_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_greater_or_equal_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] <= b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("greater or equal of int8 test end\n"); } void vector_logic_and_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_and_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] & b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_and_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] & b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic and of int8 test end\n"); } void vector_logic_or_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_or_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] | b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_or_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] | b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic or of int8 test end\n"); } void vector_logic_xor_test_int8() { int8_t a[100]; int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xor_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] ^ b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xor_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] ^ b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of int8 test end\n"); } void vector_logic_xnor_test_int8() { int8_t a[100]; 
int8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xnor_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = ~(a[i] ^ b[i]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xnor_const_int8(a, b[11], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = ~(a[i] ^ b[11]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xnor of int8 test end\n"); } void vector_left_shift_test_int8() { int8_t a[100]; uint8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i + 1; b[i] = i % 7 + 1; } vector_left_shift_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_left_shift_const_int8(a, b[2], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] << b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_left_shift_int8(b, a[6], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[6] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("left shift of int8 test end\n"); } void vector_logic_right_shift_test_int8() { int8_t a[100]; uint8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i - 50; b[i] = i % 7 + 1; } vector_logic_right_shift_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = (uint8_t)a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_right_shift_const_int8(a, b[2], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = (uint8_t)a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_logic_right_shift_int8(b, a[6], 
o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = (uint8_t)a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic right shift of int8 test end\n"); } void vector_arithmatic_right_shift_test_int8() { int8_t a[100]; uint8_t b[100]; int8_t o[100]; int8_t g[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i - 50; b[i] = i % 7 + 1; } vector_arithmatic_right_shift_int8(a, b, o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_arithmatic_right_shift_const_int8(a, b[2], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_arithmatic_right_shift_int8(b, a[6], o, 100); for (uint8_t i = 0; i < 100; i++) { g[i] = a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("arithmatic right shift of int8 test end\n"); } void vector_add_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2000 * i; b[i] = 3000 * i + 23; } vector_add_const_uint16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION uint32_t gg = a[i] + b[0]; g[i] = gg > 65535 ? 65535 : gg; #else g[i] = a[i] + b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_add_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION uint32_t gg = a[i] + b[i]; g[i] = gg > 65535 ? 
65535 : gg; #else g[i] = a[i] + b[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("add const of uint16 test end\n"); } void vector_sub_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2000 * i; b[i] = 3000 * i + 23; } vector_sub_const_uint16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION if (a[i] < b[0]) { g[i] = 0; } else { g[i] = a[i] - b[0]; } #else g[i] = a[i] - b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_sub_uint16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION if (a[i] > b[0]) { g[i] = 0; } else { g[i] = b[0] - a[i]; } #else g[i] = b[0] - a[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_sub_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION if (a[i] < b[i]) { g[i] = 0; } else { g[i] = a[i] - b[i]; } #else g[i] = a[i] - b[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("sub const of uint16 test end\n"); } void vector_mul_test_uint16() { uint16_t length = 50; uint16_t *a = (uint16_t *)os_mem_malloc(1, length * 2); uint16_t *b = (uint16_t *)os_mem_malloc(1, length * 2); uint16_t *l = (uint16_t *)os_mem_malloc(1, length * 2); uint16_t *h = (uint16_t *)os_mem_malloc(1, length * 2); uint32_t *w = (uint32_t *)os_mem_malloc(1, length * 2 * 2); for (uint16_t i = 0; i < length; i++) { a[i] = (uint16_t)(-2000 * (i - 25)); b[i] = (uint16_t)(3000 * (i - 33)); //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); } vector_multiply_uint16(a, b, w, length); for (uint16_t i = 0; i < length; i++) { uint32_t g = a[i] * b[i]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_uint16 
finished\n"); vector_multiply_const_uint16(a, b[1], w, length); for (uint16_t i = 0; i < length; i++) { uint32_t g = a[i] * b[1]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_const_uint16 finished\n"); vector_multiply_uint16_high_16bit(a, b, h, length); for (uint16_t i = 0; i < length; i++) { uint16_t g = (a[i] * b[i]) >> 16; if (g != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]); } } iot_printf("vector_multiply_uint16_high finished\n"); vector_multiply_const_uint16_high_16bit(a, b[1], h, length); for (uint16_t i = 0; i < length; i++) { uint16_t g = (a[i] * b[1]) >> 16; if (g != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]); } } iot_printf("vector_multiply_const_uint16_high finished\n"); vector_multiply_uint16_low_16bit(a, b, l, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION uint32_t g = (uint32_t)a[i] * (uint32_t)b[i]; if (g > 65535) { g = 65535; } #else uint16_t g = (a[i] * b[i]) & 0xffff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_uint16_low finished\n"); vector_multiply_const_uint16_low_16bit(a, b[1], l, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION uint32_t g = (uint32_t)a[i] * (uint32_t)b[1]; if (g > 65535) { g = 65535; } #else uint16_t g = (a[i] * b[1]) & 0xffff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]); } } iot_printf("vector_multiply_const_uint16_low finished\n"); os_mem_free(a); os_mem_free(b); os_mem_free(h); os_mem_free(l); os_mem_free(w); } void vector_madd_msub_test_uint16() { uint16_t length = 50; uint16_t *a = os_mem_malloc(1, length * 2); uint16_t *b = os_mem_malloc(1, length * 2); uint16_t *j = os_mem_malloc(1, length * 2); uint16_t *o = os_mem_malloc(1, length * 2); uint64_t *J = os_mem_malloc(1, 
length * 2 * 4); uint64_t *O = os_mem_malloc(1, length * 2 * 4); for (uint16_t i = 0; i < length; i++) { a[i] = 2000 * (i + 235); b[i] = 3000 * (i + 333); j[i] = i + 504; J[i] = i * 504 + 800000; //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]); } vector_mul_add_uint16(a, b, j, o, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION uint32_t m = a[i] * b[i]; m = m > 65535 ? 65535 : m; uint32_t g = j[i] + m; g = g > 65535 ? 65535 : g; #else uint16_t g = j[i] + a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]); } } iot_printf("vector_mul_add_uint16 finished\n"); vector_mul_sub_uint16(a, b, j, o, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION uint32_t m = a[i] * b[i]; m = m > 65535 ? 65535 : m; uint32_t g = j[i] < m ? 0 : j[i] - m; #else uint16_t g = j[i] - a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]); } } iot_printf("vector_mul_sub_uint16 finished\n"); vector_mul_add_uint16_to_uint64(a, b, J, O, length); for (uint16_t i = 0; i < length; i++) { uint64_t g = J[i] + (uint64_t)a[i] * (uint64_t)b[i]; if (g != O[i]) { uint32_t *pJ = (uint32_t *)J; uint32_t *pg = (uint32_t *)(&g); uint32_t *pO = (uint32_t *)O; iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i, pJ[2 * i + 1], pJ[2 * i], a[i], b[i], pg[1], pg[0], pO[2 * i + 1], pO[2 * i]); } } iot_printf("vector_mul_add_uint16_to_uint32 finished\n"); vector_mul_sub_uint16_to_uint64(a, b, J, O, length); for (uint16_t i = 0; i < length; i++) { uint64_t g = (uint64_t)(J[i] - (uint64_t)a[i] * (uint64_t)b[i]); if (g != O[i]) { uint32_t *pJ = (uint32_t *)J; uint32_t *pg = (uint32_t *)(&g); uint32_t *pO = (uint32_t *)O; iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i, pJ[2 * i + 1], 
pJ[2 * i], a[i], b[i], pg[1], pg[0], pO[2 * i + 1], pO[2 * i]); } } iot_printf("vector_mul_sub_uint16_to_uint32 finished\n"); } void vector_min_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_min_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] < b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_min_const_uint16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] < b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("min of uint16 test end\n"); } void vector_max_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_max_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] > b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_max_const_uint16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] > b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("max of uint16 test end\n"); } void vector_equal_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_equal_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] == b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_equal_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] == b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("equal of uint16 test end\n"); } void vector_not_equal_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_not_equal_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] != b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_not_equal_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] != b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("not equal of uint16 test end\n"); } void vector_less_than_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_less_than_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] < b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_less_than_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] < b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_less_than_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] > b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("less than of uint16 test end\n"); } void vector_greater_or_equal_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_greater_or_equal_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >= b[i] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_greater_or_equal_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_greater_or_equal_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] <= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("greater or equal of uint16 test end\n"); } void vector_logic_and_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_and_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] & b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_and_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] & b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic and of uint16 test end\n"); } void vector_logic_or_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_or_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] | b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_or_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] | b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic or of uint16 test end\n"); } void vector_logic_xor_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xor_uint16(a, 
b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] ^ b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xor_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] ^ b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of uint16 test end\n"); } void vector_logic_xnor_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xnor_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = ~(a[i] ^ b[i]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xnor_const_uint16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = ~(a[i] ^ b[11]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xnor of uint16 test end\n"); } void vector_left_shift_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = i + 1; b[i] = i % 7 + 1; } vector_left_shift_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_left_shift_const_uint16(a, b[2], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] << b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_left_shift_uint16(b, a[6], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[6] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("left shift of uint16 test end\n"); } void vector_right_shift_test_uint16() { uint16_t a[50]; uint16_t b[50]; uint16_t o[50]; uint16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = i - 50; b[i] = i 
% 7 + 1; } vector_right_shift_uint16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_right_shift_const_uint16(a, b[2], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_right_shift_uint16(b, a[6], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic right shift of uint16 test end\n"); } void vector_half_bits_test_uint16() { uint16_t a[50]; uint8_t h[50]; uint8_t l[50]; uint8_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = i + (i + 80) * 256; } uint64_t begin = cpu_get_mcycle(); vector_high_half_bits_uint16(a, h, 50); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >> 8; if (g[i] != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], h[i]); } } iot_printf("vector_high_half_bits_uint16 finished\n"); vector_low_half_bits_uint16(a, l, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] & 0xff; if (g[i] != l[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_low_half_bits_uint16 finished\n"); vector_high_and_low_half_bits_uint16(a, h, l, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >> 8; if (g[i] != h[i]) { iot_printf("incorrect higt at %d, golden %d, output %d\n", i, g[i], h[i]); } g[i] = a[i] & 0xff; if (g[i] != l[i]) { iot_printf("incorrect low at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_high_and_low_half_bits_uint16 finished\n"); } void vector_add_test_int16() { int16_t a[50]; int16_t b[50]; int16_t o[50]; int16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2000 * i; b[i] = 3000 * i + 23; } 
vector_add_const_int16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION int32_t gg = a[i] + b[0]; g[i] = gg > 32767 ? 32767 : (gg < -32768 ? -32768 : gg); #else g[i] = a[i] + b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } uint64_t begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1000; repeat++) { vector_add_int16(a, b, o, 50); } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1000; repeat++) { for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION int32_t gg = a[i] + b[i]; g[i] = gg > 32767 ? 32767 : (gg < -32768 ? -32768 : gg); #else g[i] = a[i] + b[i]; #endif //if (g[i] != o[i]) { // iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); //} } } end = cpu_get_mcycle(); for (uint16_t i = 0; i < 50; i++) { if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); iot_printf("add const of int16 test end\n"); } void vector_sub_test_int16() { int16_t a[50]; int16_t b[50]; int16_t o[50]; int16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2000 * i; b[i] = 3000 * i + 23; } vector_sub_const_int16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION int32_t gg = a[i] - b[0]; g[i] = gg > 32767 ? 32767 : (gg < -32768 ? -32768 : gg); #else g[i] = a[i] - b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_sub_int16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION int32_t gg = b[0] - a[i]; g[i] = gg > 32767 ? 32767 : (gg < -32768 ? 
-32768 : gg); #else g[i] = b[0] - a[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } uint64_t begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1; repeat++) { vector_sub_int16(a, b, o, 50); } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1; repeat++) { for (uint16_t i = 0; i < 50; i++) { #ifdef SATURATION int32_t gg = a[i] - b[i]; g[i] = gg > 32767 ? 32767 : (gg < -32768 ? -32768 : gg); #else g[i] = a[i] - b[i]; #endif } } end = cpu_get_mcycle(); for (uint16_t i = 0; i < 50; i++) { if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); iot_printf("sub const of int16 test end\n"); } void vector_mul_test_int16() { uint16_t length = 50; int16_t *a = (int16_t *)os_mem_malloc(1, length * 2); int16_t *b = (int16_t *)os_mem_malloc(1, length * 2); int16_t *l = (int16_t *)os_mem_malloc(1, length * 2); int16_t *h = (int16_t *)os_mem_malloc(1, length * 2); int32_t *w = (int32_t *)os_mem_malloc(1, length * 2 * 2); for (uint16_t i = 0; i < length; i++) { a[i] = -2000 * (i - 25); b[i] = 3000 * (i - 33); //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); } vector_multiply_int16(a, b, w, length); for (uint16_t i = 0; i < length; i++) { int32_t g = a[i] * b[i]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_int16 finished\n"); vector_multiply_const_int16(a, b[1], w, length); for (uint16_t i = 0; i < length; i++) { int32_t g = a[i] * b[1]; if (g != w[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]); } } iot_printf("vector_multiply_const_int16 finished\n"); vector_multiply_int16_high_16bit(a, b, h, length); for (uint16_t i = 0; i < length; i++) { int32_t g = (a[i] * b[i]) >> 16; if (g != h[i]) { iot_printf("incorrect at %d, golden 
%d, output %d\n", i, g, h[i]); } } iot_printf("vector_multiply_int16_high finished\n"); vector_multiply_const_int16_high_16bit(a, b[1], h, length); for (uint16_t i = 0; i < length; i++) { int32_t g = (a[i] * b[1]) >> 16; if (g != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]); } } iot_printf("vector_multiply_const_int16_high finished\n"); vector_multiply_int16_low_16bit(a, b, l, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION int32_t gg = a[i] * b[i]; int32_t g = gg > 32767 ? 32767 : (gg < -32768 ? -32768 : gg); #else int16_t g = (a[i] * b[i]) & 0xffff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_int16_low finished\n"); vector_multiply_const_int16_low_16bit(a, b[1], l, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION int32_t gg = a[i] * b[1]; int32_t g = gg > 32767 ? 32767 : (gg < -32768 ? -32768 : gg); #else int16_t g = (a[i] * b[1]) & 0xffff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]); } } iot_printf("vector_multiply_const_int16_low finished\n"); os_mem_free(a); os_mem_free(b); os_mem_free(h); os_mem_free(l); os_mem_free(w); } void vector_madd_msub_test_int16() { uint16_t length = 50; int16_t *a = os_mem_malloc(1, length * 2); int16_t *b = os_mem_malloc(1, length * 2); int16_t *j = os_mem_malloc(1, length * 2); int16_t *o = os_mem_malloc(1, length * 2); int64_t *J = os_mem_malloc(1, length * 2 * 4); int64_t *O = os_mem_malloc(1, length * 2 * 4); for (uint16_t i = 0; i < length; i++) { a[i] = -2000 * (i - 25); b[i] = 3000 * (i - 33); j[i] = i - 50; J[i] = i * 50 - 500; //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]); } vector_mul_add_int16(a, b, j, o, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION int32_t m = a[i] * b[i]; m = m > 32767 ? 
32767 : (m < -32768 ? -32768 : m); int32_t g = j[i] + m; g = g > 32767 ? 32767 : (g < -32768 ? -32768 : g); #else int16_t g = j[i] + a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]); } } iot_printf("vector_mul_add_int16 finished\n"); vector_mul_sub_int16(a, b, j, o, length); for (uint16_t i = 0; i < length; i++) { #ifdef SATURATION int32_t m = a[i] * b[i]; m = m > 32767 ? 32767 : (m < -32768 ? -32768 : m); int32_t g = j[i] - m; g = g > 32767 ? 32767 : (g < -32768 ? -32768 : g); #else int16_t g = j[i] - a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]); } } iot_printf("vector_mul_sub_int16 finished\n"); vector_mul_add_int16_to_int64(a, b, J, O, length); for (uint16_t i = 0; i < length; i++) { int32_t g = J[i] + a[i] * b[i]; if (g != O[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]); } } iot_printf("vector_mul_add_int16_to_int32 finished\n"); vector_mul_sub_int16_to_int64(a, b, J, O, length); for (uint16_t i = 0; i < length; i++) { int32_t g = J[i] - a[i] * b[i]; if (g != O[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]); } } iot_printf("vector_mul_sub_int16_to_int32 finished\n"); } void vector_min_test_int16() { int16_t a[50]; int16_t b[50]; int16_t o[50]; int16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_min_int16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] < b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_min_const_int16(a, b[0], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] < b[0] ? 
a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("min of int16 test end\n"); }

/*
 * Runs vector_max_int16() and vector_max_const_int16() on generated data and
 * prints one line per element that differs from the scalar reference.
 */
void vector_max_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    /* vector against vector */
    vector_max_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] > rhs[k]) ? lhs[k] : rhs[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector against a single constant */
    const int16_t pivot = rhs[0];
    vector_max_const_int16(lhs, pivot, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] > pivot) ? lhs[k] : pivot;
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("max of int16 test end\n");
}

/*
 * Runs vector_equal_int16() and vector_equal_const_int16(); the reference
 * result is 1 where the operands are equal and 0 otherwise.
 */
void vector_equal_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector against vector */
    vector_equal_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] == rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector against a single constant */
    const int16_t pivot = rhs[11];
    vector_equal_const_int16(lhs, pivot, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] == pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("equal of int16 test end\n");
}

/*
 * Runs vector_not_equal_int16() and vector_not_equal_const_int16(); the
 * reference result is 1 where the operands differ and 0 otherwise.
 */
void vector_not_equal_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector against vector */
    vector_not_equal_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] != rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector against a single constant */
    const int16_t pivot = rhs[11];
    vector_not_equal_const_int16(lhs, pivot, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] != pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("not equal of int16 test end\n");
}

/*
 * Runs the three int16 "less than" kernels: vector < vector,
 * vector < constant, and constant < vector.
 */
void vector_less_than_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector < vector */
    vector_less_than_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] < rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    const int16_t pivot = rhs[11];

    /* vector < constant */
    vector_less_than_const_int16(lhs, pivot, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (lhs[k] < pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* constant < vector */
    vector_const_less_than_int16(lhs, pivot, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (pivot < lhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("less than of int16 test end\n");
}

void vector_greater_or_equal_test_int16() { int16_t a[50]; int16_t b[50]; int16_t o[50]; int16_t g[50]; for (uint32_t i = 0; i < 50; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_greater_or_equal_int16(a, b, o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >= b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_greater_or_equal_const_int16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_greater_or_equal_int16(a, b[11], o, 50); for (uint16_t i = 0; i < 50; i++) { g[i] = a[i] <= b[11] ?
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("greater or equal of int16 test end\n"); }

/*
 * Runs vector_logic_and_int16() and vector_logic_and_const_int16() and
 * compares each output element with the bitwise AND computed in C.
 */
void vector_logic_and_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector & vector */
    vector_logic_and_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = lhs[k] & rhs[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector & constant */
    const int16_t mask = rhs[11];
    vector_logic_and_const_int16(lhs, mask, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = lhs[k] & mask;
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic and of int16 test end\n");
}

/*
 * Runs vector_logic_or_int16() and vector_logic_or_const_int16() and
 * compares each output element with the bitwise OR computed in C.
 */
void vector_logic_or_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector | vector */
    vector_logic_or_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = lhs[k] | rhs[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector | constant */
    const int16_t mask = rhs[11];
    vector_logic_or_const_int16(lhs, mask, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = lhs[k] | mask;
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic or of int16 test end\n");
}

/*
 * Runs vector_logic_xor_int16() and vector_logic_xor_const_int16() and
 * compares each output element with the bitwise XOR computed in C.
 */
void vector_logic_xor_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector ^ vector */
    vector_logic_xor_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = lhs[k] ^ rhs[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector ^ constant */
    const int16_t mask = rhs[11];
    vector_logic_xor_const_int16(lhs, mask, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = lhs[k] ^ mask;
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic xor of int16 test end\n");
}

/*
 * Runs vector_logic_xnor_int16() and vector_logic_xnor_const_int16() and
 * compares each output element with ~(x ^ y) computed in C.
 */
void vector_logic_xnor_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t lhs[NUM_ELEMS], rhs[NUM_ELEMS], out[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector xnor vector */
    vector_logic_xnor_int16(lhs, rhs, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = ~(lhs[k] ^ rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector xnor constant */
    const int16_t mask = rhs[11];
    vector_logic_xnor_const_int16(lhs, mask, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = ~(lhs[k] ^ mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic xnor of int16 test end\n");
}

/*
 * Runs the three int16 left-shift kernels: vector << vector,
 * vector << constant, and constant << vector.  Shift amounts are 1..7.
 */
void vector_left_shift_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t val[NUM_ELEMS], out[NUM_ELEMS];
    uint16_t amt[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        val[n] = n + 1;
        amt[n] = n % 7 + 1;
    }

    /* vector << vector */
    vector_left_shift_int16(val, amt, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = val[k] << amt[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector << constant */
    vector_left_shift_const_int16(val, amt[2], out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = val[k] << amt[2];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* constant << vector: the shift-amount array is the vector operand */
    vector_const_left_shift_int16(amt, val[6], out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = val[6] << amt[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("left shift of int16 test end\n");
}

/*
 * Runs the three int16 logical right-shift kernels.  The reference casts the
 * operand to uint16_t before shifting so that zeros are shifted in.
 * Inputs are -50..-1, shift amounts 1..7.
 */
void vector_logic_right_shift_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t val[NUM_ELEMS], out[NUM_ELEMS];
    uint16_t amt[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        val[n] = n - 50;
        amt[n] = n % 7 + 1;
    }

    /* vector >> vector */
    vector_logic_right_shift_int16(val, amt, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (uint16_t)val[k] >> amt[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector >> constant */
    vector_logic_right_shift_const_int16(val, amt[2], out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (uint16_t)val[k] >> amt[2];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* constant >> vector */
    vector_const_logic_right_shift_int16(amt, val[6], out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = (uint16_t)val[6] >> amt[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic right shift of int16 test end\n");
}

/*
 * Runs the three int16 arithmetic right-shift kernels.  The reference uses
 * C's >> on the signed operand.  Inputs are -50..-1, shift amounts 1..7.
 */
void vector_arithmatic_right_shift_test_int16()
{
    enum { NUM_ELEMS = 50 };
    int16_t val[NUM_ELEMS], out[NUM_ELEMS];
    uint16_t amt[NUM_ELEMS];

    for (uint32_t n = 0; n < NUM_ELEMS; n++) {
        val[n] = n - 50;
        amt[n] = n % 7 + 1;
    }

    /* vector >> vector */
    vector_arithmatic_right_shift_int16(val, amt, out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = val[k] >> amt[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* vector >> constant */
    vector_arithmatic_right_shift_const_int16(val, amt[2], out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = val[k] >> amt[2];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    /* constant >> vector */
    vector_const_arithmatic_right_shift_int16(amt, val[6], out, NUM_ELEMS);
    for (uint16_t k = 0; k < NUM_ELEMS; k++) {
        const int16_t want = val[6] >> amt[k];
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("arithmatic right shift of int16 test end\n");
}

void vector_half_bits_test_int16() { int16_t a[50]; int8_t h[50]; int8_t l[50]; int8_t g[50]; for (int16_t i = 0; i < 50; i++) { a[i] = i + (i + 80) * 506; } vector_high_half_bits_int16(a, h, 50); for (int16_t i = 0; i < 50; i++) { g[i] = a[i] >> 8; if (g[i] != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], h[i]); } } iot_printf("vector_high_half_bits_int16 finished\n"); vector_low_half_bits_int16(a, l, 50); for (int16_t i = 0; i < 50; i++) { g[i] = a[i] & 0xff; if (g[i] != l[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_low_half_bits_int16 finished\n"); vector_high_and_low_half_bits_int16(a, h, l, 50); for (int16_t i = 0; i < 50; i++) { g[i] = a[i] >> 8; if (g[i] != h[i]) { iot_printf("incorrect higt at %d, golden %d, output %d\n", i, g[i], h[i]); } g[i] = a[i] & 0xff; if (g[i]
!= l[i]) { iot_printf("incorrect low at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_high_and_low_half_bits_int16 finished\n"); }

/*
 * Checks vector_add_const_uint32() and vector_add_uint32().  With SATURATION
 * defined the reference clamps the 64-bit sum to 0xffffffff; otherwise the
 * sum wraps modulo 2^32.
 */
void vector_add_test_uint32()
{
    uint32_t a[25];
    uint32_t b[25];
    uint32_t o[25];
    uint32_t g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    /* vector + constant */
    vector_add_const_uint32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] + (uint64_t)b[0];
        g[i] = gg > 0xffffffff ? 0xffffffff : gg;
#else
        g[i] = a[i] + b[0];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %x, b = %x, golden %x, output %x\n", i, a[i], b[0], g[i], o[i]);
        }
    }

    /* vector + vector */
    vector_add_uint32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] + (uint64_t)b[i];
        g[i] = gg > 0xffffffff ? 0xffffffff : gg;
#else
        g[i] = a[i] + b[i];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %x, b = %x, golden %x, output %x\n", i, a[i], b[i], g[i], o[i]);
        }
    }

    iot_printf("add const of uint32 test end\n");
}

/*
 * Checks vector_sub_const_uint32() (a - c), vector_const_sub_uint32() (c - a)
 * and vector_sub_uint32() (a - b).  With SATURATION defined the reference
 * returns 0 whenever the subtrahend exceeds the minuend; otherwise the
 * difference wraps modulo 2^32.
 */
void vector_sub_test_uint32()
{
    uint32_t a[25];
    uint32_t b[25];
    uint32_t o[25];
    uint32_t g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000001 * i + 23;
        b[i] = 300000001 * i;
    }

    /* vector - constant */
    vector_sub_const_uint32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        if (a[i] < b[0]) {
            g[i] = 0;
        } else {
            g[i] = a[i] - b[0];
        }
#else
        g[i] = a[i] - b[0];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    /* constant - vector */
    vector_const_sub_uint32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        if (a[i] > b[0]) {
            g[i] = 0;
        } else {
            g[i] = b[0] - a[i];
        }
#else
        g[i] = b[0] - a[i];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    /* vector - vector */
    vector_sub_uint32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
#ifdef SATURATION
        if (a[i] < b[i]) {
            g[i] = 0;
        } else {
            g[i] = a[i] - b[i];
        }
#else
        g[i] = a[i] - b[i];
#endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    iot_printf("sub const of uint32 test end\n");
}

/*
 * Checks the uint32 multiply kernels: full 64-bit product, high 32 bits and
 * low 32 bits, each in vector*vector and vector*constant form.  Buffers come
 * from os_mem_malloc() and are released before returning.  With SATURATION
 * defined the "low" reference clamps the product to 0xffffffff instead of
 * truncating it.
 *
 * 64-bit golden/output values are reported as two 32-bit hex words (high
 * word first), because a 64-bit argument does not match a "%d" conversion.
 */
void vector_mul_test_uint32()
{
    uint32_t length = 100;
    uint32_t *a = os_mem_malloc(1, length * sizeof *a);
    uint32_t *b = os_mem_malloc(1, length * sizeof *b);
    uint32_t *l = os_mem_malloc(1, length * sizeof *l);
    uint32_t *h = os_mem_malloc(1, length * sizeof *h);
    uint64_t *w = os_mem_malloc(1, length * sizeof *w);

    for (uint32_t i = 0; i < length; i++) {
        a[i] = 201 * (103 - i);
        b[i] = 301 * (127 - i);
    }

    /* full-width product */
    vector_multiply_uint32(a, b, w, length);
    for (uint32_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)a[i] * (uint64_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %x%x, output %x%x\n", i, a[i], b[i],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(w[i] >> 32), (uint32_t)w[i]);
        }
    }
    iot_printf("vector_multiply_uint32 finished\n");

    vector_multiply_const_uint32(a, b[1], w, length);
    for (uint32_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)a[i] * (uint64_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %x%x, output %x%x\n", i, a[i], b[1],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(w[i] >> 32), (uint32_t)w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32 finished\n");

    /* upper 32 bits of the product */
    vector_multiply_uint32_high_32bit(a, b, h, length);
    for (uint32_t i = 0; i < length; i++) {
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[i]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint32_high finished\n");

    vector_multiply_const_uint32_high_32bit(a, b[1], h, length);
    for (uint32_t i = 0; i < length; i++) {
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[1]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32_high finished\n");

    /* lower 32 bits of the product (or the clamped product with SATURATION) */
    vector_multiply_uint32_low_32bit(a, b, l, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
#else
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[i]) & 0xffffffff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint32_low finished\n");

    vector_multiply_const_uint32_low_32bit(a, b[1], l, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[1];
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
#else
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[1]) & 0xffffffff;
#endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32_low finished\n");

    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(h);
    os_mem_free(l);
    os_mem_free(w);
}

/*
 * Checks vector_mul_add_uint32() (j + a * b) and vector_mul_sub_uint32()
 * (j - a * b).  With SATURATION defined the reference clamps the product and
 * the final result to the uint32 range; otherwise everything wraps modulo
 * 2^32.  Buffers are released before returning.
 */
void vector_madd_msub_test_uint32()
{
    uint32_t length = 100;
    uint32_t *a = os_mem_malloc(1, length * sizeof *a);
    uint32_t *b = os_mem_malloc(1, length * sizeof *b);
    uint32_t *j = os_mem_malloc(1, length * sizeof *j);
    uint32_t *o = os_mem_malloc(1, length * sizeof *o);

    for (uint32_t i = 0; i < length; i++) {
        a[i] = 222 * (103 - i);
        b[i] = 333 * (127 - i);
        j[i] = i + 120000000;
    }

    vector_mul_add_uint32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        gg = gg > 0xffffffff ? 0xffffffff : gg;
        gg = (uint64_t)j[i] + gg;
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
#else
        uint32_t g = j[i] + a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint32 finished\n");

    vector_mul_sub_uint32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
#ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        gg = gg > 0xffffffff ? 0xffffffff : gg;
        uint32_t g = j[i] < gg ? 0 : j[i] - gg;
#else
        uint32_t g = j[i] - a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint32 finished\n");

    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(j);
    os_mem_free(o);
}

/*
 * Runs vector_min_uint32() and vector_min_const_uint32() on generated data
 * and prints one line per element that differs from the scalar reference.
 */
void vector_min_test_uint32()
{
    uint32_t a[25];
    uint32_t b[25];
    uint32_t o[25];
    uint32_t g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    vector_min_uint32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = a[i] < b[i] ? a[i] : b[i];
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    vector_min_const_uint32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = a[i] < b[0] ? a[i] : b[0];
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    iot_printf("min of uint32 test end\n");
}

/*
 * Runs vector_max_uint32() and vector_max_const_uint32() on generated data
 * and prints one line per element that differs from the scalar reference.
 */
void vector_max_test_uint32()
{
    uint32_t a[25];
    uint32_t b[25];
    uint32_t o[25];
    uint32_t g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    vector_max_uint32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = a[i] > b[i] ? a[i] : b[i];
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    vector_max_const_uint32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = a[i] > b[0] ? a[i] : b[0];
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    iot_printf("max of uint32 test end\n");
}

void vector_equal_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_equal_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] == b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_equal_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] == b[11] ?
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("equal of uint32 test end\n"); } void vector_not_equal_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_not_equal_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] != b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_not_equal_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] != b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("not equal of uint32 test end\n"); } void vector_less_than_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_less_than_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] < b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_less_than_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] < b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_less_than_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] > b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("less than of uint32 test end\n"); } void vector_greater_or_equal_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_greater_or_equal_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >= b[i] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_greater_or_equal_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_greater_or_equal_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] <= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("greater or equal of uint32 test end\n"); } void vector_logic_and_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_logic_and_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] & b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_and_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] & b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic and of uint32 test end\n"); } void vector_logic_or_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200 * i; b[i] = 300000000 * i + 23; } vector_logic_or_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] | b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_or_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] | b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic or of uint32 test end\n"); } void vector_logic_xor_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } 
vector_logic_xor_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] ^ b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xor_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] ^ b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of uint32 test end\n"); } void vector_logic_xnor_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_logic_xnor_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = ~(a[i] ^ b[i]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xnor_const_uint32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = ~(a[i] ^ b[11]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xnor of uint32 test end\n"); } void vector_left_shift_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = i + 1; b[i] = i + 1; } vector_left_shift_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_left_shift_const_uint32(a, b[2], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] << b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_left_shift_uint32(b, a[6], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[6] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("left shift of uint32 test end\n"); } void vector_right_shift_test_uint32() { uint32_t a[25]; uint32_t b[25]; uint32_t o[25]; uint32_t g[25]; for (uint32_t i = 0; i < 25; i++) { 
a[i] = i * 1000000000 - 25; b[i] = i + 1; } vector_right_shift_uint32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_right_shift_const_uint32(a, b[2], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_right_shift_uint32(b, a[6], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic right shift of uint32 test end\n"); } void vector_half_bits_test_uint32() { uint32_t a[25]; uint16_t h[25]; uint16_t l[25]; uint16_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = i + (i + 80) * 65536; } vector_high_half_bits_uint32(a, h, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >> 16; if (g[i] != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], h[i]); } } iot_printf("vector_high_half_bits_uint32 finished\n"); vector_low_half_bits_uint32(a, l, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] & 0xffff; if (g[i] != l[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_low_half_bits_uint32 finished\n"); vector_high_and_low_half_bits_uint32(a, h, l, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >> 16; if (g[i] != h[i]) { iot_printf("incorrect higt at %d, golden %d, output %d\n", i, g[i], h[i]); } g[i] = a[i] & 0xffff; if (g[i] != l[i]) { iot_printf("incorrect low at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_high_and_low_half_bits_uint32 finished\n"); } void vector_add_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_add_const_int32(a, b[0], o, 25); for (uint32_t i = 0; i < 25; i++) { #ifdef 
SATURATION int64_t gg = (int64_t)a[i] + (int64_t)b[0]; g[i] = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648 : gg); #else g[i] = a[i] + b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]); } } uint64_t begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1000; repeat++) { vector_add_int32(a, b, o, 25); } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t repeat = 0; repeat < 1000; repeat++) { for (uint32_t i = 0; i < 25; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] + (int64_t)b[i]; g[i] = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648: gg); #else g[i] = a[i] + b[i]; #endif } } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); for (uint32_t i = 0; i < 25; i++) { if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("add const of int32 test end\n"); } void vector_sub_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i + 23; } vector_sub_const_int32(a, b[0], o, 25); for (uint32_t i = 0; i < 25; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] - (int64_t)b[0]; g[i] = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648 : gg); #else g[i] = a[i] - b[0]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("sub const of int32 test end\n"); vector_const_sub_int32(a, b[0], o, 25); for (uint32_t i = 0; i < 25; i++) { #ifdef SATURATION int64_t gg = (int64_t)b[0] - (int64_t)a[i]; g[i] = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? 
-2147483648 : gg); #else g[i] = b[0] - a[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("const sub of int32 test end\n"); vector_sub_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] - (int64_t)b[i]; g[i] = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648: gg); #else g[i] = a[i] - b[i]; #endif if (g[i] != o[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]); } } iot_printf("sub of int32 test end\n"); } void vector_mul_test_int32() { uint32_t length = 100; int32_t *a = (int32_t *)os_mem_malloc(1, length * 4); int32_t *b = (int32_t *)os_mem_malloc(1, length * 4); int32_t *l = (int32_t *)os_mem_malloc(1, length * 4); int32_t *h = (int32_t *)os_mem_malloc(1, length * 4); int64_t *w = (int64_t *)os_mem_malloc(1, length * 4 * 2); for (uint32_t i = 0; i < length; i++) { a[i] = -201 * (i - 14); b[i] = 301 * (i - 6); //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]); } vector_multiply_int32(a, b, w, length); for (uint32_t i = 0; i < length; i++) { int64_t g = (int64_t)a[i] * (int64_t)b[i]; if (g != w[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, w[i]); } } iot_printf("vector_multiply_int32 finished\n"); vector_multiply_const_int32(a, b[1], w, length); for (uint32_t i = 0; i < length; i++) { int64_t g = (int64_t)a[i] * (int64_t)b[1]; if (g != w[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, w[i]); } } iot_printf("vector_multiply_const_int32 finished\n"); vector_multiply_int32_high_32bit(a, b, h, length); for (uint32_t i = 0; i < length; i++) { int32_t g = ((int64_t)a[i] * (int64_t)b[i]) >> 32; if (g != h[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]); } } iot_printf("vector_multiply_int32_high finished\n"); 
vector_multiply_const_int32_high_32bit(a, b[1], h, length); for (uint32_t i = 0; i < length; i++) { int32_t g = ((int64_t)a[i] * (int64_t)b[1]) >> 32; if (g != h[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]); } } iot_printf("vector_multiply_const_int32_high finished\n"); vector_multiply_int32_low_32bit(a, b, l, length); for (uint32_t i = 0; i < length; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] * (int64_t)b[i]; int32_t g = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648: gg); #else int32_t g = ((int64_t)a[i] * (int64_t)b[i]) & 0xffffffff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_int32_low finished\n"); vector_multiply_const_int32_low_32bit(a, b[1], l, length); for (uint32_t i = 0; i < length; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] * (int64_t)b[1]; int32_t g = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648: gg); #else int32_t g = ((int64_t)a[i] * (int64_t)b[1]) & 0xffffffff; #endif if (g != l[i]) { iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]); } } iot_printf("vector_multiply_const_int32_low finished\n"); os_mem_free(a); os_mem_free(b); os_mem_free(h); os_mem_free(l); os_mem_free(w); } void vector_madd_msub_test_int32() { uint32_t length = 100; int32_t *a = os_mem_malloc(1, length * 4); int32_t *b = os_mem_malloc(1, length * 4); int32_t *j = os_mem_malloc(1, length * 4); int32_t *o = os_mem_malloc(1, length * 4); for (uint32_t i = 0; i < length; i++) { a[i] = -222 * (i - 25); b[i] = 333 * (i - 33); j[i] = i - 1200000000; //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d\n", i, a[i], i, b[i], i, j[i]); } vector_mul_add_int32(a, b, j, o, length); for (uint32_t i = 0; i < length; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] * (int64_t)b[i]; gg = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? 
-2147483648: gg); int64_t g = (int64_t)j[i] + gg; g = g > 0x7fffffff ? 0x7fffffff : (g < -2147483648 ? -2147483648: g); #else int32_t g = j[i] + a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]); } } iot_printf("vector_mul_add_int32 finished\n"); vector_mul_sub_int32(a, b, j, o, length); for (uint32_t i = 0; i < length; i++) { #ifdef SATURATION int64_t gg = (int64_t)a[i] * (int64_t)b[i]; gg = gg > 0x7fffffff ? 0x7fffffff : (gg < -2147483648 ? -2147483648: gg); int64_t g = (int64_t)j[i] - gg; g = g > 0x7fffffff ? 0x7fffffff : (g < -2147483648 ? -2147483648: g); #else int32_t g = j[i] - a[i] * b[i]; #endif if (g != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]); } } iot_printf("vector_mul_sub_int32 finished\n"); os_mem_free(a); os_mem_free(b); os_mem_free(j); os_mem_free(o); } void vector_min_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_min_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] < b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_min_const_int32(a, b[0], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] < b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("min of int32 test end\n"); } void vector_max_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_max_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] > b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_max_const_int32(a, b[0], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] > b[0] ? 
a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("max of int32 test end\n"); } void vector_equal_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_equal_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] == b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_equal_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] == b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("equal of int32 test end\n"); } void vector_not_equal_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_not_equal_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] != b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_not_equal_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] != b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("not equal of int32 test end\n"); } void vector_less_than_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2000000000 * i; b[i] = 3000000000 * i - 23; } vector_less_than_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] < b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_less_than_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] < b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_less_than_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] > b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("less than of int32 test end\n"); } void vector_greater_or_equal_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_greater_or_equal_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >= b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_greater_or_equal_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_greater_or_equal_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] <= b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("greater or equal of int32 test end\n"); } void vector_logic_and_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_logic_and_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] & b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_and_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] & b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic and of int32 test end\n"); } void vector_logic_or_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_logic_or_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] | b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_or_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] | b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic or of int32 test end\n"); } void vector_logic_xor_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_logic_xor_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] ^ b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xor_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] ^ b[11]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xor of int32 test end\n"); } 
void vector_logic_xnor_test_int32() { int32_t a[25]; int32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 200000000 * i; b[i] = 300000000 * i - 23; } vector_logic_xnor_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = ~(a[i] ^ b[i]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_xnor_const_int32(a, b[11], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = ~(a[i] ^ b[11]); if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic xnor of int32 test end\n"); } void vector_left_shift_test_int32() { int32_t a[25]; uint32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = i * 500000000 + 1; b[i] = i; } vector_left_shift_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_left_shift_const_int32(a, b[2], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] << b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_left_shift_int32(b, a[6], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[6] << b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("left shift of int32 test end\n"); } void vector_logic_right_shift_test_int32() { int32_t a[25]; uint32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = i - 25; b[i] = i % 7 + 1; } vector_logic_right_shift_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = (uint32_t)a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_logic_right_shift_const_int32(a, b[2], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = (uint32_t)a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, 
output %d\n", i, g[i], o[i]); } } vector_const_logic_right_shift_int32(b, a[6], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = (uint32_t)a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("logic right shift of int32 test end\n"); } void vector_arithmatic_right_shift_test_int32() { int32_t a[25]; uint32_t b[25]; int32_t o[25]; int32_t g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = i - 25; b[i] = i % 7 + 1; } vector_arithmatic_right_shift_int32(a, b, o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_arithmatic_right_shift_const_int32(a, b[2], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[i] >> b[2]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } vector_const_arithmatic_right_shift_int32(b, a[6], o, 25); for (uint32_t i = 0; i < 25; i++) { g[i] = a[6] >> b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]); } } iot_printf("arithmatic right shift of int32 test end\n"); } void vector_half_bits_test_int32() { int32_t a[25]; int16_t h[25]; int16_t l[25]; int16_t g[25]; for (int32_t i = 0; i < 25; i++) { a[i] = i + (i + 80) * 65536; } vector_high_half_bits_int32(a, h, 25); for (int32_t i = 0; i < 25; i++) { g[i] = a[i] >> 16; if (g[i] != h[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], h[i]); } } iot_printf("vector_high_half_bits_int32 finished\n"); vector_low_half_bits_int32(a, l, 25); for (int32_t i = 0; i < 25; i++) { g[i] = a[i] & 0xffff; if (g[i] != l[i]) { iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_low_half_bits_int32 finished\n"); vector_high_and_low_half_bits_int32(a, h, l, 25); for (int32_t i = 0; i < 25; i++) { g[i] = a[i] >> 16; if (g[i] != h[i]) { iot_printf("incorrect higt at %d, golden %d, output %d\n", i, 
g[i], h[i]); } g[i] = a[i] & 0xffff; if (g[i] != l[i]) { iot_printf("incorrect low at %d, golden %d, output %d\n", i, g[i], l[i]); } } iot_printf("vector_high_and_low_half_bits_int32 finished\n"); } void vector_add_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_add_const_float(a, b[0], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] + b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_add_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] + b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("add const of float test end\n"); } void vector_sub_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; uint32_t *o32 = (uint32_t *)o; uint32_t *g32 = (uint32_t *)g; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_sub_const_float(a, b[0], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] - b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, g[i], g32[i], o[i], o32[i]); } } vector_const_sub_float(a, b[0], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = b[0] - a[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, g[i], g32[i], o[i], o32[i]); } } vector_sub_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] - b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, g[i], g32[i], o[i], o32[i]); } } iot_printf("sub const of float test end\n"); } void vector_mul_test_float() { uint8_t length = 25; float a[25]; float b[25]; float g[25]; float w[25]; uint32_t *a32 = (uint32_t *)&a[0]; uint32_t *b32 = (uint32_t *)&b[0]; uint32_t *g32 = (uint32_t *)&g[0]; uint32_t *w32 = (uint32_t *)&w[0]; for (uint8_t i = 0; i < length; i++) { a[i] = -0.2 * (i - 2.5); b[i] 
= 0.3 * (i - 3.3); //iot_printf("a[%d] = %f, b[%d] = %f\n", i, a[i], i, b[i]); } vector_multiply_float(a, b, w, length); for (uint8_t i = 0; i < length; i++) { g[i] = a[i] * b[i]; if (g[i] != w[i]) { iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n", g32[i], w32[i]); iot_printf("golden %08x, output %08x\n\n", g32[i], w32[i]); iot_printf("incorrect at %d, a = 0x%08x, b = 0x%08x, golden 0x%08x, output 0x%08x\n", i, a32[i], b32[i], g32[i], w32[i]); iot_printf("golden %f, output %f\n", g[i], w[i]); } } //iot_printf("vector_multiply_float finished\n"); vector_multiply_const_float(a, b[1], w, length); for (uint8_t i = 0; i < length; i++) { g[i] = a[i] * b[1]; if (g[i] != w[i]) { iot_printf("incorrect at %d, a = %f(0x%08x), b = %f(0x%08x), golden %f(0x%08x), output %f(0x%08x)\n", i, a[i], a32[i], b[1], b32[1], g[i], g32[i], w[i], w32[i]); } } //iot_printf("vector_multiply_const_float finished\n"); } void vector_madd_msub_test_float() { uint8_t length = 25; float a[25]; float b[25]; float j[25]; float o[25]; float g[25]; uint32_t *uo = (uint32_t *)o; uint32_t *ug = (uint32_t *)g; for (uint8_t i = 0; i < length; i++) { a[i] = -0.2 * (i - 25); b[i] = 0.3 * (i - 33); j[i] = i - 50; //iot_printf("a[%d] = %f, b[%d] = %f, j[%d] = %f\n", i, a[i], i, b[i], i, j[i]); } vector_mul_add_float(a, b, j, o, length); for (uint8_t i = 0; i < length; i++) { g[i] = a[i] * b[i]; g[i] = g[i] + j[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, g[i], ug[i], o[i], uo[i]); } } iot_printf("vector_mul_add_float finished\n"); vector_mul_sub_float(a, b, j, o, 
length); for (uint8_t i = 0; i < length; i++) { g[i] = a[i] * b[i]; g[i] = j[i] - g[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, g[i], ug[i], o[i], uo[i]); } } iot_printf("vector_mul_sub_float finished\n"); } void vector_min_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_min_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] < b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_min_const_float(a, b[0], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] < b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("min of float test end\n"); } void vector_max_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i + 23; } vector_max_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] > b[i] ? a[i] : b[i]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_max_const_float(a, b[0], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] > b[0] ? a[i] : b[0]; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("max of float test end\n"); } void vector_equal_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_equal_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] == b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_equal_const_float(a, b[11], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] == b[11] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("equal of float test end\n"); } void vector_not_equal_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_not_equal_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] != b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_not_equal_const_float(a, b[11], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] != b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("not equal of float test end\n"); } void vector_less_than_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_less_than_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] < b[i] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_less_than_const_float(a, b[11], o, 25); for (uint8_t i = 0; i < 25; i++) { uint32_t *pg = (uint32_t *)(g + i); uint32_t *po = (uint32_t *)(o + i); g[i] = a[i] < b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(%08x), output %f(%08x)\n", i, g[i], *pg, o[i], *po); } } vector_const_less_than_float(a, b[11], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] > b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("less than of float test end\n"); } void vector_greater_or_equal_test_float() { float a[25]; float b[25]; float o[25]; float g[25]; for (uint32_t i = 0; i < 25; i++) { a[i] = 2 * i; b[i] = 3 * i - 23; } vector_greater_or_equal_float(a, b, o, 25); for (uint8_t i = 0; i < 25; i++) { uint32_t *pg = (uint32_t *)(g + i); uint32_t *po = (uint32_t *)(o + i); g[i] = a[i] >= b[i] ? 
1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f(%08x), output %f(%08x)\n", i, g[i], *pg, o[i], *po); } } vector_greater_or_equal_const_float(a, b[11], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] >= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } vector_const_greater_or_equal_float(a, b[11], o, 25); for (uint8_t i = 0; i < 25; i++) { g[i] = a[i] <= b[11] ? 1 : 0; if (g[i] != o[i]) { iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]); } } iot_printf("greater or equal of float test end\n"); } void float_greater_equal_test(float *a, float *b, float *o, float *g, uint32_t length) { uint32_t length_tail = length & (SIMD_W - 1); uint32_t length_body = length - length_tail; uint32_t *ua = (uint32_t *)a; uint32_t *ub = (uint32_t *)b; float *pa = a; float *pb = b; float *po = o; for (uint32_t i = 0; i < length; i++) { iot_printf("a[%d] = %f(0x%08x), b[%d] = %f(0x%08x)\n", i, a[i], ua[i], i, b[i], ub[i]); g[i] = a[i] >= b[i] ? 
1.0 : 0.0; } for (uint32_t i = 0; i < length_body; i += SIMD_W) { vld_fpw(0, pa, 1); vld_fpw(1, pb, 1); vsge(2, 0, 1); vst_fpw(po, 1, 2); pa += SIMD_W; pb += SIMD_W; po += SIMD_W; asm("fence"); } for (uint32_t i = 0; i < length; i++) { if (g[i] != o[i]) { iot_printf("g[%d] = %f, o[%d] = %f\n", i, g[i], i, o[i]); } } } void sigmoid_int8_test() { int8_t input_[256]; for (uint32_t i = 0; i < 256; i++) { input_[i] = i - 128; } int8_t output[256]; uint64_t begin = cpu_get_mcycle(); sigmoid_int8(input_, output, 256); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 256; i++) { iot_printf("%d\n", output[i]); } iot_printf("time = %d\n", (uint32_t)(end - begin)); } void tanh_int8_test() { int8_t input_[256]; for (uint32_t i = 0; i < 256; i++) { input_[i] = i - 128; } int8_t output[256]; uint64_t begin = cpu_get_mcycle(); tanh_int8(input_, output, 256); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 256; i++) { iot_printf("%d\n", output[i]); } iot_printf("time = %d\n", (uint32_t)(end - begin)); } void half_bits_test_16_to_8() { //uint16_t a[256] = 
{44641,49129,26256,16088,58270,14244,30327,37995,6931,56640,5774,49320,7276,17402,32136,45320,36031,19644,19435,37152,49723,63428,61882,14223,23149,54102,831,36226,55644,19419,36099,7483,30896,18941,25805,23607,48792,282,48783,54897,49426,6762,20780,53309,24469,8539,61695,19415,59186,50222,32014,25999,41713,64471,33742,23461,4465,38142,60141,23549,24896,15021,3281,41924,38058,7016,24388,14059,4248,54782,40738,39345,58949,38653,25051,58363,19180,3206,3344,40722,33240,16872,34385,65101,62755,61266,28302,12659,40628,21913,4935,63079,31480,25878,21467,21352,64551,55641,38244,30774,5504,48349,57711,14148,54166,25793,3399,23600,39731,57112,45684,38384,31193,33232,63416,7174,52058,22787,60669,44254,32499,36905,43035,60968,26860,60485,3310,48805,45049,37903,143,41464,60646,4187,63860,55819,30387,40709,64641,39538,23352,21361,36043,63552,2583,54181,12272,18361,50277,20570,23835,46469,61349,53900,63130,34784,6593,11239,11974,52307,41069,23393,25954,22119,63136,7816,55537,44185,14606,36887,48995,63029,48235,43904,43939,3273,291,54180,40612,64664,16495,24803,47522,45992,9694,37811,49634,30017,6245,33255,32350,65516,31063,3328,36368,56021,23334,61044,770,43954,56915,8246,59888,19817,44941,61740,51312,30965,54138,54642,4226,32393,48326,25609,54200,32173,49050,14796,54710,51153,28930,54744,56167,47714,31678,64915,61073,64506,30663,17403,37980,12405,8087,58728,65146,45550,56799,38387,38227,44411,18989,61993,48043,54784,40966,25067,16106,29127,19598,19525,43595,36213,65384,26661,39739,41330}; uint16_t a[256]; for (uint32_t i = 0; i < 256; i++) { a[i] = ((i - 128) << 8) | i; //iot_printf("a[%d] = 0x%04x\n", i, a[i]); } uint8_t h[256]; uint8_t l[256]; uint16_t *pa = a; uint8_t *ph = h; uint8_t *pl = l; for (uint16_t i = 0; i < 256; i += SIMD_W << 2) { vld_uih(0, pa, 1); pa += SIMD_W << 1; vld_uih(1, pa, 1); pa += SIMD_W << 1; vnwh(2, 0, 1); vnwl(3, 0, 1); vst_uib(ph, 1, 2); vst_uib(pl, 1, 3); ph += SIMD_W << 2; pl += SIMD_W << 2; asm("fence"); } for (uint16_t i = 0; i < 16; i++) { 
uint8_t hi = a[i] >> 8; if (hi != h[i]) { iot_printf("golden_h[%d] = %d, output_h[%d] = %d\n", i, hi, i, h[i]); } } for (uint16_t i = 0; i < 16; i++) { uint8_t lo = a[i] & 0xff; if (lo != l[i]) { iot_printf("golden_l[%d] = %d, output_l[%d] = %d\n", i, lo, i, l[i]); } } } void half_bits_test_32_to_16() { uint32_t number_elem = 256; int32_t *a = (int32_t *)os_mem_malloc(1, number_elem * sizeof(int32_t)); int16_t *h = (int16_t *)os_mem_malloc(1, number_elem * sizeof(int16_t)); int16_t *l = (int16_t *)os_mem_malloc(1, number_elem * sizeof(int16_t)); int32_t *pa = a; int16_t *ph = h; int16_t *pl = l; for (uint32_t i = 0; i < number_elem; i++) { a[i] = ((i - 128) << 16) | i; //iot_printf("a[%d] = 0x%08x\n", i, a[i]); } for (uint16_t i = 0; i < number_elem; i += SIMD_WH) { vld_siw(0, pa, 1); pa += SIMD_W; vld_siw(1, pa, 1); pa += SIMD_W; vnwh(2, 0, 1); vnwl(3, 0, 1); vst_sih(ph, 1, 2); vst_sih(pl, 1, 3); ph += SIMD_WH; pl += SIMD_WH; asm("fence"); } for (uint16_t i = 0; i < number_elem; i++) { int16_t hi = a[i] >> 16; if (hi != h[i]) { iot_printf("golden_h[%d] = 0x%04x, output_h[%d] = 0x%04x\n", i, hi, i, h[i]); } } for (uint16_t i = 0; i < number_elem; i++) { int16_t lo = a[i] & 0xffff; if (lo != l[i]) { iot_printf("golden_l[%d] = 0x%04x, output_l[%d] = 0x%04x\n", i, lo, i, l[i]); } } os_mem_free(a); os_mem_free(h); os_mem_free(l); } void half_bits_test_32_to_16u() { uint32_t number_elem = 256; uint32_t *a = (uint32_t *)os_mem_malloc(1, number_elem * sizeof(uint32_t)); uint16_t *h = (uint16_t *)os_mem_malloc(1, number_elem * sizeof(uint16_t)); uint16_t *l = (uint16_t *)os_mem_malloc(1, number_elem * sizeof(uint16_t)); uint32_t *pa = a; uint16_t *ph = h; uint16_t *pl = l; for (uint32_t i = 0; i < number_elem; i++) { a[i] = ((i - 128) << 16) | i; //iot_printf("a[%d] = 0x%08x\n", i, a[i]); } for (uint16_t i = 0; i < number_elem; i += SIMD_WH) { vld_uiw(0, pa, 1); pa += SIMD_W; vld_uiw(1, pa, 1); pa += SIMD_W; vnwh(2, 0, 1); vnwl(3, 0, 1); vst_uih(ph, 1, 2); vst_uih(pl, 1, 
3); ph += SIMD_WH; pl += SIMD_WH; asm("fence"); } for (uint16_t i = 0; i < number_elem; i++) { int16_t hi = a[i] >> 16; if (hi != h[i]) { iot_printf("golden_h[%d] = 0x%04x, output_h[%d] = 0x%04x\n", i, hi, i, h[i]); } } for (uint16_t i = 0; i < number_elem; i++) { int16_t lo = a[i] & 0xffff; if (lo != l[i]) { iot_printf("golden_l[%d] = 0x%04x, output_l[%d] = 0x%04x\n", i, lo, i, l[i]); } } os_mem_free(a); os_mem_free(h); os_mem_free(l); } void fc_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; uint32_t data_begin = 0x10200000; read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t elem_bytes = config_get[4]; uint32_t batch = config_get[5]; uint32_t bias_shift_enable = config_get[6]; uint64_t begin = cpu_get_mcycle(); switch (elem_bytes) { case 1: // 8bits switch (bias_shift_enable) { case 0: // no bias_shift fc_int8_to_int8_weight_8bit_batch((int8_t *)input_addr, (int8_t *)output_addr, (int8_t *)weight_addr, (int32_t *)bias_addr, config_get[0], config_get[1], config_get[2], batch); break; case 1: // bias_shift fc_int8_to_int8_weight_8bit_bias_shift_batch((int8_t *)input_addr, (int8_t *)output_addr, (int8_t *)weight_addr, (int8_t *)bias_addr, config_get[0], config_get[1], config_get[2], config_get[3], batch); break; default: break; } break; case 2: // 16bits switch (bias_shift_enable) { case 0: // no bias_shift fc_int16_to_int16_weight_16bit_batch((int16_t *)input_addr, (int16_t *)output_addr, (int16_t *)weight_addr, (int64_t *)bias_addr, config_get[0], config_get[1], config_get[2], batch); break; case 1: // bias_shift fc_int16_to_int16_weight_16bit_bias_shift_batch((int16_t *)input_addr, (int16_t *)output_addr, (int16_t *)weight_addr, (int16_t *)bias_addr, config_get[0], 
config_get[1], config_get[2], config_get[3], batch); break; default: break; } break; default: break; } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); verify_8bits(output_addr, golden_addr, config_get[55]); } void depth_fc_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; uint32_t data_begin = 0x10200000; read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t elem_bytes = config_get[4]; uint32_t batch = config_get[5]; uint32_t bias_shift_enable = config_get[6]; uint64_t begin = cpu_get_mcycle(); switch (elem_bytes) { case 1: // 8bits switch (bias_shift_enable) { case 0: // no bias_shift fc_depth_int8_to_int8_weight_8bit_batch((int8_t *)input_addr, (int8_t *)output_addr, (int8_t *)weight_addr, (int32_t *)bias_addr, config_get[1], config_get[2], batch); break; case 1: // bias_shift fc_depth_int8_to_int8_weight_8bit_bias_shift_batch((int8_t *)input_addr, (int8_t *)output_addr, (int8_t *)weight_addr, (int8_t *)bias_addr, config_get[1], config_get[2], config_get[3], batch); break; default: break; } break; case 2: // 16bits switch (bias_shift_enable) { case 0: // no bias_shift fc_depth_int16_to_int16_weight_16bit_batch((int16_t *)input_addr, (int16_t *)output_addr, (int16_t *)weight_addr, (int64_t *)bias_addr, config_get[1], config_get[2], batch); break; case 1: // bias_shift fc_depth_int16_to_int16_weight_16bit_bias_shift_batch((int16_t *)input_addr, (int16_t *)output_addr, (int16_t *)weight_addr, (int16_t *)bias_addr, config_get[1], config_get[2], config_get[3], batch); break; default: break; } break; default: break; } uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); //if (output_addr + config_get[55] > 0x10008000) { 
// iot_printf("too large\nfinished~"); // return; //} verify_8bits(output_addr, golden_addr, config_get[55]); } void dsp_init(){ asm("csrs mstatus,%0"::"r"(0x18000)); int32_t *p_reg = (int32_t *)0x02000148; *p_reg = 0x3; } void vmaddw_test() { int8_t a[16]; int8_t b[16]; int32_t in[16]; int32_t out[16]; int32_t *pi = in; int32_t *po = out; for (uint8_t i = 0; i < 16; i++) { a[i] = i - 8; b[i] = i - 8; in[i] = 0; } vld_sib(0, a, 1); vld_sib(1, b, 1); vld_siw(4, pi, 1); pi += SIMD_WW; vld_siw(5, pi, 1); pi += SIMD_WW; vld_siw(6, pi, 1); pi += SIMD_WW; vld_siw(7, pi, 1); pi += SIMD_WW; vmaddw(4, 0, 1); vst_siw(po, 1, 4); po += SIMD_WW; vst_siw(po, 1, 5); po += SIMD_WW; vst_siw(po, 1, 6); po += SIMD_WW; vst_siw(po, 1, 7); po += SIMD_WW; asm("fence"); for (uint8_t i = 0; i < 16; i++) { int32_t g = in[i] + a[i] * b[i]; iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]); } } void vmsubw_testu() { uint8_t a[16]; uint8_t b[16]; uint32_t in[16]; uint32_t out[16]; uint32_t *pi = in; uint32_t *po = out; for (uint8_t i = 0; i < 16; i++) { a[i] = 16 - i; b[i] = 17 - i; in[i] = 500; } vld_uib(0, a, 1); vld_uib(1, b, 1); vld_uiw(4, pi, 1); pi += SIMD_WW; vld_uiw(5, pi, 1); pi += SIMD_WW; vld_uiw(6, pi, 1); pi += SIMD_WW; vld_uiw(7, pi, 1); pi += SIMD_WW; vmsubw(4, 0, 1); vst_uiw(po, 1, 4); po += SIMD_WW; vst_uiw(po, 1, 5); po += SIMD_WW; vst_uiw(po, 1, 6); po += SIMD_WW; vst_uiw(po, 1, 7); po += SIMD_WW; asm("fence"); for (uint8_t i = 0; i < 16; i++) { uint32_t g = in[i] - a[i] * b[i]; iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]); } } void vmsubw_test() { int8_t a[16]; int8_t b[16]; int32_t in[16]; int32_t out[16]; int32_t *pi = in; int32_t *po = out; for (uint8_t i = 0; i < 16; i++) { a[i] = 16 - i; b[i] = 17 - i; in[i] = 500; } vld_sib(0, a, 1); vld_sib(1, b, 1); vld_siw(4, pi, 1); pi += SIMD_WW; vld_siw(5, pi, 1); pi += SIMD_WW; vld_siw(6, pi, 1); pi += SIMD_WW; vld_siw(7, pi, 1); pi += SIMD_WW; vmsubw(4, 0, 1); vst_siw(po, 1, 4); po += SIMD_WW; 
vst_siw(po, 1, 5); po += SIMD_WW; vst_siw(po, 1, 6); po += SIMD_WW; vst_siw(po, 1, 7); po += SIMD_WW; asm("fence"); for (uint8_t i = 0; i < 16; i++) { int32_t g = in[i] - a[i] * b[i]; iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]); } } void max_min_test_float() { float a[4] = {-0.1, 0.0, 0.1, 0.2}; float b[4] = {0.2, 0.1, 0.0, -0.1}; float h[4] = {1.0, 1.0, 1.0, 1.0}; float l[4] = {1.1, 1.1, 1.1, 1.1}; vld_fpw(0, a, 1); vld_fpw(1, b, 1); vmax(2, 0, 1); vmin(3, 0, 1); vst_fpw(h, 1, 2); vst_fpw(l, 1, 3); asm("fence"); for (uint8_t i = 0; i < 4; i++) { iot_printf("max[%d] = %f, min[%d] = %f\n", i, h[i], i, l[i]); } } void compare_test_float() { float a[4] = {1.0, 2.0, 3.0, 4.0}; float b[4] = {5.0, 4.0, 3.0, 2.0}; uint32_t ne[4]; uint32_t eq[4]; uint32_t lt[4]; uint32_t ge[4]; vld_fpw(0, a, 1) vld_fpw(1, b, 1) vseq(2, 0, 1) vsne(3, 0, 1) vslt(4, 0, 1) vsge(5, 0, 1) vst_fpw(eq, 1, 2) vst_fpw(ne, 1, 3) vst_fpw(lt, 1, 4) vst_fpw(ge, 1, 5) asm("fence"); for (uint8_t i = 0; i < 4; i++) { iot_printf("eq[%d] = %08x, ne[%d] = %08x, lt[%d] = %08x, ge[%d] = %08x\n", i, eq[i], i, ne[i], i, lt[i], i, ge[i]); } } void mul_test_float() { float a[4]; float b[4]; float g[4]; float w[4]; uint32_t *pa = (uint32_t *)a; uint32_t *pb = (uint32_t *)b; uint32_t *pg = (uint32_t *)g; uint32_t *pw = (uint32_t *)w; for (uint8_t i = 0; i < 4; i++) { a[i] = -0.2 * (i - 2.5); b[i] = 0.3 * (i - 3.3); } vld_fpw(0, a, 1); vld_fpw(1, b, 1); vmull(2, 0, 1); vst_fpw(w, 1, 2); asm("fence"); for (uint8_t i = 0; i < 4; i++) { g[i] = a[i] * b[i]; iot_printf("a[%d] = %f(0x%08x), b[%d] = %f(0x%08x), g[%d] = %f(0x%08x), o[%d] = %f(0x%08x)\n", i, a[i], pa[i], i, b[i], pb[i], i, g[i], pg[i], i, w[i], pw[i]); } } void vmadd_test_float() { float a[4] = {0.1, 0.2, 0.3, 0.4}; float b[4] = {1.0, 2.0, 3.0, 4.0}; float j[4] = {1.0, 2.0, 3.0, 4.0}; float p[4] = {0.0, 0.0, 0.0, 0.0}; float s[4] = {0.0, 0.0, 0.0, 0.0}; uint32_t *up = (uint32_t *)p; uint32_t *us = (uint32_t *)s; vld_fpw(0, a, 1) vld_fpw(1, 
b, 1) vld_fpw(2, j, 1) vld_fpw(3, j, 1) vmadd(2, 0, 1) vmsub(3, 0, 1) vst_fpw(p, 1, 2) vst_fpw(s, 1, 3) for (uint32_t i = 0; i < 4; i++) { iot_printf("add result[%d] = %f(0x%08x), sub result[%d] = %f(0x%08x)\n", i, p[i], up[i], i, s[i], us[i]); } } void sigmoid_int16_test() { int16_t in[16]; int16_t ou[16]; for (uint32_t i = 0; i < 65536; i += 16) { for (uint8_t j = 0; j < 16; j++) { in[j] = j + i - 32768; } sigmoid_int16(in, ou, 16); for (uint8_t j = 0; j < 16; j++) { iot_printf("%d\n", ou[j]); } } } void tanh_int16_test() { int16_t in[16]; int16_t ou[16]; for (uint32_t i = 0; i < 65536; i += 16) { for (uint8_t j = 0; j < 16; j++) { in[j] = j + i - 32768; } tanh_int16(in, ou, 16); for (uint8_t j = 0; j < 16; j++) { iot_printf("%d\n", ou[j]); } } } void vld_stride_test() { uint32_t data_len = 2048; uint32_t *data = (uint32_t *)os_mem_malloc(1, data_len * 4); uint8_t *data8 = (uint8_t *)data; for (uint32_t i = 0; i < data_len * 4; i++) { data8[i] = i; } uint32_t load[4]; uint8_t *load8 = (uint8_t *)load; for (uint32_t s = 0; s < 512; s++) { iot_printf("stride = %d\n", s); vlds_uib(0, data, (s * 4)) vst_uib(load8, 1, 0) asm("fence"); for (uint32_t i = 0; i < SIMD_WB; i++) { iot_printf("load[%d] = %d\n", i, load8[i]); } } os_mem_free(data); } void vst_stride_test() { uint32_t store_len = 2048; uint32_t *store = (uint32_t *)os_mem_malloc(1, store_len * 4); uint32_t load[4] = {0x12345678, 0x23456789, 0x34567890, 0x45678901}; vld_uiw(0, load, 1); for (uint32_t s = 0; s < 512; s++) { for (uint32_t i = 0; i < 2048; i++) { store[i] = 0; } vsts_uiw(store, (s * 4), 0) for (uint32_t i = 0; i < 2048; i++) { if (i == 0) { if (store[i] != load[0]) { iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[0]); } } else if (i == s) { if (store[i] != load[1]) { iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[1]); } } // else if (i == s * 2) { // if (store[i] != load[2]) { // iot_printf("store 
incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[2]); // } // } else if (i == s * 3) { // if (store[i] != load[3]) { // iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[3]); // } // } else { if (store[i] != 0) { iot_printf("store incorrect at stride %d, store[%d] = %x, should be 0\n", s, i, store[i]); } } } } os_mem_free(store); } void uint8_to_float_test() { uint8_t *in = (uint8_t *)os_mem_malloc(1 ,256); float *out = (float *)os_mem_malloc(1, 256 * 4); float *golden = (float *)os_mem_malloc(1, 256 * 4); for (uint32_t i = 0; i < 256; i++) { in[i] = i; } uint64_t begin = cpu_get_mcycle(); vector_uint8_to_float(in, out, 1, 2, 256); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 256; i++) { golden[i] = (i + 1.0) * 2.0; if (out[i] != golden[i]) { iot_printf("%f, %f, %d\n", golden[i], out[i], i); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 256; i+=8) { out[i + 0] = (float)in[i + 0]; out[i + 1] = (float)in[i + 1]; out[i + 2] = (float)in[i + 2]; out[i + 3] = (float)in[i + 3]; out[i + 4] = (float)in[i + 4]; out[i + 5] = (float)in[i + 5]; out[i + 6] = (float)in[i + 6]; out[i + 7] = (float)in[i + 7]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(out); os_mem_free(golden); iot_printf("uint8_to_float_test_end\n"); } void int8_to_float_test() { int8_t *in = (int8_t *)os_mem_malloc(1 ,256); float *out = (float *)os_mem_malloc(1, 256 * 4); uint32_t *uo = (uint32_t *)out; float *golden = (float *)os_mem_malloc(1, 256 * 4); uint32_t *ug = (uint32_t *)golden; for (int32_t i = 0; i < 256; i++) { in[i] = i - 128; } uint64_t begin = cpu_get_mcycle(); vector_int8_to_float(in, out, 1.0, 2.0, 256); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 256; i++) { golden[i] = (i - 128.0 + 1.0) * 2.0; if (out[i] != golden[i]) { iot_printf("%f, 0x%08x, 0x%08x\n", out[i], uo[i], 
ug[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 256; i++) { out[i] = (float)i; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(golden); os_mem_free(out); iot_printf("int8_to_float_test_end\n"); } void uint16_to_float_test() { uint16_t *in = (uint16_t *)os_mem_malloc(1, 4096 * 2); float *out = (float *)os_mem_malloc(1, 4096 * 4); float *golden = (float *)os_mem_malloc(1, 4096 * 4); for (uint32_t i = 0; i < 4096; i++) { in[i] = i; } uint64_t begin = cpu_get_mcycle(); vector_uint16_to_float(in, out, 1, 2, 4096); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 4096; i++) { golden[i] = (i + 1.0) * 2.0; if (out[i] != golden[i]) { iot_printf("%f, %f, %d\n", golden[i], out[i], i); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 4096; i+=8) { out[i + 0] = (float)in[i + 0]; out[i + 1] = (float)in[i + 1]; out[i + 2] = (float)in[i + 2]; out[i + 3] = (float)in[i + 3]; out[i + 4] = (float)in[i + 4]; out[i + 5] = (float)in[i + 5]; out[i + 6] = (float)in[i + 6]; out[i + 7] = (float)in[i + 7]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(golden); os_mem_free(out); iot_printf("uint16_to_float_test_end\n"); } void int16_to_float_test() { int16_t *in = (int16_t *)os_mem_malloc(1, 4096 * 2); float *out = (float *)os_mem_malloc(1, 4096 * 4); uint32_t *uo = (uint32_t *)out; float *golden = (float *)os_mem_malloc(1, 4096 * 4); uint32_t *ug = (uint32_t *)golden; for (int32_t i = 0; i < 4096; i++) { in[i] = i - 32768; } uint64_t begin = cpu_get_mcycle(); vector_int16_to_float(in, out, 1, 2, 4096); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 4096; i++) { golden[i] = (i - 32768.0 + 1.0) * 2.0; if (out[i] != golden[i]) { iot_printf("%f, 0x%08x, 0x%08x\n", out[i], uo[i], ug[i]); } } iot_printf("time = %d\n", (uint32_t)(end - 
begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 4096; i++) { out[i] = (float)i; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(golden); os_mem_free(out); iot_printf("int16_to_float_test_end\n"); } void float_to_int8_test() { float *in = (float *)os_mem_malloc(1, 2560 * 4); int8_t *out = (int8_t *)os_mem_malloc(1, 2560); int8_t *golden = (int8_t *)os_mem_malloc(1, 2560); for (int32_t i = 0; i < 2560; i++) { in[i] = (i - 1280) * 0.1f; } uint64_t begin = cpu_get_mcycle(); vector_float_to_int8(in, out, 1.0, 2.0, 2560); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { float gf = 2.0 * (in[i] - 1.0) + 128.0; gf = (gf > 255.0) ? 255.0 : (gf < 0.0) ? 0.0 : gf; golden[i] = (uint8_t)gf - 128; if (out[i] != golden[i]) { iot_printf("%f, %d\n", in[i], out[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { out[i] = (int8_t)in[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(out); os_mem_free(golden); iot_printf("float_to_int8_test end\n"); } void float_to_uint8_test() { float *in = (float *)os_mem_malloc(1, 2560 * 4); uint8_t *out = (uint8_t *)os_mem_malloc(1, 2560); uint8_t *golden = (uint8_t *)os_mem_malloc(1, 2560); for (int32_t i = 0; i < 2560; i++) { in[i] = i * 0.1f; } uint64_t begin = cpu_get_mcycle(); vector_float_to_uint8(in, out, 1.0, 2.0, 2560); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { float gf = 2.0 * (in[i] - 1.0); gf = (gf > 255.0) ? 255.0 : (gf < 0.0) ? 
0.0 : gf; golden[i] = (uint8_t)gf; if (out[i] != golden[i]) { iot_printf("at %d, %f, %d, %d\n", i, in[i], golden[i], out[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i += 8) { out[i] = (uint8_t)in[i]; out[i + 1] = (uint8_t)in[i + 1]; out[i + 2] = (uint8_t)in[i + 2]; out[i + 3] = (uint8_t)in[i + 3]; out[i + 4] = (uint8_t)in[i + 4]; out[i + 5] = (uint8_t)in[i + 5]; out[i + 6] = (uint8_t)in[i + 6]; out[i + 7] = (uint8_t)in[i + 7]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(out); os_mem_free(golden); iot_printf("float_to_uint8_test end\n"); } void float_to_int16_test() { float *in = (float *)os_mem_malloc(1, 2560 * 4); int16_t *out = (int16_t *)os_mem_malloc(1, 2560 * 2); int16_t *golden = (int16_t *)os_mem_malloc(1, 2560 * 2); for (int32_t i = 0; i < 2560; i++) { in[i] = (i - 1280) * 10.125f; } uint64_t begin = cpu_get_mcycle(); vector_float_to_int16(in, out, 1.0, 2.0, 2560); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { float gf = 2.0 * (in[i] - 1.0) + 32768.0; gf = (gf > 65535.0) ? 65535.0 : (gf < 0.0) ? 
0.0 : gf; golden[i] = (uint16_t)gf - 32768; if (out[i] != golden[i]) { iot_printf("%f, %d, %d\n", in[i], golden[i], out[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { out[i] = (int16_t)in[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(out); os_mem_free(golden); iot_printf("float_to_int16_test end\n"); } void float_to_uint16_test() { float *in = (float *)os_mem_malloc(1, 2560 * 4); uint16_t *out = (uint16_t *)os_mem_malloc(1, 2560 * 2); uint16_t *golden = (uint16_t *)os_mem_malloc(1, 2560 * 2); for (int32_t i = 0; i < 2560; i++) { in[i] = i * 10.1f; } uint64_t begin = cpu_get_mcycle(); vector_float_to_uint16(in, out, 1.0, 2.0, 2560); uint64_t end = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { float gf = 2.0 * (in[i] - 1.0); gf = (gf > 65535.0) ? 65535.0 : (gf < 0.0) ? 0.0 : gf; golden[i] = (uint16_t)gf; if (out[i] != golden[i]) { iot_printf("%f, %d, %d\n", in[i], golden[i], out[i]); } } iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint32_t i = 0; i < 2560; i++) { out[i] = (uint16_t)in[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); os_mem_free(in); os_mem_free(out); os_mem_free(golden); iot_printf("float_to_uint16_test end\n"); } void vector_inner_product_test_uint8() { uint8_t a[100]; uint8_t b[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i + 50; b[i] = i * 7 + 1; } uint32_t o = 0; uint32_t g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_uint8(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint8_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", g, o); } iot_printf("inner product of uint8 test end\n"); } 
void vector_inner_product_test_int8() { int8_t a[100]; int8_t b[100]; for (uint32_t i = 0; i < 100; i++) { a[i] = i; b[i] = i + 1; } int32_t o = 0; int32_t g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_int8(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint8_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", g, o); } iot_printf("inner product of int8 test end\n"); } void vector_inner_product_test_uint16() { uint16_t a[100]; uint16_t b[100]; for (uint64_t i = 0; i < 100; i++) { a[i] = i + 50; b[i] = i * 7 + 1; } uint64_t o = 0; uint64_t g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_uint16(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint16_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)g, (uint32_t)o); } iot_printf("inner product of uint16 test end\n"); } void vector_inner_product_test_int16() { int16_t a[100]; int16_t b[100]; for (uint64_t i = 0; i < 100; i++) { a[i] = i * 100 + 1; b[i] = i * 700 - 200; } int64_t o = 0; int64_t g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_int16(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint16_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)g, (uint32_t)o); } iot_printf("inner product of int16 test end\n"); } void vector_inner_product_test_uint32() { uint32_t a[100]; uint32_t b[100]; for (uint64_t 
i = 0; i < 100; i++) { a[i] = i + 50; b[i] = i * 7 + 1; } uint32_t o = 0; uint32_t g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_uint32(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint16_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)g, (uint32_t)o); } iot_printf("inner product of uint32 test end\n"); } void vector_inner_product_test_int32() { int32_t a[100]; int32_t b[100]; for (uint64_t i = 0; i < 100; i++) { a[i] = i * 100 + 1; b[i] = i * 700 - 200; } int32_t o = 0; int32_t g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_int32(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint16_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)g, (uint32_t)o); } iot_printf("inner product of int32 test end\n"); } void vector_inner_product_test_float() { float a[100]; float b[100]; for (uint64_t i = 0; i < 100; i++) { a[i] = i + 50.0; b[i] = i * 7.0 + 1.0; } float o = 0; float g = 0; uint64_t begin = cpu_get_mcycle(); o = vector_inner_product_float(a, b, 100); uint64_t end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); for (uint16_t i = 0; i < 100; i++) { g += a[i] * b[i]; } end = cpu_get_mcycle(); iot_printf("time = %d\n", (uint32_t)(end - begin)); if (g != o) { iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)g, (uint32_t)o); } iot_printf("inner product of float test end\n"); } void max_min_test_int64() { int32_t init_a[8] = {0xe0000000, 0xe0000001, 0xe0000002, 0xe0000003, 0xe0000004, 0xe0000005, 0xe0000006, 0xe0000007}; 
int32_t init_b[8] = {0x7fffffff, 0x7ffffffe, 0x7ffffffd, 0x7ffffffc, 0x7ffffffb, 0x7ffffffa, 0x7ffffff9, 0x7ffffff8}; int32_t one[] = {4}; int64_t curr_a[8]; int64_t curr_b[8]; int32_t *in32_a = (int32_t *)curr_a; int32_t *in32_b = (int32_t *)curr_b; int64_t gold_a[8]; int64_t gold_b[8]; int32_t *in32ga = (int32_t *)gold_a; int32_t *in32gb = (int32_t *)gold_b; int16_t sub1[] = {1, 1}; int16_t sub2[] = {2, 2}; int16_t sub3[] = {1, 1}; int16_t sub4[] = {3, 3}; int64_t gold_g[8]; int64_t gold_l[8]; int64_t curr_g[8]; int64_t curr_l[8]; int32_t *in32_g = (int32_t *)curr_g; int32_t *in32_l = (int32_t *)curr_l; int32_t *in32gg = (int32_t *)gold_g; int32_t *in32gl = (int32_t *)gold_l; vld_siw(10, one, 0) vld_siw(8, init_a, 1) vld_siw(9, init_b, 1) vmulw(0, 10, 8) vmulw(4, 10, 9) vld_siw(8, (init_a + SIMD_WW), 1) vld_siw(9, (init_b + SIMD_WW), 1) vmulw(2, 10, 8) vmulw(6, 10, 9) vld_sih(12, sub1, 0) vld_sih(13, sub2, 0) vld_sih(14, sub3, 0) vld_sih(15, sub4, 0) for (uint32_t i = 0; i < 8; i++) { gold_a[i] = (int64_t)init_a[i] * 4; gold_b[i] = (int64_t)init_b[i] * 4; } for (uint32_t t = 0; t < 100000; t++) { iot_printf("add and sub round %d\n", t); in32ga = (int32_t *)gold_a; in32gb = (int32_t *)gold_b; for (uint32_t i = 0; i < 8; i++) { gold_a[i] = gold_a[i] + sub1[0] * sub2[0]; gold_b[i] = gold_b[i] - sub3[0] * sub4[0]; gold_g[i] = gold_a[i] > gold_b[i] ? gold_a[i] : gold_b[i]; gold_l[i] = gold_a[i] < gold_b[i] ? 
gold_a[i] : gold_b[i]; } vmaddw(0, 12, 13) vmsubw(4, 14, 15) in32_a = (int32_t *)curr_a; in32_b = (int32_t *)curr_b; in32_g = (int32_t *)curr_g; in32_l = (int32_t *)curr_l; vst_siw(in32_a, 1, 0) vst_siw(in32_b, 1, 4) vmax(11, 0, 4) vst_siw(in32_g, 1, 11) vmin(11, 0, 4) vst_siw(in32_l, 1, 11) in32_a += SIMD_WW; in32_b += SIMD_WW; in32_g += SIMD_WW; in32_l += SIMD_WW; vst_siw(in32_a, 1, 1) vst_siw(in32_b, 1, 5) vmax(11, 1, 5) vst_siw(in32_g, 1, 11) vmin(11, 1, 5) vst_siw(in32_l, 1, 11) in32_a += SIMD_WW; in32_b += SIMD_WW; in32_g += SIMD_WW; in32_l += SIMD_WW; vst_siw(in32_a, 1, 2) vst_siw(in32_b, 1, 6) vmax(11, 2, 6) vst_siw(in32_g, 1, 11) vmin(11, 2, 6) vst_siw(in32_l, 1, 11) in32_a += SIMD_WW; in32_b += SIMD_WW; in32_g += SIMD_WW; in32_l += SIMD_WW; vst_siw(in32_a, 1, 3) vst_siw(in32_b, 1, 7) vmax(11, 3, 7) vst_siw(in32_g, 1, 11) vmin(11, 3, 7) vst_siw(in32_l, 1, 11) in32_a += SIMD_WW; in32_b += SIMD_WW; in32_g += SIMD_WW; in32_l += SIMD_WW; in32_a = (int32_t *)curr_a; in32_b = (int32_t *)curr_b; in32_g = (int32_t *)curr_g; in32_l = (int32_t *)curr_l; for (uint32_t i = 0; i < 8; i++) { if(curr_a[i] != gold_a[i]) { iot_printf("incorrect at %d, gold_a[%d] = 0x%08x%08x, curr_a[%d] = 0x%08x%08x\n", i, i, in32ga[2 * i + 1], in32ga[2 * i], i, in32_a[2 * i + 1], in32_a[2 * i]); } if(curr_b[i] != gold_b[i]) { iot_printf("incorrect at %d, gold_b[%d] = 0x%08x%08x, curr_b[%d] = 0x%08x%08x\n", i, i, in32gb[2 * i + 1], in32gb[2 * i], i, in32_b[2 * i + 1], in32_b[2 * i]); } if(curr_g[i] != gold_g[i]) { iot_printf("incorrect at %d, gold_g[%d] = 0x%08x%08x, curr_g[%d] = 0x%08x%08x\n", i, i, in32gg[2 * i + 1], in32gg[2 * i], i, in32_g[2 * i + 1], in32_g[2 * i]); } if(curr_l[i] != gold_l[i]) { iot_printf("incorrect at %d, gold_l[%d] = 0x%08x%08x, curr_l[%d] = 0x%08x%08x\n", i, i, in32gl[2 * i + 1], in32gl[2 * i], i, in32_l[2 * i + 1], in32_l[2 * i]); } } } } void matrix_transpose_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; uint32_t data_begin = 0x10200000; 
read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t input_addr = data_begin; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; switch (config_get[3]) { case 0: switch (config_get[2]) { case 1: matrix_transpose_uint8((uint8_t *)input_addr, (uint8_t *)output_addr, config_get[0], config_get[1]); break; case 2: matrix_transpose_uint16((uint16_t *)input_addr, (uint16_t *)output_addr, config_get[0], config_get[1]); break; case 4: matrix_transpose_uint32((uint32_t *)input_addr, (uint32_t *)output_addr, config_get[0], config_get[1]); break; default: break; } break; case 1: switch (config_get[2]) { case 1: matrix_transpose_int8((int8_t *)input_addr, (int8_t *)output_addr, config_get[0], config_get[1]); break; case 2: matrix_transpose_int16((int16_t *)input_addr, (int16_t *)output_addr, config_get[0], config_get[1]); break; case 4: matrix_transpose_int32((int32_t *)input_addr, (int32_t *)output_addr, config_get[0], config_get[1]); break; default: break; } break; default: break; } verify_8bits(output_addr, golden_addr, config_get[55]); } void vector_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; uint32_t data_begin = 0x10200000; //uint8_t *data = os_mem_malloc(1, 512 * 1024); //uint32_t data_begin = (uint32_t)data; read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; if (config_get[4]) { uint32_t data = 0x1071f; vcsrw(data,data,0) } else { uint32_t data = 0x1061f; vcsrw(data,data,0) } switch (config_get[3]) { case 0: // unsigned switch (config_get[2]) { case 1: // 8bit switch (config_get[1]) { case 0: vector_add_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 1: 
vector_sub_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 2: vector_multiply_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 3: vector_multiply_uint8_high_8bit((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 4: vector_multiply_uint8_low_8bit((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 5: vector_mul_add_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)weight_addr, (uint8_t *)output_addr, config_get[0]); break; case 6: vector_mul_add_uint8_to_uint32((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]); break; case 7: vector_mul_sub_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)weight_addr, (uint8_t *)output_addr, config_get[0]); break; case 8: vector_mul_sub_uint8_to_uint32((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]); break; case 9: vector_max_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 10: vector_min_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 11: vector_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 12: vector_not_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 13: vector_less_than_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 15: vector_logic_and_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 16: vector_logic_or_uint8((uint8_t 
*)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 17: vector_logic_xor_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 18: vector_logic_xnor_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 19: vector_left_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 20: vector_right_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; case 21: vector_right_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]); break; default: break; } break; case 2: // 16 bit switch (config_get[1]) { case 0: vector_add_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 1: vector_sub_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 2: vector_multiply_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 3: vector_multiply_uint16_high_16bit((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 4: vector_multiply_uint16_low_16bit((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 5: vector_mul_add_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)weight_addr, (uint16_t *)output_addr, config_get[0]); break; case 6: vector_mul_add_uint16_to_uint64((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint64_t *)weight_addr, (uint64_t *)output_addr, config_get[0]); break; case 7: vector_mul_sub_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)weight_addr, (uint16_t *)output_addr, config_get[0]); break; case 8: vector_mul_sub_uint16_to_uint64((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint64_t *)weight_addr, (uint64_t 
*)output_addr, config_get[0]); break; case 9: vector_max_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 10: vector_min_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 11: vector_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 12: vector_not_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 13: vector_less_than_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 15: vector_logic_and_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 16: vector_logic_or_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 17: vector_logic_xor_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 18: vector_logic_xnor_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 19: vector_left_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 20: vector_right_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 21: vector_right_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]); break; case 22: vector_high_half_bits_uint16((uint16_t *)input_addr, (uint8_t *)output_addr, config_get[0]); break; case 23: vector_low_half_bits_uint16((uint16_t *)input_addr, (uint8_t *)output_addr, config_get[0]); break; default: break; } break; case 3: // float switch (config_get[1]) { case 0: 
vector_add_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 1: vector_sub_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 2: vector_multiply_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 5: vector_mul_add_float((float *)input_addr, (float *)bias_addr, (float *)weight_addr, (float *)output_addr, config_get[0]); break; case 7: vector_mul_sub_float((float *)input_addr, (float *)bias_addr, (float *)weight_addr, (float *)output_addr, config_get[0]); break; case 9: vector_max_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 10: vector_min_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 11: vector_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 12: vector_not_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 13: vector_less_than_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]); break; default: break; } break; case 4: // 32 bit switch (config_get[1]) { case 0: vector_add_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 1: vector_sub_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 2: vector_multiply_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint64_t *)output_addr, config_get[0]); break; case 3: vector_multiply_uint32_high_32bit((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 4: vector_multiply_uint32_low_32bit((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, 
config_get[0]); break; case 5: vector_mul_add_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]); break; case 7: vector_mul_sub_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]); break; case 9: vector_max_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 10: vector_min_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 11: vector_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 12: vector_not_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 13: vector_less_than_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 15: vector_logic_and_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 16: vector_logic_or_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 17: vector_logic_xor_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 18: vector_logic_xnor_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 19: vector_left_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 20: vector_right_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 21: vector_right_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]); break; case 22: 
vector_high_half_bits_uint32((uint32_t *)input_addr, (uint16_t *)output_addr, config_get[0]); break; case 23: vector_low_half_bits_uint32((uint32_t *)input_addr, (uint16_t *)output_addr, config_get[0]); break; default: break; } break; default: break; } break; case 1: // signed switch (config_get[2]) { case 1: // 8bit switch (config_get[1]) { case 0: vector_add_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 1: vector_sub_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 2: vector_multiply_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 3: vector_multiply_int8_high_8bit((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 4: vector_multiply_int8_low_8bit((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 5: vector_mul_add_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)weight_addr, (int8_t *)output_addr, config_get[0]); break; case 6: vector_mul_add_int8_to_int32((int8_t *)input_addr, (int8_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]); break; case 7: vector_mul_sub_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)weight_addr, (int8_t *)output_addr, config_get[0]); break; case 8: vector_mul_sub_int8_to_int32((int8_t *)input_addr, (int8_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]); break; case 9: vector_max_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 10: vector_min_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 11: vector_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 12: vector_not_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 13: 
vector_less_than_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 15: vector_logic_and_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 16: vector_logic_or_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 17: vector_logic_xor_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 18: vector_logic_xnor_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 19: vector_left_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 20: vector_arithmatic_right_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; case 21: vector_logic_right_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]); break; default: break; } break; case 2: // 16 bit switch (config_get[1]) { case 0: vector_add_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 1: vector_sub_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 2: vector_multiply_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 3: vector_multiply_int16_high_16bit((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 4: vector_multiply_int16_low_16bit((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 5: vector_mul_add_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)weight_addr, (int16_t *)output_addr, config_get[0]); break; case 6: vector_mul_add_int16_to_int64((int16_t *)input_addr, 
(int16_t *)bias_addr, (int64_t *)weight_addr, (int64_t *)output_addr, config_get[0]); break; case 7: vector_mul_sub_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)weight_addr, (int16_t *)output_addr, config_get[0]); break; case 8: vector_mul_sub_int16_to_int64((int16_t *)input_addr, (int16_t *)bias_addr, (int64_t *)weight_addr, (int64_t *)output_addr, config_get[0]); break; case 9: vector_max_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 10: vector_min_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 11: vector_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 12: vector_not_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 13: vector_less_than_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 15: vector_logic_and_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 16: vector_logic_or_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 17: vector_logic_xor_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 18: vector_logic_xnor_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 19: vector_left_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 20: vector_arithmatic_right_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]); break; case 21: vector_logic_right_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, 
config_get[0]); break; case 22: vector_high_half_bits_int16((int16_t *)input_addr, (int8_t *)output_addr, config_get[0]); break; case 23: vector_low_half_bits_int16((int16_t *)input_addr, (int8_t *)output_addr, config_get[0]); break; default: break; } break; case 4: // 32 bit switch (config_get[1]) { case 0: vector_add_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 1: vector_sub_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 2: vector_multiply_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int64_t *)output_addr, config_get[0]); break; case 3: vector_multiply_int32_high_32bit((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 4: vector_multiply_int32_low_32bit((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 5: vector_mul_add_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]); break; case 7: vector_mul_sub_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]); break; case 9: vector_max_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 10: vector_min_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 11: vector_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 12: vector_not_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 13: vector_less_than_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 14: vector_greater_or_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 15: vector_logic_and_int32((int32_t 
*)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 16: vector_logic_or_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 17: vector_logic_xor_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 18: vector_logic_xnor_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 19: vector_left_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 20: vector_arithmatic_right_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 21: vector_logic_right_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]); break; case 22: vector_high_half_bits_int32((int32_t *)input_addr, (int16_t *)output_addr, config_get[0]); break; case 23: vector_low_half_bits_int32((int32_t *)input_addr, (int16_t *)output_addr, config_get[0]); break; default: break; } break; default: break; } break; default: break; } uint32_t conf; vcsrr(conf, 0) iot_printf("conf = %x\n", conf); //if (config_get[52] + config_get[53] + config_get[54] + config_get[55] * 2 <= 0x400000) { if (config_get[2] != 3) { verify_8bits(output_addr, golden_addr, config_get[55]); } else { verify_float(output_addr, golden_addr, config_get[55] / 4); } //os_mem_free(data); }

/*
 * Runs one "multiply, right shift, saturate" vector case received through
 * read_case_from_python().
 *
 * Case data is placed at 0x10200000 as consecutive regions whose sizes come
 * from config_get[52..55]: weight, bias, input, golden, then the output area.
 *
 * Selector words read from config_get:
 *   [0] forwarded unchanged as the last (length) argument of the kernel
 *   [1] operation id; only 2 is handled here
 *   [2] element width in bytes (1, 2 or 4)
 *   [3] 0 = unsigned kernels, 1 = signed kernels
 *   [4] non-zero: multiply by the single value stored at bias_addr
 *       instead of by the bias vector
 *   [5] right-shift amount forwarded to the kernel
 *
 * After the kernel the SIMD CSR 0 is read back and printed. The result is
 * only verified when the summed region sizes (output counted as large as
 * golden) stay within 0x8000 bytes; otherwise "too large" is reported.
 */
void vector_saturation_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weight_addr = data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];
    uint32_t with_const = config_get[4];
    uint32_t right_shift = config_get[5];

    switch (config_get[3]) {
    case 0: // unsigned
        switch (config_get[2]) {
        case 1: // 8 bit
            switch (config_get[1]) {
            case 2:
                if (with_const) {
                    vector_multiply_const_uint8_right_shift_saturation((uint8_t *)input_addr, *((uint8_t *)bias_addr), (uint8_t *)output_addr, right_shift, config_get[0]);
                } else {
                    vector_multiply_uint8_right_shift_saturation((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, right_shift, config_get[0]);
                }
                break;
            default:
                break;
            }
            break;
        case 2: // 16 bit
            switch (config_get[1]) {
            case 2:
                if (with_const) {
                    vector_multiply_const_uint16_right_shift_saturation((uint16_t *)input_addr, *((uint16_t *)bias_addr), (uint16_t *)output_addr, right_shift, config_get[0]);
                } else {
                    vector_multiply_uint16_right_shift_saturation((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, right_shift, config_get[0]);
                }
                break;
            default:
                break;
            }
            break;
        case 4: // 32 bit
            switch (config_get[1]) {
            case 2:
                if (with_const) {
                    vector_multiply_const_uint32_right_shift_saturation((uint32_t *)input_addr, *((uint32_t *)bias_addr), (uint32_t *)output_addr, right_shift, config_get[0]);
                } else {
                    vector_multiply_uint32_right_shift_saturation((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, right_shift, config_get[0]);
                }
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;
    case 1: // signed
        switch (config_get[2]) {
        case 1: // 8 bit
            switch (config_get[1]) {
            case 2:
                if (with_const) {
                    vector_multiply_const_int8_right_shift_saturation((int8_t *)input_addr, *((int8_t *)bias_addr), (int8_t *)output_addr, right_shift, config_get[0]);
                } else {
                    vector_multiply_int8_right_shift_saturation((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, right_shift, config_get[0]);
                }
                break;
            default:
                break;
            }
            break;
        case 2: // 16 bit
            switch (config_get[1]) {
            case 2:
                if (with_const) {
                    vector_multiply_const_int16_right_shift_saturation((int16_t *)input_addr, *((int16_t *)bias_addr), (int16_t *)output_addr, right_shift, config_get[0]);
                } else {
                    vector_multiply_int16_right_shift_saturation((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, right_shift, config_get[0]);
                }
                break;
            default:
                break;
            }
            break;
        case 4: // 32 bit
            switch (config_get[1]) {
            case 2:
                if (with_const) {
                    vector_multiply_const_int32_right_shift_saturation((int32_t *)input_addr, *((int32_t *)bias_addr), (int32_t *)output_addr, right_shift, config_get[0]);
                } else {
                    vector_multiply_int32_right_shift_saturation((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, right_shift, config_get[0]);
                }
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;
    default:
        break;
    }

    /* vcsrr is a statement-like macro; it is used without a trailing ';'. */
    uint32_t conf;
    vcsrr(conf, 0)
    iot_printf("conf = %x\n", conf);

    if (config_get[52] + config_get[53] + config_get[54] + config_get[55] * 2 <= 0x8000) {
        verify_8bits(output_addr, golden_addr, config_get[55]);
    } else {
        iot_printf("too large\nfinished~");
    }
}

/*
 * Runs one matrix-multiply case received through read_case_from_python().
 *
 * Case data is placed at 0x10200000: matrix A first, matrix B
 * config_get[53] bytes later, the golden result config_get[54] bytes after
 * B, and the output area config_get[55] bytes after the golden data.
 *
 * Selector words read from config_get:
 *   [0], [1], [2] forwarded unchanged as the three dimension arguments
 *   [3] element width in bytes (1, 2 or 4)
 *   [4] 0 = unsigned kernels, 1 = signed kernels
 *   [5] accumulator variant for 8/16-bit inputs: 1 = same-width result,
 *       4 = widened result (not consulted for 32-bit inputs)
 *   [6] extra argument taken only by the same-width 8/16-bit kernels
 *
 * The cycle count around the kernel dispatch is printed, then the output
 * area is compared with the golden data over config_get[55] bytes.
 */
void matrix_multi_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint8_t *data_begin = (uint8_t *)0x10200000;

    read_case_from_python(config_read, config_get, data_begin);

    uint32_t matrixAaddr = (uint32_t)data_begin;
    uint32_t matrixBaddr = matrixAaddr + config_get[53];
    uint32_t golden_addr = matrixBaddr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint64_t begin = cpu_get_mcycle();
    uint32_t elem_bytes = config_get[3];
    uint32_t signed_ = config_get[4];
    uint32_t w4b = config_get[5];

    switch (elem_bytes) {
    case 1: // 8 bits
        switch (signed_) {
        case 0: // unsigned
            switch (w4b) {
            case 1: // vmadd
                matrix_multi_uint8_to_uint8((uint8_t *)matrixAaddr, (uint8_t *)matrixBaddr, (uint8_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_multi_uint8_to_uint32((uint8_t *)matrixAaddr, (uint8_t *)matrixBaddr, (uint32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        case 1: // signed
            switch (w4b) {
            case 1: // vmadd
                matrix_multi_int8_to_int8((int8_t *)matrixAaddr, (int8_t *)matrixBaddr, (int8_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_multi_int8_to_int32((int8_t *)matrixAaddr, (int8_t *)matrixBaddr, (int32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;
    case 2: // 16 bits
        switch (signed_) {
        case 0: // unsigned
            switch (w4b) {
            case 1: // vmadd
                matrix_multi_uint16_to_uint16((uint16_t *)matrixAaddr, (uint16_t *)matrixBaddr, (uint16_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_multi_uint16_to_uint64((uint16_t *)matrixAaddr, (uint16_t *)matrixBaddr, (uint64_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        case 1: // signed
            switch (w4b) {
            case 1: // vmadd
                matrix_multi_int16_to_int16((int16_t *)matrixAaddr, (int16_t *)matrixBaddr, (int16_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_multi_int16_to_int64((int16_t *)matrixAaddr, (int16_t *)matrixBaddr, (int64_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;
    case 4: // 32 bits
        switch (signed_) {
        case 0: // unsigned
            matrix_multi_uint32_to_uint32((uint32_t *)matrixAaddr, (uint32_t *)matrixBaddr, (uint32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
            break;
        case 1: // signed
            matrix_multi_int32_to_int32((int32_t *)matrixAaddr, (int32_t *)matrixBaddr, (int32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
            break;
        default:
            break;
        }
        break;
    default:
        break;
    }

    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    verify_8bits(output_addr, golden_addr, config_get[55]);
}

/*
 * Runs one transposed-matrix-multiply case received through
 * read_case_from_python().
 *
 * Memory layout and the meaning of config_get[0..6] match
 * matrix_multi_test_from_pc(), with one difference: any element width
 * other than 1, 2 or 4 dispatches to the float kernel.
 *
 * The cycle count around the kernel dispatch is printed, then the output
 * area is compared with the golden data over config_get[55] bytes.
 */
void matrix_transpose_multi_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t matrixAaddr = data_begin;
    uint32_t matrixBaddr = matrixAaddr + config_get[53];
    uint32_t golden_addr = matrixBaddr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint64_t begin = cpu_get_mcycle();
    uint32_t elem_bytes = config_get[3];
    uint32_t signed_ = config_get[4];
    uint32_t w4b = config_get[5];

    switch (elem_bytes) {
    case 1: // 8 bits
        switch (signed_) {
        case 0: // unsigned
            switch (w4b) {
            case 1: // vmadd
                matrix_transpose_multi_uint8_to_uint8((uint8_t *)matrixAaddr, (uint8_t *)matrixBaddr, (uint8_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_transpose_multi_uint8_to_uint32((uint8_t *)matrixAaddr, (uint8_t *)matrixBaddr, (uint32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        case 1: // signed
            switch (w4b) {
            case 1: // vmadd
                matrix_transpose_multi_int8_to_int8((int8_t *)matrixAaddr, (int8_t *)matrixBaddr, (int8_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_transpose_multi_int8_to_int32((int8_t *)matrixAaddr, (int8_t *)matrixBaddr, (int32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;
    case 2: // 16 bits
        switch (signed_) {
        case 0: // unsigned
            switch (w4b) {
            case 1: // vmadd
                matrix_transpose_multi_uint16_to_uint16((uint16_t *)matrixAaddr, (uint16_t *)matrixBaddr, (uint16_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_transpose_multi_uint16_to_uint64((uint16_t *)matrixAaddr, (uint16_t *)matrixBaddr, (uint64_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        case 1: // signed
            switch (w4b) {
            case 1: // vmadd
                matrix_transpose_multi_int16_to_int16((int16_t *)matrixAaddr, (int16_t *)matrixBaddr, (int16_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
                break;
            case 4: // vmaddw
                matrix_transpose_multi_int16_to_int64((int16_t *)matrixAaddr, (int16_t *)matrixBaddr, (int64_t *)output_addr, config_get[0], config_get[1], config_get[2]);
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
        break;
    case 4: // 32 bits
        switch (signed_) {
        case 0: // unsigned
            matrix_transpose_multi_uint32_to_uint32((uint32_t *)matrixAaddr, (uint32_t *)matrixBaddr, (uint32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
            break;
        case 1: // signed
            matrix_transpose_multi_int32_to_int32((int32_t *)matrixAaddr, (int32_t *)matrixBaddr, (int32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
            break;
        default:
            break;
        }
        break;
    default: // every other width selects the float kernel
        matrix_transpose_multi_float((float *)matrixAaddr, (float *)matrixBaddr, (float *)output_addr, config_get[0], config_get[1], config_get[2]);
        break;
    }

    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    verify_8bits(output_addr, golden_addr, config_get[55]);
}

/*
 * Benchmark for the "maximum element" kernels; prints cycle counts and the
 * results, and performs no golden comparison.
 *
 *  1. Fills a 256-entry int8 vector on the stack, prints it, and times
 *     vector_maximum_element_int8_small() on it.
 *  2. Times an equivalent scalar search over the same vector. Only its
 *     cycle count is printed.
 *  3. Fills 128 * 256 int8 samples at 0x10200000 and times
 *     vector_maximum_element_int8() on them.
 */
void maximum_test() {
    int8_t v[256];

    /* 3 * i exceeds the int8_t range for most i; the narrowing is deliberate
     * and simply yields a varied signed pattern. */
    for (uint32_t i = 0; i < 256; i++) {
        v[i] = (int8_t)(3 * i);
    }
    for (uint32_t i = 0; i < 256; i++) {
        iot_printf("v[%d] = %d\n", i, v[i]);
    }

    uint8_t max_index;
    int8_t max_value;
    uint64_t begin;
    uint64_t end;

    /* NOTE(review): the length argument is 0 although v holds 256 entries;
     * presumably 0 encodes 256 because the parameter is a uint8_t. Confirm
     * against vector_maximum_element_int8_small(). */
    begin = cpu_get_mcycle();
    vector_maximum_element_int8_small(v, &max_index, &max_value, 0);
    end = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(end - begin));
    iot_printf("max_index = %d\n", max_index);
    iot_printf("max_value = %d\n", max_value);

    /* Scalar reference, timed for comparison with the SIMD kernel. */
    begin = cpu_get_mcycle();
    max_index = 0;
    max_value = v[0];
    for (uint32_t i = 1; i < 256; i++) {
        if (max_value < v[i]) {
            max_value = v[i];
            max_index = i;
        }
    }
    end = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(end - begin));

    int8_t *in = (int8_t *)0x10200000;
    for (uint32_t i = 0; i < 128; i++) {
        /* Row i holds (3 * j) modulo 5 * i; row 0 uses modulus 1 to avoid a
         * division by zero. */
        uint32_t modulus = (5 * i != 0) ? 5 * i : 1;
        for (uint32_t j = 0; j < 256; j++) {
            /* Saturate in the wide type before narrowing. The remainder can
             * reach 634, and storing it into an int8_t first would wrap
             * values >= 128 to negative numbers before the 126 clamp could
             * see them. */
            uint32_t sample = (3 * j) % modulus;
            if (sample > 126) {
                sample = 126;
            }
            in[i * 256 + j] = (int8_t)sample;
            //iot_printf("in[%d] = %d\n", i * 256 + j, in[i * 256 + j]);
        }
    }

    uint16_t max_index16;
    begin = cpu_get_mcycle();
    vector_maximum_element_int8(in, &max_index16, &max_value, 128 * 256);
    end = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(end - begin));
    iot_printf("max_index = %d\n", max_index16);
    iot_printf("max_value = %d\n", max_value);
}

void vector_maxi_mini_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; uint32_t data_begin = 0x10200000; read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t length = config_get[0]; uint32_t to_do = config_get[1]; uint32_t elem_bytes = config_get[2]; uint32_t signed_ = config_get[3]; uint8_t *input_u8; uint16_t *index_u8; uint8_t *output_u8; uint8_t *golden_u8; int8_t *input_s8; uint16_t *index_s8; int8_t *output_s8; int8_t *golden_s8; uint16_t *input_u16; uint16_t *index_u16; uint16_t *output_u16; uint16_t *golden_u16; int16_t *input_s16; uint16_t *index_s16; int16_t *output_s16; int16_t *golden_s16; uint32_t *input_u32; uint32_t *index_u32; uint32_t *output_u32; uint32_t *golden_u32; int32_t *input_s32; uint32_t *index_s32; int32_t *output_s32; int32_t *golden_s32; float *input_f; uint32_t *index_f; float *output_f; float *golden_f; switch (elem_bytes) { case 1 /* 8bits */: switch (to_do) { case 0 /* min */: switch (signed_) { case 0 /* unsigned */: input_u8 = (uint8_t *)input_addr; index_u8 = (uint16_t *)(output_addr + 4); output_u8 = (uint8_t *)output_addr; golden_u8 = (uint8_t *)golden_addr; vector_minimum_element_uint8(input_u8, index_u8, output_u8, length); if (*output_u8 != *golden_u8) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_u8, *output_u8); 
iot_printf("not all correct!\n"); } else if (input_u8[*index_u8] != *output_u8) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_u8, input_u8[*index_u8]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* signed */: input_s8 = (int8_t *)input_addr; index_s8 = (uint16_t *)(output_addr + 4); output_s8 = (int8_t *)output_addr; golden_s8 = (int8_t *)golden_addr; vector_minimum_element_int8(input_s8, index_s8, output_s8, length); if (*output_s8 != *golden_s8) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_s8, *output_s8); iot_printf("not all correct!\n"); } else if (input_s8[*index_s8] != *output_s8) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_s8, input_s8[*index_s8]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; case 1 /* max */: switch (signed_) { case 0 /* unsigned */: input_u8 = (uint8_t *)input_addr; index_u8 = (uint16_t *)(output_addr + 4); output_u8 = (uint8_t *)output_addr; golden_u8 = (uint8_t *)golden_addr; vector_maximum_element_uint8(input_u8, index_u8, output_u8, length); if (*output_u8 != *golden_u8) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_u8, *output_u8); iot_printf("not all correct!\n"); } else if (input_u8[*index_u8] != *output_u8) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_u8, input_u8[*index_u8]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* signed */: input_s8 = (int8_t *)input_addr; index_s8 = (uint16_t *)(output_addr + 4); output_s8 = (int8_t *)output_addr; golden_s8 = (int8_t *)golden_addr; vector_maximum_element_int8(input_s8, index_s8, output_s8, length); if (*output_s8 != *golden_s8) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_s8, *output_s8); iot_printf("not all correct!\n"); } else if (input_s8[*index_s8] != *golden_s8) { iot_printf("elem index 
incorrent, input at index %d is %d\n", *index_s8, input_s8[*index_s8]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; default: break; } /* code */ break; case 2 /* 16bits */: switch (to_do) { case 0 /* min */: switch (signed_) { case 0 /* unsigned */: input_u16 = (uint16_t *)input_addr; index_u16 = (uint16_t *)(output_addr + 4); output_u16 = (uint16_t *)output_addr; golden_u16 = (uint16_t *)golden_addr; vector_minimum_element_uint16(input_u16, index_u16, output_u16, length); if (*output_u16 != *golden_u16) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_u16, *output_u16); iot_printf("not all correct!\n"); } else if (input_u16[*index_u16] != *output_u16) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_u16, input_u16[*index_u16]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* signed */: input_s16 = (int16_t *)input_addr; index_s16 = (uint16_t *)(output_addr + 4); output_s16 = (int16_t *)output_addr; golden_s16 = (int16_t *)golden_addr; vector_minimum_element_int16(input_s16, index_s16, output_s16, length); if (*output_s16 != *golden_s16) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_s16, *output_s16); iot_printf("not all correct!\n"); } else if (input_s16[*index_s16] != *output_s16) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_s16, input_s16[*index_s16]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; case 1 /* max */: switch (signed_) { case 0 /* unsigned */: input_u16 = (uint16_t *)input_addr; index_u16 = (uint16_t *)(output_addr + 4); output_u16 = (uint16_t *)output_addr; golden_u16 = (uint16_t *)golden_addr; vector_maximum_element_uint16(input_u16, index_u16, output_u16, length); if (*output_u16 != *golden_u16) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_u16, *output_u16); 
iot_printf("not all correct!\n"); } else if (input_u16[*index_u16] != *output_u16) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_u16, input_u16[*index_u16]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* signed */: input_s16 = (int16_t *)input_addr; index_s16 = (uint16_t *)(output_addr + 4); output_s16 = (int16_t *)output_addr; golden_s16 = (int16_t *)golden_addr; vector_maximum_element_int16(input_s16, index_s16, output_s16, length); if (*output_s16 != *golden_s16) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_s16, *output_s16); iot_printf("not all correct!\n"); } else if (input_s16[*index_s16] != *output_s16) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_s16, input_s16[*index_s16]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; default: break; } /* code */ break; case 3 /* float */: switch (to_do) { case 0 /* min */: input_f = (float *)input_addr; index_f = (uint32_t *)(output_addr + 4); output_f = (float *)output_addr; golden_f = (float *)golden_addr; vector_minimum_element_float(input_f, index_f, output_f, length); if (*output_f != *golden_f) { iot_printf("elem incorrent, golden = %f, output = %f\n", *golden_f, *output_f); iot_printf("not all correct!\n"); } else if (input_f[*index_f] != *output_f) { iot_printf("elem index incorrent, input at index %d is %f\n", *index_f, input[*index_f]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* max */: input_f = (float *)input_addr; index_f = (uint32_t *)(output_addr + 4); output_f = (float *)output_addr; golden_f = (float *)golden_addr; vector_maximum_element_float(input_f, index_f, output_f, length); if (*output_f != *golden_f) { iot_printf("elem incorrent, golden = %f, output = %f\n", *golden_f, *output_f); iot_printf("not all correct!\n"); } else if (input_f[*index_f] != *output_f) { 
iot_printf("elem index incorrent, input at index %d is %f\n", *index_f, input[*index_f]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; case 4 /* 32bits */: switch (to_do) { case 0 /* min */: switch (signed_) { case 0 /* unsigned */: input_u32 = (uint32_t *)input_addr; index_u32 = (uint32_t *)(output_addr + 4); output_u32 = (uint32_t *)output_addr; golden_u32 = (uint32_t *)golden_addr; vector_minimum_element_uint32(input_u32, index_u32, output_u32, length); if (*output_u32 != *golden_u32) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_u32, *output_u32); iot_printf("not all correct!\n"); } else if (input_u32[*index_u32] != *output_u32) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_u32, input_u32[*index_u32]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* signed */: input_s32 = (int32_t *)input_addr; index_s32 = (uint32_t *)(output_addr + 4); output_s32 = (int32_t *)output_addr; golden_s32 = (int32_t *)golden_addr; vector_minimum_element_int32(input_s32, index_s32, output_s32, length); if (*output_s32 != *golden_s32) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_s32, *output_s32); iot_printf("not all correct!\n"); } else if (input_s32[*index_s32] != *output_s32) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_s32, input_s32[*index_s32]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; case 1 /* max */: switch (signed_) { case 0 /* unsigned */: input_u32 = (uint32_t *)input_addr; index_u32 = (uint32_t *)(output_addr + 4); output_u32 = (uint32_t *)output_addr; golden_u32 = (uint32_t *)golden_addr; vector_maximum_element_uint32(input_u32, index_u32, output_u32, length); if (*output_u32 != *golden_u32) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_u32, *output_u32); iot_printf("not all 
correct!\n"); } else if (input_u32[*index_u32] != *output_u32) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_u32, input_u32[*index_u32]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; case 1 /* signed */: input_s32 = (int32_t *)input_addr; index_s32 = (uint32_t *)(output_addr + 4); output_s32 = (int32_t *)output_addr; golden_s32 = (int32_t *)golden_addr; vector_maximum_element_int32(input_s32, index_s32, output_s32, length); if (*output_s32 != *golden_s32) { iot_printf("elem incorrent, golden = %d, output = %d\n", *golden_s32, *output_s32); iot_printf("not all correct!\n"); } else if (input_s32[*index_s32] != *output_s32) { iot_printf("elem index incorrent, input at index %d is %d\n", *index_s32, input_s32[*index_s32]); iot_printf("not all correct!\n"); } iot_printf("finished~"); /* code */ break; default: break; } /* code */ break; default: break; } /* code */ break; default: break; } } void logsoftmax_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; uint32_t data_begin = 0x10200000; read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t length = config_get[0]; uint32_t elem_bytes = config_get[1]; uint32_t batch = config_get[2]; uint16_t *max_indice = (uint16_t *)(output_addr + config_get[55]); int8_t *max = (int8_t *)(output_addr + config_get[55] + 4 * length);; switch (elem_bytes) { case 1/* constant-expression */: logsoftmax_int8((int8_t *)input_addr, (int8_t *)output_addr, max_indice, max, length, batch); break; default: logsoftmax_int16((int16_t *)input_addr, (int16_t *)output_addr, max_indice, (int16_t *)max, length, batch); break; } verify_8bits(output_addr, golden_addr, config_get[55]); } void softmax_test_from_pc() { 
uint8_t config_read[120]; uint32_t config_get[60]; #ifdef AI_USING_PSRAM uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072); #else uint32_t data_begin = 0x10200000; #endif read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = (uint32_t)data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t length = config_get[0]; uint32_t elem_bytes = config_get[1]; switch (elem_bytes) { case 1/* constant-expression */: softmax_int8((int8_t *)input_addr, (int8_t *)output_addr, length); break; default: softmax_int16((int16_t *)input_addr, (int16_t *)output_addr, length); break; } verify_8bits(output_addr, golden_addr, config_get[55]); #ifdef AI_USING_PSRAM os_mem_free(data_begin); #endif } void dequantize_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; #ifdef AI_USING_PSRAM uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072); #else uint32_t data_begin = 0x10200000; #endif read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = (uint32_t)data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t length = config_get[0]; uint32_t elem_bytes = config_get[1]; uint32_t signed_ = config_get[2]; int32_t center = config_get[3]; float scale = (float)((int32_t)config_get[4] - 128); uint32_t scale_power = config_get[5]; for (uint32_t i = 0; i < scale_power; i++) { scale = scale * 2.0; } switch (elem_bytes) { case 1/* constant-expression */: //iot_printf("scale = %f\n", scale); scale = scale / 256.0; //iot_printf("scale = %f\n", scale); switch (signed_) { case 0/* constant-expression */: vector_uint8_to_float((uint8_t *)input_addr, (float *)output_addr, 
(float)(center), scale, length); break; case 1/* constant-expression */: vector_int8_to_float((int8_t *)input_addr, (float *)output_addr, (float)(center - 128), scale, length); break; default: break; } break; default: //iot_printf("scale = %f\n", scale); scale = scale / 65536.0; //iot_printf("scale = %f\n", scale); switch (signed_) { case 0/* constant-expression */: vector_uint16_to_float((uint16_t *)input_addr, (float *)output_addr, (float)(center), scale, length); break; case 1/* constant-expression */: vector_int16_to_float((int16_t *)input_addr, (float *)output_addr, (float)(center - 32768), scale, length); break; default: break; } break; } verify_float(output_addr, golden_addr, length); #ifdef AI_USING_PSRAM os_mem_free(data_begin); #endif } void quantize_test_from_pc() { uint8_t config_read[120]; uint32_t config_get[60]; #ifdef AI_USING_PSRAM uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072); #else uint32_t data_begin = 0x10200000; #endif read_case_from_python(config_read, config_get, (uint8_t *)data_begin); uint32_t weight_addr = (uint32_t)data_begin; uint32_t bias_addr = weight_addr + config_get[52]; uint32_t input_addr = bias_addr + config_get[53]; uint32_t golden_addr = input_addr + config_get[54]; uint32_t output_addr = golden_addr + config_get[55]; uint32_t length = config_get[0]; uint32_t elem_bytes = config_get[1]; uint32_t signed_ = config_get[2]; float *center = (float *)bias_addr; float *scale = (float *)weight_addr; switch (elem_bytes) { case 1/* constant-expression */: switch (signed_) { case 0/* constant-expression */: vector_float_to_uint8((float *)input_addr, (uint8_t *)output_addr, *center, *scale, length); break; case 1/* constant-expression */: vector_float_to_int8((float *)input_addr, (int8_t *)output_addr, *center, *scale, length); break; default: break; } break; default: switch (signed_) { case 0/* constant-expression */: vector_float_to_uint16((float *)input_addr, (uint16_t *)output_addr, *center, *scale, length); break; case 
1/* constant-expression */: vector_float_to_int16((float *)input_addr, (int16_t *)output_addr, *center, *scale, length); break; default: break; } break; } verify_8bits(output_addr, golden_addr, config_get[55]); #ifdef AI_USING_PSRAM os_mem_free(data_begin); #endif } void psram_test() { uint32_t *writer = (uint32_t *)0x10200000; for (uint32_t i = 0; i < 1024 * 512; i++) { writer[i] = i * 3; } for (uint32_t i = 0; i < 1024 * 512; i++) { if (writer[i] != i * 3) { iot_printf("incorrect at %08x, should be %08x, in fact %08x\n", 0x10200000 + 4 * i, i * 3, writer[i]); } } } void temp_test() { uint32_t h_a = 64; uint32_t w_b = 64; uint32_t w_a = 64; uint64_t begin; uint64_t end; begin = cpu_get_mcycle(); int8_t *a = (int8_t *)os_mem_malloc(1, h_a * w_a); int8_t *b = (int8_t *)os_mem_malloc(1, w_a * w_b); int8_t *o = (int8_t *)os_mem_malloc(1, h_a * w_b); end = cpu_get_mcycle(); iot_printf("cycle~~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); matrix_multi_int8_to_int8(a, b, o, h_a, w_a, w_b, 0); end = cpu_get_mcycle(); iot_printf("cycle~~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin)); begin = cpu_get_mcycle(); os_mem_free(a); os_mem_free(b); os_mem_free(o); end = cpu_get_mcycle(); iot_printf("cycle~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin)); iot_printf("temp test finished~~~~~~~~~~~~~~~~~\n"); } #ifndef AI_OS_TASK int main(void) { dbg_uart_init(); test_uart_init(); iot_dbg_uart_set_port(0, 115200 * 1, 0, 8, 1); iot_printf("main start!\n"); dsp_init(); //while(1){ //test_uart0_getc();} #ifdef SATURATION uint32_t data = 0x1071f; vcsrw(data,data,0) #else uint32_t data = 0x1061f; vcsrw(data,data,0) #endif //psram_test(); //maximum_test(); //uint64_t begin; //uint64_t end; //begin = cpu_get_mcycle(); //end = cpu_get_mcycle(); //iot_printf("cycle = %d\n", (uint32_t)(end - begin)); //begin = cpu_get_mcycle(); //end = cpu_get_mcycle(); //iot_printf("cycle = %d\n", (uint32_t)(end - begin)); //vector_add_test_float(); 
//vector_sub_test_float(); //vector_mul_test_float(); //vector_madd_msub_test_float(); //vector_max_test_float(); //vector_min_test_float(); //vector_equal_test_float(); //vector_not_equal_test_float(); //vector_less_than_test_float(); //vector_greater_or_equal_test_float(); //vector_inner_product_test_float(); //vector_inner_product_test_int32(); //vector_inner_product_test_uint32(); //float_to_int8_test(); //float_to_uint8_test(); //float_to_int16_test(); //float_to_uint16_test(); //int8_to_float_test(); //uint8_to_float_test(); //int16_to_float_test(); //uint16_to_float_test(); while(1) { //temp_test(); //softmax_test_from_pc(); //vector_maxi_mini_test_from_pc(); //matrix_multi_test_from_pc(); //matrix_transpose_multi_test_from_pc(); //vector_test_from_pc(); //logsoftmax_test_from_pc(); //fc_test_from_pc(); dequantize_test_from_pc(); //vector_saturation_test_from_pc(); //fc_16bit_test(); //matrix_transpose_test_from_pc(); //depth_fc_test_from_pc(); } return 0; } #else void user_task_1(){ test_uart_init(); iot_dbg_uart_set_port(0, 115200 * 1, 0, 8, 1); iot_printf("main start!\n"); dsp_init(); while(1) { matrix_multi_test_from_pc(); } } int32_t iot__task_init() { os_task_h handle; handle = os_create_task(user_task_1, NULL, 9); //create the tasks; if(handle != NULL) { iot_printf("task 1 init successfully...\n"); } return 0; } int32_t iot__module_init(void) { //platform intialization; iot__platform_init(); //create all the tasks; iot__task_init(); iot_rtc_init(); iot_printf("starting...\n"); return 0; } int32_t iot__task_start() { //start the tasks; os_start_kernel(); return 0; } int32_t iot__module_start(void) { int32_t res = 0; res = iot__task_start(); return res; } int main(void) { //module init; iot__module_init(); //module start; iot__module_start(); return 0; } #endif