9043 lines
		
	
	
		
			327 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			9043 lines
		
	
	
		
			327 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
| //--------------------------------------------------------------------------
 | |
| #include <stdint.h>
 | |
| #include <stdio.h>
 | |
| #include <math.h>
 | |
| #include "bee_simd_custom.h"
 | |
| //#include "uart.h"
 | |
| //#include "hw_reg_api.h"
 | |
| #include "os_types.h"
 | |
| //#include "dbg_io.h"
 | |
| //#include "cpl_types.h"
 | |
| #include "iot_diag.h"
 | |
| #include "iot_io.h"
 | |
| #include "simd_vector_function.h"
 | |
| #include "simd_matrix_function.h"
 | |
| #include "simd_nn_function.h"
 | |
| //#include "clk.h"
 | |
| //#include "chip_reg_base.h"
 | |
| #include "os_mem.h"
 | |
| //#include "cpu.h"
 | |
| #include "simd_config.h"
 | |
| 
 | |
| //#define SATURATION 1
 | |
| //#define AI_USING_PSRAM 1
 | |
| //#define AI_OS_TASK 1
 | |
| 
 | |
| #ifdef AI_OS_TASK
 | |
| #include "os_task.h"
 | |
| #endif
 | |
| 
 | |
| 
 | |
| void dbg_uart_init();
 | |
| void read_case_from_python(uint8_t *config_read, uint32_t *config_get, uint8_t *data_get);
 | |
| void read_nn_case_from_python(uint8_t *config_read, uint32_t *config_get);
 | |
| void read_case_from_python_matrix(uint8_t *config_read, uint32_t *config_get);
 | |
| void read_case_from_python_matrix_multi(uint8_t *config_read, uint32_t *config_get);
 | |
| void verify_8bits(uint32_t out_addr, uint32_t golden_addr, uint32_t length);
 | |
| extern void uart_dma_init(int port, int br);
 | |
| extern void uart_dma_read(uint8_t *bufptr, uint32_t size, void (*callback) (void*, uint8_t), void* dummy);
 | |
| void vector_maximum_element_int8_small(int8_t *v, uint8_t *max_index, int8_t *max, uint8_t len);
 | |
| void iot_dbg_uart_set_port(uint8_t port, uint32_t baud, uint8_t parity, uint8_t data, uint8_t stop);
 | |
| void verify_float(uint32_t out_addr, uint32_t golden_addr, uint32_t length);
 | |
| int32_t test_uart0_getc();
 | |
| uint64_t cpu_get_mcycle();
 | |
| void test_uart_init();
 | |
// Lookup tables used by the vldx_test_* routines: each routine refills the
// table matching its element width before issuing the indexed load.
int8_t table8[256];
int16_t table16[256];
int32_t table32[256];
 | |
// Signed 8-bit weight table, written as raw bytes (e.g. 0xcc encodes -52).
// Following the original w(f, k) annotations, the table is laid out as two
// groups of 14 rows (k = 0..13) with 16 entries per row:
//   group 0: columns are filters f = 0..15
//   group 1: columns are filters f = 16..23 followed by eight 0x00 entries
//            (unannotated in the original; presumably padding to a full
//            group of 16 filters - confirm against the generator script).
// 448 entries in total.
int8_t weight[] = {
    // ---- group 0: w(0..15, k) ----
    /* k = 0  */ 0x02, 0xcc, 0x3e, 0xfe, 0x60, 0x69, 0x59, 0x98, 0xbe, 0x34, 0x06, 0x16, 0x13, 0x74, 0x41, 0x25,
    /* k = 1  */ 0xd8, 0xeb, 0xbc, 0x32, 0x87, 0x95, 0xa0, 0x5a, 0x0e, 0xf9, 0x60, 0xeb, 0xdd, 0x20, 0x68, 0x13,
    /* k = 2  */ 0x18, 0x5c, 0x00, 0xb1, 0xae, 0xcb, 0x17, 0x24, 0x80, 0x3a, 0x32, 0xec, 0x0f, 0x14, 0xd9, 0xd6,
    /* k = 3  */ 0xdc, 0x6f, 0x3a, 0x7f, 0x3a, 0x7a, 0x8e, 0x95, 0x5c, 0x0a, 0xa0, 0xb5, 0xc8, 0xf7, 0xaf, 0x6a,
    /* k = 4  */ 0x9a, 0xb1, 0xd5, 0x94, 0x68, 0x6d, 0xcb, 0xb5, 0x42, 0xe3, 0xc3, 0x1c, 0xfc, 0xd2, 0xf0, 0xcf,
    /* k = 5  */ 0x13, 0x05, 0x37, 0xee, 0xf1, 0x26, 0xd2, 0xf7, 0x0c, 0xed, 0x2d, 0x23, 0x38, 0xb3, 0xec, 0x8c,
    /* k = 6  */ 0xc9, 0xdf, 0xc6, 0x24, 0x99, 0xd1, 0x23, 0x2a, 0x96, 0xa9, 0x17, 0x06, 0xe3, 0xf0, 0x58, 0x70,
    /* k = 7  */ 0x10, 0xb0, 0x74, 0x3e, 0x3d, 0x99, 0xa1, 0x0c, 0x4f, 0x35, 0xf7, 0x8b, 0x28, 0xc7, 0xbe, 0x13,
    /* k = 8  */ 0x85, 0x64, 0xd8, 0xf4, 0x18, 0xab, 0x37, 0x87, 0x0f, 0x6a, 0x67, 0x8f, 0xb5, 0x4f, 0x7a, 0xc8,
    /* k = 9  */ 0x4d, 0x5f, 0x2e, 0x17, 0x4b, 0x0b, 0x3d, 0xbf, 0x6f, 0x2f, 0xa8, 0x71, 0x68, 0xb9, 0xe9, 0xc1,
    /* k = 10 */ 0x2d, 0xb5, 0x6b, 0x66, 0x05, 0x76, 0x00, 0x29, 0xe6, 0xc9, 0x22, 0xf6, 0x2f, 0xec, 0xb9, 0x96,
    /* k = 11 */ 0x33, 0x4c, 0xeb, 0x48, 0xea, 0x0b, 0xcb, 0x0d, 0x0e, 0x9b, 0xba, 0xe0, 0x54, 0xd2, 0x43, 0x1c,
    /* k = 12 */ 0x37, 0xee, 0xca, 0xe5, 0xf0, 0x4f, 0xb5, 0x62, 0x92, 0xaa, 0x58, 0xbc, 0xc6, 0xf7, 0x7b, 0xae,
    /* k = 13 */ 0xc1, 0x4e, 0xf9, 0x0b, 0xa5, 0xf0, 0x6d, 0xa8, 0x80, 0x97, 0x92, 0x68, 0x92, 0xed, 0xdd, 0x47,
    // ---- group 1: w(16..23, k), then eight zero entries ----
    /* k = 0  */ 0x1d, 0x99, 0x18, 0x4e, 0x63, 0x83, 0xa9, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 1  */ 0x73, 0x6a, 0x49, 0x77, 0x7a, 0x1e, 0xdc, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 2  */ 0x75, 0x37, 0x22, 0xee, 0xe8, 0x23, 0x2b, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 3  */ 0x2d, 0xa0, 0x53, 0x6f, 0x09, 0xf0, 0x66, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 4  */ 0x87, 0x64, 0x56, 0x0a, 0xee, 0xdb, 0xad, 0xd5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 5  */ 0xc1, 0xc1, 0x23, 0x94, 0x4b, 0x4f, 0x59, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 6  */ 0x41, 0xd8, 0xbe, 0x9c, 0xc1, 0x98, 0x4a, 0xab, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 7  */ 0xc2, 0x00, 0xa9, 0xc7, 0xd8, 0x47, 0x18, 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 8  */ 0xf5, 0x2e, 0xf5, 0x21, 0x75, 0x55, 0x19, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 9  */ 0xe2, 0x38, 0xbb, 0x18, 0xa7, 0x29, 0xa9, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 10 */ 0x40, 0x9c, 0x5e, 0xf8, 0x2d, 0x90, 0x3a, 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 11 */ 0xdf, 0xde, 0x0a, 0xc1, 0x7b, 0xe1, 0xc3, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 12 */ 0x39, 0x5a, 0x2d, 0x87, 0x9e, 0x0b, 0x9a, 0xee, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* k = 13 */ 0x6a, 0x26, 0x34, 0x5d, 0xf5, 0xdd, 0x3f, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
 | |
// 32-entry 32-bit bias buffer; it has no initializer here.
int32_t bias[32];

// Signed 8-bit bias values written as raw bytes (0xe1 encodes -31).
// Entries 0..23 carry the values annotated F(0,0)..F(23,0) in the original;
// the last eight entries are zero.
int8_t bias8[] = {
    /* F(0..7, 0)   */ 0x5e, 0xe1, 0xe9, 0xcc, 0xd2, 0x89, 0xdc, 0x02,
    /* F(8..15, 0)  */ 0x08, 0x4f, 0xb1, 0xde, 0x97, 0xfe, 0x1f, 0xf1,
    /* F(16..23, 0) */ 0x0c, 0x69, 0x9d, 0xbc, 0xe7, 0xd3, 0xae, 0xc7,
    /* zero tail    */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
 | |
// 32-byte result buffer; it has no initializer here.
int8_t output[32];

// Signed 8-bit input vector written as raw bytes (0xbc encodes -68).
// Entries 0..13 carry the values annotated F(0,0)..F(13,0) in the original;
// the last two entries are zero, giving 16 entries in total.
int8_t input[] = {
    /* F(0..7, 0)  */ 0xbc, 0xdb, 0xc2, 0x3a, 0x59, 0x15, 0x47, 0xbb,
    /* F(8..13, 0) */ 0x38, 0xf1, 0xc3, 0xdf, 0xa3, 0x47,
    /* zero tail   */ 0x00, 0x00
};
 | |
| 
 | |
| void vldx_test_uint8() {
 | |
|     for (uint16_t i = 0; i < 256; i++) {
 | |
|         table8[i] = i;
 | |
|     }
 | |
|     uint8_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32};
 | |
|     uint8_t o[16];
 | |
|     vld_uib(0, in, 1)
 | |
|     vldx_uib(1, table8, 0)
 | |
|     vst_uib(o, 1, 1)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vldx_test_int8() {
 | |
|     for (uint16_t i = 0; i < 256; i++) {
 | |
|         table8[i] = i;
 | |
|     }
 | |
|     int8_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32};
 | |
|     int8_t o[16];
 | |
|     vld_sib(0, in, 1)
 | |
|     vldx_sib(1, (table8 + 128), 0)
 | |
|     vst_sib(o, 1, 1)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vldx_test_uint16() {
 | |
|     for (uint16_t i = 0; i < 256; i++) {
 | |
|         table16[i] = i | ((i + 1) << 8);
 | |
|     }
 | |
|     uint16_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32};
 | |
|     uint16_t o[16];
 | |
|     vld_uih(0, in, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vldx_uih(1, table16, 0)
 | |
|     vst_uih(o, 1, 1)
 | |
|     asm("fence");
 | |
|     for (uint16_t i = 0; i < 16; i++) {
 | |
|         iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vldx_test_int16() {
 | |
|     for (uint16_t i = 0; i < 256; i++) {
 | |
|         table16[i] = i | ((i + 1) << 8);
 | |
|     }
 | |
|     int16_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32};
 | |
|     int16_t o[16];
 | |
|     vld_sih(0, in, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vldx_sih(1, (table16 + 128), 0)
 | |
|     vst_sih(o, 1, 1)
 | |
|     asm("fence");
 | |
|     for (uint16_t i = 0; i < 16; i++) {
 | |
|         iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vldx_test_uint32() {
 | |
|     for (uint32_t i = 0; i < 256; i++) {
 | |
|         table32[i] = i | ((i + 1) << 8) | ((i + 2) << 16) | ((i + 3) << 24);
 | |
|     }
 | |
|     uint32_t in[16] = {96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32};
 | |
|     uint32_t o[16];
 | |
|     vld_uiw(0, in, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vldx_uiw(1, table32, 0)
 | |
|     vst_uiw(o, 1, 1)
 | |
|     asm("fence");
 | |
|     for (uint32_t i = 0; i < 4; i++) {
 | |
|         iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vldx_test_int32() {
 | |
|     for (uint32_t i = 0; i < 256; i++) {
 | |
|         table32[i] = i | ((i + 1) << 8) | ((i + 2) << 16) | ((i + 3) << 24);
 | |
|     }
 | |
|     int32_t in[16] = {-6, -15, 30, 4, 24, -25, 57, -23, 71, -112, 83, -89, -6, 60, 57, 32};
 | |
|     int32_t o[16];
 | |
|     vld_siw(0, in, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vldx_siw(1, (table32 + 128), 0)
 | |
|     vst_siw(o, 1, 1)
 | |
|     asm("fence");
 | |
|     for (uint32_t i = 0; i < 4; i++) {
 | |
|         iot_printf("in[%d] = %x, out[%d] = %x\n", i, in[i], i, o[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vstx_test_uint8() {
 | |
|     uint8_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13};
 | |
|     uint8_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45};
 | |
|     uint8_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 | |
|     vld_uib(0, index, 1)
 | |
|     vld_uib(1, in, 1)
 | |
|     vstx_uib(out, 0, 1)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("out[%d] = %d\n", i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vstx_test_int8() {
 | |
|     int8_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8};
 | |
|     int8_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45};
 | |
|     int8_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 | |
|     vld_sib(0, index, 1)
 | |
|     vld_sib(1, in, 1)
 | |
|     vstx_sib((out + 8), 0, 1)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("out[%d] = %d\n", i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vstx_test_uint16() {
 | |
|     uint16_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13};
 | |
|     uint16_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45};
 | |
|     uint16_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 | |
|     uint16_t *pi = in;
 | |
|     uint16_t *pd = index;
 | |
|     vld_uih(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_uih(1, pi, 1)
 | |
|     vstx_uih(out, 0, 1)
 | |
|     pi += 8;
 | |
|     pd += 8;
 | |
|     vld_uih(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_uih(1, pi, 1)
 | |
|     vstx_uih(out, 0, 1)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("out[%d] = %d\n", i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vstx_test_int16() {
 | |
|     int16_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8};
 | |
|     int16_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45};
 | |
|     int16_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 | |
|     int16_t *pi = in;
 | |
|     int16_t *pd = index;
 | |
|     vld_sih(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_sih(1, pi, 1)
 | |
|     vstx_sih((out + 8), 0, 1)
 | |
|     pi += 8;
 | |
|     pd += 8;
 | |
|     vld_sih(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_sih(1, pi, 1)
 | |
|     vstx_sih((out + 8), 0, 1)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("out[%d] = %d\n", i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vstx_test_uint32() {
 | |
|     uint32_t index[16] = {0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13};
 | |
|     uint32_t in[16] = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45};
 | |
|     uint32_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 | |
|     uint32_t *pi = in;
 | |
|     uint32_t *pd = index;
 | |
|     vld_uiw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_uiw(1, pi, 1)
 | |
|     vstx_uiw(out, 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     vld_uiw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_uiw(1, pi, 1)
 | |
|     vstx_uiw(out, 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     vld_uiw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_uiw(1, pi, 1)
 | |
|     vstx_uiw(out, 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     vld_uiw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_uiw(1, pi, 1)
 | |
|     vstx_uiw(out, 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("out[%d] = %d\n", i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vstx_test_int32() {
 | |
|     int32_t index[16] = {0-8, 3-8, 6-8, 9-8, 12-8, 15-8, 2-8, 5-8, 8-8, 11-8, 14-8, 1-8, 4-8, 7-8, 10-8, 13-8};
 | |
|     int32_t in[16] = {0-12, 3-12, 6-12, 9-12, 12-12, 15-12, 18-12, 21-12, 24-12, 27-12, 30-12, 33-12, 36-12, 39-12, 42-12, 45};
 | |
|     int32_t out[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 | |
|     int32_t *pi = in;
 | |
|     int32_t *pd = index;
 | |
|     vld_siw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_siw(1, pi, 1)
 | |
|     vstx_siw((out + 8), 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     vld_siw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_siw(1, pi, 1)
 | |
|     vstx_siw((out + 8), 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     vld_siw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_siw(1, pi, 1)
 | |
|     vstx_siw((out + 8), 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     vld_siw(0, pd, 1)
 | |
|     vadd(0, 0, 0)
 | |
|     vadd(0, 0, 0)
 | |
|     vld_siw(1, pi, 1)
 | |
|     vstx_siw((out + 8), 0, 1)
 | |
|     pi += 4;
 | |
|     pd += 4;
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         iot_printf("out[%d] = %d\n", i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| uint8_t context_switch_test(){
 | |
| 
 | |
|     uint8_t error = 0;
 | |
| 
 | |
|     int32_t v_od_00[8] = {  31891, -20565,  20631,  12774, -24314,  -9825,  18402, -22729};
 | |
|     int32_t v_od_01[8] = {   1501, -25964, -10420,  10591,  18387,    -72, -27290, -17956};
 | |
|     int32_t v_od_02[8] = {  16671,  -7064,  23947,  -1473,  13293, -12916,    191, -14974};
 | |
|     int32_t v_od_03[8] = { -30005,  -8091, -19722, -23435,    -69,   4113,   3432,  27961};
 | |
|     int32_t v_od_04[8] = {   -618,    660, -28635,  24275,  25812, -16048, -29275, -11623};
 | |
|     int32_t v_od_05[8] = {  -4293,  -7881,  -3072, -12244,  18456,   4407,  22162,   7440};
 | |
|     int32_t v_od_06[8] = {  23991,    835,  10023, -24922,  19792,  -8055,    505,  13278};
 | |
|     int32_t v_od_07[8] = { -31128,    718, -25631, -17336, -18325,   9021,  -4872,  29411};
 | |
|     int32_t v_od_08[8] = {  14059, -14991,  -7484, -16624,  -1074,  20256,  17419,   4956};
 | |
|     int32_t v_od_09[8] = {  21760, -19031,  15016,  32652, -32195,  25705,   1899,   7788};
 | |
|     int32_t v_od_10[8] = {  31891, -20165,  20131,  12774, -24314,  -9821,  18102, -22719};
 | |
|     int32_t v_od_11[8] = {   1501, -25164, -10120,  10591,  18317,    -71, -27190, -17916};
 | |
|     int32_t v_od_12[8] = {  16671,  -7164,  23147,  -1473,  13213, -12911,    191, -14914};
 | |
|     int32_t v_od_13[8] = { -30005,  -8191, -19122, -23435,    -19,   4111,   3132,  27911};
 | |
|     int32_t v_od_14[8] = {   -618,    160, -28135,  24275,  25812, -16041, -29175, -11613};
 | |
|     int32_t v_od_15[8] = {  -4293,  -7181,  -3172, -12244,  18416,   4401,  22162,   7410};
 | |
|     int32_t v_od_16[8] = {  23991,    135,  10123, -24922,  19712,  -8051,    105,  13218};
 | |
|     int32_t v_od_17[8] = { -31128,    118, -25131, -17336, -18315,   9021,  -4172,  29411};
 | |
|     int32_t v_od_18[8] = {  14059, -14191,  -7184, -16624,  -1014,  20251,  17119,   4916};
 | |
|     int32_t v_od_19[8] = {  21760, -19131,  15116,  32652, -32115,  25701,   1199,   7718};
 | |
|     int32_t v_od_20[8] = {  31881, -20865,  28131,  18774, -24384,  -9881,  18802, -28719};
 | |
|     int32_t v_od_21[8] = {   1581, -25864, -18120,  18591,  18387,    -81, -27890, -18916};
 | |
|     int32_t v_od_22[8] = {  16681,  -7864,  28147,  -8473,  13283, -12981,    891, -18914};
 | |
|     int32_t v_od_23[8] = { -30085,  -8891, -18122, -28435,    -89,   4181,   3832,  28911};
 | |
|     int32_t v_od_24[8] = {   -688,    860, -28135,  28275,  25882, -16081, -29875, -18613};
 | |
|     int32_t v_od_25[8] = {  -4283,  -7881,  -8172, -18244,  18486,   4481,  22862,   8410};
 | |
|     int32_t v_od_26[8] = {  23981,    835,  18123, -28922,  19782,  -8081,    805,  18218};
 | |
|     int32_t v_od_27[8] = { -31188,    818, -28131, -18336, -18385,   9081,  -4872,  28411};
 | |
|     int32_t v_od_28[8] = {  14089, -14891,  -8184, -18624,  -1084,  20281,  17819,   8916};
 | |
|     int32_t v_od_29[8] = {  21780, -19831,  18116,  38652, -32185,  25781,   1899,   8718};
 | |
|     int32_t v_od_30[8] = {  31891, -29865,  29131,  18974, -29384,  -9889,  18902, -29919};
 | |
|     int32_t v_od_31[8] = {   1591, -29864, -19120,  18991,  19387,    -89, -27990, -19916};
 | |
| 
 | |
|     int32_t v_dd_00[8];
 | |
|     int32_t v_dd_01[8];
 | |
|     int32_t v_dd_02[8];
 | |
|     int32_t v_dd_03[8];
 | |
|     int32_t v_dd_04[8];
 | |
|     int32_t v_dd_05[8];
 | |
|     int32_t v_dd_06[8];
 | |
|     int32_t v_dd_07[8];
 | |
|     int32_t v_dd_08[8];
 | |
|     int32_t v_dd_09[8];
 | |
|     int32_t v_dd_10[8];
 | |
|     int32_t v_dd_11[8];
 | |
|     int32_t v_dd_12[8];
 | |
|     int32_t v_dd_13[8];
 | |
|     int32_t v_dd_14[8];
 | |
|     int32_t v_dd_15[8];
 | |
|     int32_t v_dd_16[8];
 | |
|     int32_t v_dd_17[8];
 | |
|     int32_t v_dd_18[8];
 | |
|     int32_t v_dd_19[8];
 | |
|     int32_t v_dd_20[8];
 | |
|     int32_t v_dd_21[8];
 | |
|     int32_t v_dd_22[8];
 | |
|     int32_t v_dd_23[8];
 | |
|     int32_t v_dd_24[8];
 | |
|     int32_t v_dd_25[8];
 | |
|     int32_t v_dd_26[8];
 | |
|     int32_t v_dd_27[8];
 | |
|     int32_t v_dd_28[8];
 | |
|     int32_t v_dd_29[8];
 | |
|     int32_t v_dd_30[8];
 | |
|     int32_t v_dd_31[8];
 | |
| 
 | |
|     int32_t v_dt[5];
 | |
| 
 | |
|     vld_fpw( 0, v_od_00, 1)
 | |
|     vld_sib( 1, v_od_01, 1)
 | |
|     vld_uib( 2, v_od_02, 1)
 | |
|     vld_sih( 3, v_od_03, 1)
 | |
|     vld_uih( 4, v_od_04, 1)
 | |
|     vld_siw( 5, v_od_05, 1)
 | |
|     vld_uiw( 6, v_od_06, 1)
 | |
|     vld_fpw( 7, v_od_07, 1)
 | |
|     vld_sib( 8, v_od_08, 1)
 | |
|     vld_uib( 9, v_od_09, 1)
 | |
|     vld_sih(10, v_od_10, 1)
 | |
|     vld_uih(11, v_od_11, 1)
 | |
|     vld_siw(12, v_od_12, 1)
 | |
|     vld_uiw(13, v_od_13, 1)
 | |
|     vld_fpw(14, v_od_14, 1)
 | |
|     vld_sib(15, v_od_15, 1)
 | |
| //    vld_uib(16, v_od_16, 1)
 | |
| //    vld_sih(17, v_od_17, 1)
 | |
| //    vld_uih(18, v_od_18, 1)
 | |
| //    vld_siw(19, v_od_19, 1)
 | |
| //    vld_uiw(20, v_od_20, 1)
 | |
| //    vld_fpw(21, v_od_21, 1)
 | |
| //    vld_sib(22, v_od_22, 1)
 | |
| //    vld_uib(23, v_od_23, 1)
 | |
| //    vld_sih(24, v_od_24, 1)
 | |
| //    vld_uih(25, v_od_25, 1)
 | |
| //    vld_siw(26, v_od_26, 1)
 | |
| //    vld_uiw(27, v_od_27, 1)
 | |
| //    vld_fpw(28, v_od_28, 1)
 | |
| //    vld_sib(29, v_od_29, 1)
 | |
| //    vld_uib(30, v_od_30, 1)
 | |
| //    vld_sih(31, v_od_31, 1)
 | |
|     
 | |
|     vst_fpw(v_dd_00, 1,  0)
 | |
|     vst_sib(v_dd_01, 1,  1)
 | |
|     vst_uib(v_dd_02, 1,  2)
 | |
|     vst_sih(v_dd_03, 1,  3)
 | |
|     vst_uih(v_dd_04, 1,  4)
 | |
|     vst_siw(v_dd_05, 1,  5)
 | |
|     vst_uiw(v_dd_06, 1,  6)
 | |
|     vst_fpw(v_dd_07, 1,  7)
 | |
|     vst_sib(v_dd_08, 1,  8)
 | |
|     vst_uib(v_dd_09, 1,  9)
 | |
|     vst_sih(v_dd_10, 1, 10)
 | |
|     vst_uih(v_dd_11, 1, 11)
 | |
|     vst_siw(v_dd_12, 1, 12)
 | |
|     vst_uiw(v_dd_13, 1, 13)
 | |
|     vst_fpw(v_dd_14, 1, 14)
 | |
|     vst_sib(v_dd_15, 1, 15)
 | |
| //    vst_uib(v_dd_16, 1, 16)
 | |
| //    vst_sih(v_dd_17, 1, 17)
 | |
| //    vst_uih(v_dd_18, 1, 18)
 | |
| //    vst_siw(v_dd_19, 1, 19)
 | |
| //    vst_uiw(v_dd_20, 1, 20)
 | |
| //    vst_fpw(v_dd_21, 1, 21)
 | |
| //    vst_sib(v_dd_22, 1, 22)
 | |
| //    vst_uib(v_dd_23, 1, 23)
 | |
| //    vst_sih(v_dd_24, 1, 24)
 | |
| //    vst_uih(v_dd_25, 1, 25)
 | |
| //    vst_siw(v_dd_26, 1, 26)
 | |
| //    vst_uiw(v_dd_27, 1, 27)
 | |
| //    vst_fpw(v_dd_28, 1, 28)
 | |
| //    vst_sib(v_dd_29, 1, 29)
 | |
| //    vst_uib(v_dd_30, 1, 30)
 | |
| //    vst_sih(v_dd_31, 1, 31)
 | |
|     vcsrr(v_dt[0], 0)
 | |
|     vcsrr(v_dt[1], 4)
 | |
|     vcsrr(v_dt[2], 5)
 | |
|     for (uint8_t i = 0; i < 5; i++) {
 | |
|         iot_printf("v_dt[%d]=%d\n", i, v_dt[i]);
 | |
|     }
 | |
| //    vcsrr(v_dt[3], 6)
 | |
| //    vcsrr(v_dt[4], 7)
 | |
|     
 | |
|     for(uint8_t i = 0; i < 4; i++) {
 | |
|         error += (v_od_00[i] != v_dd_00[i]);
 | |
|         error += (v_od_01[i] != v_dd_01[i]);
 | |
|         error += (v_od_02[i] != v_dd_02[i]);
 | |
|         error += (v_od_03[i] != v_dd_03[i]);
 | |
|         error += (v_od_04[i] != v_dd_04[i]);
 | |
|         error += (v_od_05[i] != v_dd_05[i]);
 | |
|         error += (v_od_06[i] != v_dd_06[i]);
 | |
|         error += (v_od_07[i] != v_dd_07[i]);
 | |
|         error += (v_od_08[i] != v_dd_08[i]);
 | |
|         error += (v_od_09[i] != v_dd_09[i]);
 | |
|         error += (v_od_10[i] != v_dd_10[i]);
 | |
|         error += (v_od_11[i] != v_dd_11[i]);
 | |
|         error += (v_od_12[i] != v_dd_12[i]);
 | |
|         error += (v_od_13[i] != v_dd_13[i]);
 | |
|         error += (v_od_14[i] != v_dd_14[i]);
 | |
|         error += (v_od_15[i] != v_dd_15[i]);
 | |
|         error += (v_od_16[i] != v_dd_16[i]);
 | |
|         error += (v_od_17[i] != v_dd_17[i]);
 | |
|         error += (v_od_18[i] != v_dd_18[i]);
 | |
|         error += (v_od_19[i] != v_dd_19[i]);
 | |
|         error += (v_od_20[i] != v_dd_20[i]);
 | |
|         error += (v_od_21[i] != v_dd_21[i]);
 | |
|         error += (v_od_22[i] != v_dd_22[i]);
 | |
|         error += (v_od_23[i] != v_dd_23[i]);
 | |
|         error += (v_od_24[i] != v_dd_24[i]);
 | |
|         error += (v_od_25[i] != v_dd_25[i]);
 | |
|         error += (v_od_26[i] != v_dd_26[i]);
 | |
|         error += (v_od_27[i] != v_dd_27[i]);
 | |
|         error += (v_od_28[i] != v_dd_28[i]);
 | |
|         error += (v_od_29[i] != v_dd_29[i]);
 | |
|         error += (v_od_30[i] != v_dd_30[i]);
 | |
|         error += (v_od_31[i] != v_dd_31[i]);
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t fpw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     float a = 34.75;
 | |
|     float b = -44.1876;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     float v_a[256] = {777.07, 4935.03, -2095.22, 12332.16, 5333.75, -10814.01, 6083.75, 9508.28, -19271.97, 16169.42, -17471.97, -7809.87, -8338.85, 6279.29, -7754.14, 3226.75, -11237.26, 16013.69, 8641.08, -11088.85, -19109.87, 7806.68, 4344.90, -7388.85, -4545.22, -245.85, 10789.49, 13392.35, -19191.71, -9114.01, 15573.24, -3244.26, 10161.46, 19411.14, -1048.08, 13182.80, -555.73, 16038.21, 798.40, 6779.61, -14253.50, 1392.99, 12161.14, -13104.14, 13215.60, -10624.84, 12779.29, -7574.52, -15975.15, 15696.17, -11969.10, -8404.14, -2035.35, -9578.02, 17222.29, -19736.62, 12621.97, 16485.03, 4657.96, 3929.61, -135.35, 16592.35, -11447.13, -3710.19, 1707.32, 14610.82, -14592.67, 19698.72, 3263.05, 9526.11, 19942.03, -3376.75, -13132.80, 12350.31, 1144.58, -10321.97, 7638.53, 3089.49, -15864.01, -18837.26, 3166.87, 14204.45, -12628.34, -12936.62, -13019.74, -8274.20, 14179.93, -20016.24, -1608.28, 8886.30, -16579.29, -4057.32, 12142.03, 7670.06, -16300.0, 9552.86, -1552.54, -12142.35, 17177.70, 17026.75, 6104.77, 17009.55, -3898.40, 3015.28, 20788.21, 11214.01, -9442.99, -7353.50, -357.00, -13278.02, -7142.35, -12297.77, -16668.15, -19056.36, 11109.23, -4065.92, 2818.15, -10530.57, 3775.79, -12004.14, 18415.28, -8501.91, -9812.10, -7161.46, -5634.71, -14551.91, 15949.36, 13456.68};
 | |
|     float v_b[256] = {8438.85, 280.25, -8459.55, -5259.23, 15479.29, 15932.48, -16.87, -10713.05, -6168.15, -10871.01, 12173.56, -15502.54, -9540.76, -12503.82, 13469.10, -4861.14, -15288.53, -13114.01, -9909.87, -13050.0, 4168.47, 18297.45, -10086.94, -8887.89, 2401.59, -16019.10, 10295.22, 9995.22, 16160.82, 9697.77, 426.11, 20664.96, 6679.93, -2966.87, -7217.19, 2012.73, -1391.71, -5016.24, -16111.46, 12742.35, 7115.60, 4667.51, -6773.24, 12151.27, -6780.89, -12656.36, -12919.42, 1100.31, 10725.79, -1694.26, -1034.07, -6064.33, 6150.0, -7362.10, 20117.19, 12178.66, 14416.24, -15065.28, 8656.36, -13225.47, 16494.58, 1716.24, -11512.10, -12388.53, 1336.30, 15338.85, -353.18, -6915.28, -18771.33, -13300.95, -19781.84, -8577.38, -2192.35, 13153.50, -1938.21, 6155.09, -20264.96, -5348.72, -7686.30, -10642.35, -4306.05, 11095.22, 19364.96, 17412.73, 4867.83, -2163.05, -16232.16, 1861.46, 10643.63, -434.39, -17728.34, -7474.84, 17064.33, 7873.88, 19580.89, -17547.13, 13165.28, 3083.12, 18822.61, -17371.01, -1943.63, 6529.61, -17193.94, -10397.45, -1166.56, 2749.04, -16870.38, 1806.05, -3050.95, -8488.21, -14813.69, -7595.54, 19336.30, 19500.95, 4617.19, -9113.37, -17397.45, 9054.77, 13341.71, 20647.77, 5813.69, 13237.57, 12785.66, 18758.59, 18591.08, -2187.26, 1249.04, 13889.49};
 | |
|     float v_d0[256];
 | |
|     float v_d1[256];
 | |
| 
 | |
|     float *p_a = v_a;
 | |
|     float *p_b = v_b;
 | |
|     float *p_d0 = v_d0;
 | |
| 
 | |
|     vld_fpw(0, p_a, 1)
 | |
|     vld_fpw(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*1){
 | |
|         p_a = p_a + SIMD_W*1;
 | |
|         p_b = p_b + SIMD_W*1;
 | |
| 
 | |
|         //vmull(2,0,1)
 | |
|         //vmadd(2,0,1)
 | |
|         //vmsub(2,0,1)
 | |
|         //vsub(2,2,0)
 | |
|         //vadd(2,2,1)
 | |
| 
 | |
|         vmull(2,0,1)
 | |
|         vadd(2,2,1)
 | |
| 
 | |
|         vld_fpw(0, p_a, 1)
 | |
|         vld_fpw(1, p_b, 1)
 | |
|         vst_fpw(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         //v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         v_d1[i] = v_a[i] * v_b[i] + v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t siw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int32_t a = 4431;
 | |
|     int32_t b = -977;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     int32_t v_a[256] = {31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, -25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 986};
 | |
|     int32_t v_b[256] = {-25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, 986};
 | |
|     int32_t v_d0[256];
 | |
|     int32_t v_d1[256];
 | |
| 
 | |
|     int32_t *p_a = v_a;
 | |
|     int32_t *p_b = v_b;
 | |
|     int32_t *p_d0 = v_d0;
 | |
| 
 | |
|     uint32_t shiftl[1] = {1};
 | |
|     uint32_t shiftr[1] = {16};
 | |
|     vld_uiw(8, shiftl, 0)
 | |
|     vld_uiw(9, shiftr, 0)
 | |
| 
 | |
|     vld_siw(0, p_a, 1)
 | |
|     vld_siw(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*1){
 | |
|         p_a = p_a + SIMD_W*1;
 | |
|         p_b = p_b + SIMD_W*1;
 | |
| 
 | |
|         //vmull(2,0,1)
 | |
|         //vmadd(2,0,1)
 | |
|         //vmsub(2,0,1)
 | |
|         //vsub(2,2,0)
 | |
|         //vadd(2,2,1)
 | |
|         //vsll(2,2,8)
 | |
|         //vsra(2,2,9)
 | |
| 
 | |
|         vmull(2,0,1)
 | |
| 
 | |
|         vld_siw(0, p_a, 1)
 | |
|         vld_siw(1, p_b, 1)
 | |
|         vst_siw(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
| 
 | |
|         //v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         //v_d1[i] = v_d1[i] << 1;
 | |
|         //v_d1[i] = v_d1[i] >> 16;
 | |
| 
 | |
|         v_d1[i] = v_a[i] * v_b[i];
 | |
| 
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t uiw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     uint32_t a = 4431;
 | |
|     uint32_t b = 977;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     uint32_t v_a[256] = {31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 986};
 | |
|     uint32_t v_b[256] = {25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 986};
 | |
|     uint32_t v_d0[256];
 | |
|     uint32_t v_d1[256];
 | |
| 
 | |
|     uint32_t *p_a = v_a;
 | |
|     uint32_t *p_b = v_b;
 | |
|     uint32_t *p_d0 = v_d0;
 | |
| 
 | |
|     uint32_t shiftl[1] = {1};
 | |
|     uint32_t shiftr[1] = {16};
 | |
|     vld_uiw(8, shiftl, 0)
 | |
|     vld_uiw(9, shiftr, 0)
 | |
| 
 | |
|     vld_uiw(0, p_a, 1)
 | |
|     vld_uiw(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*1){
 | |
|         p_a = p_a + SIMD_W*1;
 | |
|         p_b = p_b + SIMD_W*1;
 | |
| 
 | |
|         vmull(2,0,1)
 | |
| 
 | |
|         //vmull(2,0,1)
 | |
|         //vmadd(2,0,1)
 | |
|         //vmsub(2,0,1)
 | |
|         //vsub(2,2,0)
 | |
|         //vadd(2,2,1)
 | |
|         //vsll(2,2,8)
 | |
|         //vsrl(2,2,9)
 | |
| 
 | |
|         vld_uiw(0, p_a, 1)
 | |
|         vld_uiw(1, p_b, 1)
 | |
|         vst_uiw(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
| 
 | |
|         //v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         //v_d1[i] = v_d1[i] << 1;
 | |
|         //v_d1[i] = v_d1[i] >> 16;
 | |
| 
 | |
|         v_d1[i] = v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t sih_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int16_t a = 35;
 | |
|     int16_t b = -113;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     int16_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
 | |
|     int16_t v_b[256] = {83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 92};
 | |
|     int16_t v_d0[256];
 | |
|     int16_t v_d1[256];
 | |
| 
 | |
|     int16_t *p_a = v_a;
 | |
|     int16_t *p_b = v_b;
 | |
|     int16_t *p_d0 = v_d0;
 | |
| 
 | |
|     uint16_t shiftl[2] = {1, 1};
 | |
|     uint16_t shiftr[2] = {8, 8};
 | |
|     vld_uiw(8, shiftl, 0)
 | |
|     vld_uiw(9, shiftr, 0)
 | |
| 
 | |
|     vld_sih(0, p_a, 1)
 | |
|     vld_sih(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*2){
 | |
|         p_a = p_a + SIMD_W*2;
 | |
|         p_b = p_b + SIMD_W*2;
 | |
|         vmull(2,0,1)
 | |
|         vmadd(2,0,1)
 | |
|         vmsub(2,0,1)
 | |
|         vsub(2,2,0)
 | |
|         vadd(2,2,1)
 | |
|         vsll(2,2,8)
 | |
|         vsra(2,2,9)
 | |
|         vld_sih(0, p_a, 1)
 | |
|         vld_sih(1, p_b, 1)
 | |
|         vst_sih(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         v_d1[i] = v_d1[i] << 1;
 | |
|         v_d1[i] = v_d1[i] >> 8;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t uih_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     uint16_t a = 35;
 | |
|     uint16_t b = 113;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     uint16_t v_a[256] = {83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 92};
 | |
|     uint16_t v_b[256] = {83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 92};
 | |
|     uint16_t v_d0[256];
 | |
|     uint16_t v_d1[256];
 | |
| 
 | |
|     uint16_t *p_a = v_a;
 | |
|     uint16_t *p_b = v_b;
 | |
|     uint16_t *p_d0 = v_d0;
 | |
| 
 | |
|     uint16_t shiftl[2] = {1, 1};
 | |
|     uint16_t shiftr[2] = {8, 8};
 | |
|     vld_uiw(8, shiftl, 0)
 | |
|     vld_uiw(9, shiftr, 0)
 | |
| 
 | |
|     vld_uih(0, p_a, 1)
 | |
|     vld_uih(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*2){
 | |
|         p_a = p_a + SIMD_W*2;
 | |
|         p_b = p_b + SIMD_W*2;
 | |
|         vmull(2,0,1)
 | |
|         vmadd(2,0,1)
 | |
|         vmsub(2,0,1)
 | |
|         vsub(2,2,0)
 | |
|         vadd(2,2,1)
 | |
|         vsll(2,2,8)
 | |
|         vsrl(2,2,9)
 | |
|         vld_uih(0, p_a, 1)
 | |
|         vld_uih(1, p_b, 1)
 | |
|         vst_uih(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         v_d1[i] = v_d1[i] << 1;
 | |
|         v_d1[i] = v_d1[i] >> 8;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t sib_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int8_t a = 5;
 | |
|     int8_t b = -3;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     int8_t v_a[256] = {2, -7, -5, 3, -4, 5, 0, -6, -1, 7, -5, -5, 4, 3, 4, 6, 0, -5, -7, -4, -2, 6, -2, -2, 2, -2, 2, -4, -5, 1, -7, -4, 5, 2, -3, 3, -7, 7, -4, 4, -3, -4, 5, -1, 7, -4, 6, 5, 5, 7, -5, 7, 0, -3, 0, -1, 2, 2, -6, 0, 6, -6, -5, 6, 1, -3, -6, -6, 7, -7, -7, -3, -6, -5, 2, -3, 6, -2, -6, -7, -3, 1, 7, 3, -3, -6, 4, 1, -3, -5, 4, 7, -3, -3, 5, 5, -2, 1, -2, 4, -5, 7, 5, -3, 7, 4, -6, 5, -3, 1, -5, 2, 1, -4, 0, -3, -7, 1, 1, -6, 2, -2, -2, 3, -1, 3, -3, 1, 5, 5, 5, -1, 2, 3, 0, 3, 4, -6, 5, -5, -1, -1, 4, -6, 4, -1, -6, 3, 7, -5, 4, 7, -5, 4, 2, -5, -1, 4, 1, -2, 7, 5, -7, 6, -5, 0, -1, -1, -5, 6, -7, 4, 0, 4, 0, -6, -4, -7, 7, 7, -7, 3, 5, 3, 4, 3, 3, -2, 0, 5, 2, 2, 1, -2, -1, 1, -6, 1, -3, 6, 3, -4, -7, 1, 5, -4, -2, -5, -1, -1, 5, 5, 6, -1, -4, 5, 0, 0, 1, -6, 7, -5, 2, 1, 7, 7, 6, -7, -3, 2, -3, 6, -6, 6, -2, 0, -4, 7, 0, 6, -1, -5, 4, 6, 3, 5, -4, -2, 1, 4, 4, -1, -6, -1, -2, -4};
 | |
|     int8_t v_b[256] = {-5, -7, -4, -2, 6, -2, -2, 2, -2, 2, -4, -5, 1, -7, -4, 5, 2, -3, 3, -7, 7, -4, 4, -3, -4, 5, -1, 7, -4, 6, 5, 5, 7, -5, 7, 0, -3, 0, -1, 2, 2, -6, 0, 6, -6, -5, 6, 1, -3, -6, -6, 7, -7, -7, -3, -6, -5, 2, -3, 6, -2, -6, -7, -3, 1, 7, 3, -3, -6, 4, 1, -3, -5, 4, 7, -3, -3, 5, 5, -2, 1, -2, 4, -5, 7, 5, -3, 7, 4, -6, 5, -3, 1, -5, 2, 1, -4, 0, -3, -7, 1, 1, -6, 2, -2, -2, 3, -1, 3, -3, 1, 5, 5, 5, -1, 2, 3, 0, 3, 4, -6, 5, -5, -1, -1, 4, -6, 4, -1, -6, 3, 7, -5, 4, 7, -5, 4, 2, -5, -1, 4, 1, -2, 7, 5, -7, 6, -5, 0, -1, -1, -5, 6, -7, 4, 0, 4, 0, -6, -4, -7, 7, 7, -7, 3, 5, 3, 4, 3, 3, -2, 0, 5, 2, 2, 1, -2, -1, 1, -6, 1, -3, 6, 3, -4, -7, 1, 5, -4, -2, -5, -1, -1, 5, 5, 6, -1, -4, 5, 0, 0, 1, -6, 7, -5, 2, 1, 7, 7, 6, -7, -3, 2, -3, 6, -6, 6, -2, 0, -4, 7, 0, 6, -1, -5, 4, 6, 3, 5, -4, -2, 1, 4, 4, -1, -6, -1, -2, 2, -7, -5, 3, -4, 5, 0, -6, -1, 7, -5, -5, 4, 3, 4, 6, 0, -4};
 | |
|     int8_t v_d0[256];
 | |
|     int8_t v_d1[256];
 | |
| 
 | |
|     int8_t *p_a = v_a;
 | |
|     int8_t *p_b = v_b;
 | |
|     int8_t *p_d0 = v_d0;
 | |
| 
 | |
|     uint8_t shiftl[4] = {1, 1, 1, 1};
 | |
|     uint8_t shiftr[4] = {4, 4, 4, 4};
 | |
|     vld_uiw(8, shiftl, 0)
 | |
|     vld_uiw(9, shiftr, 0)
 | |
| 
 | |
|     vld_sib(0, p_a, 1)
 | |
|     vld_sib(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*4){
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vmull(2,0,1)
 | |
|         vmadd(2,0,1)
 | |
|         vmsub(2,0,1)
 | |
|         vsub(2,2,0)
 | |
|         vadd(2,2,1)
 | |
|         vsll(2,2,8)
 | |
|         vsra(2,2,9)
 | |
|         vld_sib(0, p_a, 1)
 | |
|         vld_sib(1, p_b, 1)
 | |
|         vst_sib(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*4;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         v_d1[i] = v_d1[i] << 1;
 | |
|         v_d1[i] = v_d1[i] >> 4;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t uib_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     uint8_t a = 5;
 | |
|     uint8_t b = 3;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     uint8_t v_a[256] = {2, 7, 5, 3, 4, 5, 0, 6, 1, 7, 5, 5, 4, 3, 4, 6, 0, 5, 7, 4, 2, 6, 2, 2, 2, 2, 2, 4, 5, 1, 7, 4, 5, 2, 3, 3, 7, 7, 4, 4, 3, 4, 5, 1, 7, 4, 6, 5, 5, 7, 5, 7, 0, 3, 0, 1, 2, 2, 6, 0, 6, 6, 5, 6, 1, 3, 6, 6, 7, 7, 7, 3, 6, 5, 2, 3, 6, 2, 6, 7, 3, 1, 7, 3, 3, 6, 4, 1, 3, 5, 4, 7, 3, 3, 5, 5, 2, 1, 2, 4, 5, 7, 5, 3, 7, 4, 6, 5, 3, 1, 5, 2, 1, 4, 0, 3, 7, 1, 1, 6, 2, 2, 2, 3, 1, 3, 3, 1, 5, 5, 5, 1, 2, 3, 0, 3, 4, 6, 5, 5, 1, 1, 4, 6, 4, 1, 6, 3, 7, 5, 4, 7, 5, 4, 2, 5, 1, 4, 1, 2, 7, 5, 7, 6, 5, 0, 1, 1, 5, 6, 7, 4, 0, 4, 0, 6, 4, 7, 7, 7, 7, 3, 5, 3, 4, 3, 3, 2, 0, 5, 2, 2, 1, 2, 1, 1, 6, 1, 3, 6, 3, 4, 7, 1, 5, 4, 2, 5, 1, 1, 5, 5, 6, 1, 4, 5, 0, 0, 1, 6, 7, 5, 2, 1, 7, 7, 6, 7, 3, 2, 3, 6, 6, 6, 2, 0, 4, 7, 0, 6, 1, 5, 4, 6, 3, 5, 4, 2, 1, 4, 4, 1, 6, 1, 2, 4};
 | |
|     uint8_t v_b[256] = {5, 7, 4, 2, 6, 2, 2, 2, 2, 2, 4, 5, 1, 7, 4, 5, 2, 3, 3, 7, 7, 4, 4, 3, 4, 5, 1, 7, 4, 6, 5, 5, 7, 5, 7, 0, 3, 0, 1, 2, 2, 6, 0, 6, 6, 5, 6, 1, 3, 6, 6, 7, 7, 7, 3, 6, 5, 2, 3, 6, 2, 6, 7, 3, 1, 7, 3, 3, 6, 4, 1, 3, 5, 4, 7, 3, 3, 5, 5, 2, 1, 2, 4, 5, 7, 5, 3, 7, 4, 6, 5, 3, 1, 5, 2, 1, 4, 0, 3, 7, 1, 1, 6, 2, 2, 2, 3, 1, 3, 3, 1, 5, 5, 5, 1, 2, 3, 0, 3, 4, 6, 5, 5, 1, 1, 4, 6, 4, 1, 6, 3, 7, 5, 4, 7, 5, 4, 2, 5, 1, 4, 1, 2, 7, 5, 7, 6, 5, 0, 1, 1, 5, 6, 7, 4, 0, 4, 0, 6, 4, 7, 7, 7, 7, 3, 5, 3, 4, 3, 3, 2, 0, 5, 2, 2, 1, 2, 1, 1, 6, 1, 3, 6, 3, 4, 7, 1, 5, 4, 2, 5, 1, 1, 5, 5, 6, 1, 4, 5, 0, 0, 1, 6, 7, 5, 2, 1, 7, 7, 6, 7, 3, 2, 3, 6, 6, 6, 2, 0, 4, 7, 0, 6, 1, 5, 4, 6, 3, 5, 4, 2, 1, 4, 4, 1, 6, 1, 2, 2, 7, 5, 3, 4, 5, 0, 6, 1, 7, 5, 5, 4, 3, 4, 6, 0, 4};
 | |
|     uint8_t v_d0[256];
 | |
|     uint8_t v_d1[256];
 | |
| 
 | |
|     uint8_t *p_a = v_a;
 | |
|     uint8_t *p_b = v_b;
 | |
|     uint8_t *p_d0 = v_d0;
 | |
| 
 | |
|     uint8_t shiftl[4] = {1, 1, 1, 1};
 | |
|     uint8_t shiftr[4] = {4, 4, 4, 4};
 | |
|     vld_uiw(8, shiftl, 0)
 | |
|     vld_uiw(9, shiftr, 0)
 | |
| 
 | |
|     vld_uib(0, p_a, 1)
 | |
|     vld_uib(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*4){
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vmull(2,0,1)
 | |
|         vmadd(2,0,1)
 | |
|         vmsub(2,0,1)
 | |
|         vsub(2,2,0)
 | |
|         vadd(2,2,1)
 | |
|         vsll(2,2,8)
 | |
|         vsrl(2,2,9)
 | |
|         vld_uib(0, p_a, 1)
 | |
|         vld_uib(1, p_b, 1)
 | |
|         vst_uib(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*4;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i] + v_a[i] * v_b[i] - v_a[i] * v_b[i] - v_a[i] + v_b[i];
 | |
|         v_d1[i] = v_d1[i] << 1;
 | |
|         v_d1[i] = v_d1[i] >> 4;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t sihw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int16_t a = 4431;
 | |
|     int16_t b = -977;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     int16_t v_a[256] = {31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, -25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 986};
 | |
|     int16_t v_b[256] = {-25964, -10420, 10591, 18387, -72, -27290, -17956, 16671, -7064, 23947, -1473, 13293, -12916, 191, -14974, -30005, -8091, -19722, -23435, -69, 4113, 3432, 27961, -618, 660, -28635, 24275, 25812, -16048, -29275, -11623, -4293, -7881, -3072, -12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, -24922, 19792, -8055, 505, 13278, -31128, 718, -25631, -17336, -18325, 9021, -4872, 29411, 14059, -14991, -7484, -16624, -1074, 20256, 17419, 4956, 21760, -19031, 15016, 32652, -32195, 25705, 1899, 7788, 10858, -24402, -28464, -25207, -14174, -28881, -31196, -2981, -25670, 17823, 31203, 30466, 13466, 6305, 5822, -10110, -842, 12133, -2669, 8585, -14829, 10916, 8070, -32532, 28141, 18758, -19161, 32388, -7288, 28414, -29363, -28396, -18355, 2658, 23473, 25969, 6706, -32626, 27891, 12062, -27582, 2759, -14680, -24825, -24447, -27837, 9492, 25982, -21924, -6946, 25043, 14646, 20039, 18822, -14363, -16391, 27539, -30930, -22999, 12927, 6345, -21161, 4974, -31459, -6780, 15562, -18841, -13759, 20162, 32285, -2627, 4885, 26834, 27777, 13004, -14924, 22994, -18296, 32560, -27123, -19226, -31844, -18825, 11605, -6562, 26417, 8679, -385, 23600, -29805, 21464, -17727, 6791, 17400, -23574, -814, -20400, -24248, 20524, 14883, 19741, -23825, -8707, 30996, -27303, -9405, -13731, -6428, -18502, -1892, 24601, 26090, -26076, 5165, 19130, -13644, -11590, 29499, -18592, -32625, -6238, 31725, -9899, 7175, 5797, -28332, 17739, 18253, 2210, 2423, 3965, -8678, 7521, -7115, 18383, 13718, 14246, 8456, -26732, 6817, -32614, -25240, -18689, -12323, 22752, 31419, 9487, -10463, -15174, 6310, 28280, -10041, 4824, 12311, -12218, -17051, -9411, -3750, 31689, 25828, -15092, -4717, -25287, -31588, -21071, 11426, -27236, -4123, 18948, -20084, -1076, 1726, -7119, -32740, -29594, 31891, -20565, 20631, 12774, -24314, -9825, 18402, -22729, 1501, 986};
 | |
|     int32_t v_d0[256];
 | |
|     int32_t v_d1[256];
 | |
| 
 | |
|     int16_t *p_a = v_a;
 | |
|     int16_t *p_b = v_b;
 | |
|     int32_t *p_d0 = v_d0;
 | |
| 
 | |
|     vld_sih(0, p_a, 1)
 | |
|     vld_sih(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*2){
 | |
|         p_a = p_a + SIMD_W*2;
 | |
|         p_b = p_b + SIMD_W*2;
 | |
|         vmulw(2,0,1)
 | |
|         vld_sih(0, p_a, 1)
 | |
|         vld_sih(1, p_b, 1)
 | |
|         vst_siw(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|         vst_siw(p_d0, 1, 3)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t uihw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     uint16_t a = 4431;
 | |
|     uint16_t b = 977;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     uint16_t v_a[256] = {31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 986};
 | |
|     uint16_t v_b[256] = {25964, 10420, 10591, 18387, 72, 27290, 17956, 16671, 7064, 23947, 1473, 13293, 12916, 191, 14974, 30005, 8091, 19722, 23435, 69, 4113, 3432, 27961, 618, 660, 28635, 24275, 25812, 16048, 29275, 11623, 4293, 7881, 3072, 12244, 18456, 4407, 22162, 7440, 23991, 835, 10023, 24922, 19792, 8055, 505, 13278, 31128, 718, 25631, 17336, 18325, 9021, 4872, 29411, 14059, 14991, 7484, 16624, 1074, 20256, 17419, 4956, 21760, 19031, 15016, 32652, 32195, 25705, 1899, 7788, 10858, 24402, 28464, 25207, 14174, 28881, 31196, 2981, 25670, 17823, 31203, 30466, 13466, 6305, 5822, 10110, 842, 12133, 2669, 8585, 14829, 10916, 8070, 32532, 28141, 18758, 19161, 32388, 7288, 28414, 29363, 28396, 18355, 2658, 23473, 25969, 6706, 32626, 27891, 12062, 27582, 2759, 14680, 24825, 24447, 27837, 9492, 25982, 21924, 6946, 25043, 14646, 20039, 18822, 14363, 16391, 27539, 30930, 22999, 12927, 6345, 21161, 4974, 31459, 6780, 15562, 18841, 13759, 20162, 32285, 2627, 4885, 26834, 27777, 13004, 14924, 22994, 18296, 32560, 27123, 19226, 31844, 18825, 11605, 6562, 26417, 8679, 385, 23600, 29805, 21464, 17727, 6791, 17400, 23574, 814, 20400, 24248, 20524, 14883, 19741, 23825, 8707, 30996, 27303, 9405, 13731, 6428, 18502, 1892, 24601, 26090, 26076, 5165, 19130, 13644, 11590, 29499, 18592, 32625, 6238, 31725, 9899, 7175, 5797, 28332, 17739, 18253, 2210, 2423, 3965, 8678, 7521, 7115, 18383, 13718, 14246, 8456, 26732, 6817, 32614, 25240, 18689, 12323, 22752, 31419, 9487, 10463, 15174, 6310, 28280, 10041, 4824, 12311, 12218, 17051, 9411, 3750, 31689, 25828, 15092, 4717, 25287, 31588, 21071, 11426, 27236, 4123, 18948, 20084, 1076, 1726, 7119, 32740, 29594, 31891, 20565, 20631, 12774, 24314, 9825, 18402, 22729, 1501, 986};
 | |
|     uint32_t v_d0[256];
 | |
|     uint32_t v_d1[256];
 | |
| 
 | |
|     uint16_t *p_a = v_a;
 | |
|     uint16_t *p_b = v_b;
 | |
|     uint32_t *p_d0 = v_d0;
 | |
| 
 | |
|     vld_uih(0, p_a, 1)
 | |
|     vld_uih(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*2){
 | |
|         p_a = p_a + SIMD_W*2;
 | |
|         p_b = p_b + SIMD_W*2;
 | |
|         vmulw(2,0,1)
 | |
|         vld_uih(0, p_a, 1)
 | |
|         vld_uih(1, p_b, 1)
 | |
|         vst_uiw(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|         vst_uiw(p_d0, 1, 3)
 | |
|         p_d0 = p_d0 + SIMD_W*1;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t sibh_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int8_t a = 5;
 | |
|     int8_t b = -3;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     int8_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
 | |
|     int8_t v_b[256] = {83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 92};
 | |
|     int16_t v_d0[256];
 | |
|     int16_t v_d1[256];
 | |
| 
 | |
|     int8_t *p_a = v_a;
 | |
|     int8_t *p_b = v_b;
 | |
|     int16_t *p_d0 = v_d0;
 | |
| 
 | |
|     vld_sib(0, p_a, 1)
 | |
|     vld_sib(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*4){
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vmulw(2,0,1)
 | |
|         vld_sib(0, p_a, 1)
 | |
|         vld_sib(1, p_b, 1)
 | |
|         vst_sih(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|         vst_uih(p_d0, 1, 3)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t uibh_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     uint8_t a = 5;
 | |
|     uint8_t b = 3;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     uint8_t v_a[256] = {83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 92};
 | |
|     uint8_t v_b[256] = {83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 92};
 | |
|     uint16_t v_d0[256];
 | |
|     uint16_t v_d1[256];
 | |
| 
 | |
|     uint8_t *p_a = v_a;
 | |
|     uint8_t *p_b = v_b;
 | |
|     uint16_t *p_d0 = v_d0;
 | |
| 
 | |
|     vld_uib(0, p_a, 1)
 | |
|     vld_uib(1, p_b, 1)
 | |
|     for(i=0; i<len; i=i+SIMD_W*4){
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vmulw(2,0,1)
 | |
|         vld_uib(0, p_a, 1)
 | |
|         vld_uib(1, p_b, 1)
 | |
|         vst_uih(p_d0, 1, 2)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|         vst_uih(p_d0, 1, 3)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t sib_macw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int8_t a = 5;
 | |
|     int8_t b = -3;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     int8_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
 | |
|     int8_t v_b[256] = {83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 92};
 | |
|     int32_t v_d0[SIMD_W];
 | |
|     int32_t v_d1[1] = {0};
 | |
| 
 | |
|     int8_t *p_a = v_a;
 | |
|     int8_t *p_b = v_b;
 | |
|     int32_t *p_d0 = v_d0;
 | |
|     int32_t *p_d1 = v_d1;
 | |
| 
 | |
|     vld_siw( 8, p_d1, 0)
 | |
|     vadd( 9, 8, 8)
 | |
|     vadd(10, 8, 8)
 | |
|     vadd(11, 8, 8)
 | |
| 
 | |
|     for(i=0; i<len; i=i+SIMD_W*4*4){
 | |
|         vld_sib(0, p_a, 1)
 | |
|         vld_sib(4, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vld_sib(1, p_a, 1)
 | |
|         vld_sib(5, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vld_sib(2, p_a, 1)
 | |
|         vld_sib(6, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vld_sib(3, p_a, 1)
 | |
|         vld_sib(7, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vmaddw( 8,0,4)
 | |
|         vmaddw( 8,1,5)
 | |
|         vmaddw( 8,2,6)
 | |
|         vmaddw( 8,3,7)
 | |
|     }
 | |
|     vadd( 8, 8, 9)
 | |
|     vadd( 8, 8,10)
 | |
|     vadd( 8, 8,11)
 | |
| 
 | |
|     vst_sih(p_d0, 1, 8)
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[0] += v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=1; i<SIMD_W; i++){
 | |
|         v_d0[0] += v_d0[i];
 | |
|     }
 | |
| 
 | |
|     if(v_d1[0] != v_d0[0]){
 | |
|         iot_printf("incorrect, golden = %d, output = %d\n", v_d1[0], v_d0[0]);
 | |
|         error = 1;
 | |
|     }
 | |
|     iot_printf("mac finish\n");
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t uib_macw_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     uint8_t a = 5;
 | |
|     uint8_t b = 3;
 | |
|     a = a;
 | |
|     b = b;
 | |
|     uint8_t v_a[256] = {83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 92};
 | |
|     uint8_t v_b[256] = {83, 89, 6, 60, 57, 32, 123, 7, 59, 77, 101, 97, 84, 24, 49, 105, 52, 31, 74, 82, 10, 14, 27, 29, 12, 6, 23, 91, 49, 25, 51, 72, 14, 43, 81, 6, 116, 43, 58, 119, 103, 113, 109, 101, 87, 34, 100, 14, 126, 53, 82, 31, 91, 1, 112, 121, 13, 54, 25, 91, 98, 39, 21, 112, 24, 44, 116, 123, 92, 95, 50, 67, 37, 47, 71, 6, 24, 69, 42, 105, 127, 74, 55, 20, 78, 107, 26, 17, 96, 109, 60, 80, 43, 31, 106, 41, 95, 41, 79, 22, 78, 39, 77, 81, 25, 86, 18, 81, 109, 90, 125, 116, 93, 119, 25, 24, 54, 95, 37, 4, 108, 111, 50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, 87, 59, 91, 80, 15, 98, 74, 63, 42, 11, 69, 10, 72, 51, 17, 52, 87, 18, 34, 81, 62, 24, 87, 89, 3, 67, 52, 11, 105, 64, 11, 12, 54, 6, 27, 21, 51, 36, 52, 19, 110, 114, 125, 65, 88, 54, 52, 48, 20, 127, 58, 46, 62, 71, 17, 104, 40, 37, 66, 110, 42, 110, 55, 44, 72, 67, 17, 11, 17, 39, 31, 54, 106, 46, 76, 32, 47, 64, 70, 64, 23, 89, 67, 9, 94, 117, 54, 45, 70, 81, 110, 12, 50, 100, 123, 103, 42, 92, 68, 108, 81, 62, 32, 97, 125, 65, 44, 63, 64, 100, 78, 83, 96, 15, 30, 4, 24, 25, 57, 23, 71, 112, 92};
 | |
|     uint32_t v_d0[SIMD_W];
 | |
|     uint32_t v_d1[1] = {0};
 | |
| 
 | |
|     uint8_t *p_a = v_a;
 | |
|     uint8_t *p_b = v_b;
 | |
|     uint32_t *p_d0 = v_d0;
 | |
|     uint32_t *p_d1 = v_d1;
 | |
| 
 | |
|     vld_uiw( 8, p_d1, 0)
 | |
|     vadd( 9, 8, 8)
 | |
|     vadd(10, 8, 8)
 | |
|     vadd(11, 8, 8)
 | |
| 
 | |
|     for(i=0; i<len; i=i+SIMD_W*4*4){
 | |
|         vld_uib(0, p_a, 1)
 | |
|         vld_uib(4, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vld_uib(1, p_a, 1)
 | |
|         vld_uib(5, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vld_uib(2, p_a, 1)
 | |
|         vld_uib(6, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vld_uib(3, p_a, 1)
 | |
|         vld_uib(7, p_b, 1)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         p_b = p_b + SIMD_W*4;
 | |
|         vmaddw( 8,0,4)
 | |
|         vmaddw( 8,1,5)
 | |
|         vmaddw( 8,2,6)
 | |
|         vmaddw( 8,3,7)
 | |
|     }
 | |
|     vadd( 8, 8, 9)
 | |
|     vadd( 8, 8,10)
 | |
|     vadd( 8, 8,11)
 | |
| 
 | |
|     vst_uih(p_d0, 1, 8)
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[0] += v_a[i] * v_b[i];
 | |
|     }
 | |
| 
 | |
|     for(i=1; i<SIMD_W; i++){
 | |
|         v_d0[0] += v_d0[i];
 | |
|     }
 | |
| 
 | |
|     if(v_d1[0] != v_d0[0]){
 | |
|         error = 1;
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t sih_macw_test() {
 | |
|     int16_t a[8] = {12000, 14000, 16000, 18000, 20000, 22000, 24000, 26000};
 | |
|     int16_t b[8] = {28000, 30000, 32000, 34000, 36000, 38000, 40000, 42000};
 | |
|     vld_sih(0, a, 1);
 | |
|     vld_sih(1, b, 1);
 | |
|     int32_t zero[] = {0};
 | |
|     vld_siw(2, zero, 0);
 | |
|     vld_siw(3, zero, 0);
 | |
|     vld_siw(4, zero, 0);
 | |
|     vld_siw(5, zero, 0);
 | |
|     vmaddw(2, 0, 1);
 | |
|     int32_t out[16];
 | |
|     int32_t *po = out;
 | |
|     vst_siw(po, 1, 2);
 | |
|     po += SIMD_W;
 | |
|     vst_siw(po, 1, 3);
 | |
|     po += SIMD_W;
 | |
|     vst_siw(po, 1, 4);
 | |
|     po += SIMD_W;
 | |
|     vst_siw(po, 1, 5);
 | |
|     po += SIMD_W;
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| uint8_t gather8_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int8_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
 | |
|     int8_t v_d0[256];
 | |
|     int8_t v_d1[256];
 | |
|     const int8_t *lookup_table = table8;
 | |
| 
 | |
|     
 | |
|     int8_t *p_a = v_a;
 | |
|     int8_t *p_d0 = v_d0;
 | |
| 
 | |
|     for(i=0; i<len; i=i+SIMD_W*4){
 | |
|         vlds_sib(0, p_a, 4)
 | |
|         vldx_sib(1, (lookup_table + 128), 0)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         vsts_sib(p_d0, 4, 1)
 | |
|         p_d0 = p_d0 + SIMD_W*4;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = *((lookup_table + 128) + v_a[i]);
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t gather16_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int16_t v_a[256] = {83, 96, 15, -30, 4, -24, -25, 57, -23, 71, -112, 83, 89, -6, 60, 57, -32, -123, -7, 59, -77, 101, 97, 84, 24, 49, 105, -52, -31, 74, -82, -10, 14, 27, 29, -12, -6, -23, -91, 49, 25, 51, 72, -14, -43, 81, -6, 116, -43, -58, 119, -103, -113, 109, 101, -87, 34, -100, -14, -126, -53, 82, 31, -91, -1, -112, -121, -13, 54, -25, -91, 98, -39, -21, 112, -24, -44, 116, -123, -92, -95, 50, -67, 37, 47, -71, -6, -24, 69, -42, -105, 127, 74, -55, -20, 78, 107, 26, -17, -96, 109, -60, 80, -43, 31, 106, -41, -95, 41, -79, 22, -78, -39, -77, -81, 25, -86, -18, -81, 109, 90, -125, -116, -93, -119, 25, -24, -54, -95, 37, 4, 108, 111, -50, 93, 121, 33, 95, 75, 125, 104, 31, 85, 109, -87, 59, 91, -80, 15, -98, 74, -63, 42, 11, -69, -10, -72, 51, 17, 52, -87, -18, 34, -81, 62, 24, 87, -89, 3, -67, 52, -11, -105, -64, -11, 12, -54, 6, -27, 21, 51, -36, -52, -19, -110, 114, -125, 65, -88, -54, 52, 48, -20, 127, -58, 46, -62, 71, 17, 104, -40, 37, -66, -110, -42, -110, -55, -44, 72, 67, -17, -11, 17, 39, -31, -54, -106, -46, -76, -32, 47, 64, -70, 64, -23, -89, -67, 9, 94, -117, -54, 45, -70, 81, -110, -12, -50, 100, -123, -103, -42, 92, -68, 108, -81, -62, -32, -97, -125, -65, 44, 63, 64, 100, 78, 92};
 | |
|     int16_t v_d0[256];
 | |
|     int16_t v_d1[256];
 | |
|     const int16_t *lookup_table = table16;
 | |
|     for (uint32_t i = 0; i < 256; i++) {
 | |
|         table16[i] = 10 * i;
 | |
|     }
 | |
|     for(i=0; i<len; i++){
 | |
|         iot_printf("look_up[%d] = %d\n", i, lookup_table[i]);
 | |
|     }
 | |
| 
 | |
|     
 | |
|     int16_t *p_a = v_a;
 | |
|     int16_t *p_d0 = v_d0;
 | |
| 
 | |
|     for(i=0; i<len; i=i+SIMD_W*2){
 | |
|         vlds_sih(0, p_a, 4)
 | |
|         p_a = p_a + SIMD_W*2;
 | |
|         vadd(0, 0, 0)
 | |
|         vldx_sih(1, (lookup_table + 128), 0)
 | |
|         vsts_sih(p_d0, 4, 1)
 | |
|         p_d0 = p_d0 + SIMD_W*2;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[i] = *((lookup_table + 128) + v_a[i]);
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         iot_printf("g[%d] = %d, o[%d] = %d\n", i, v_d1[i], i, v_d0[i]);
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t scatter8_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int8_t v_a[256] = {127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -69, -70, -71, -72, -73, -74, -75, -76, -77, -78, -79, -80, -81, -82, -83, -84, -85, -86, -87, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100, -101, -102, -103, -104, -105, -106, -107, -108, -109, -110, -111, -112, -113, -114, -115, -116, -117, -118, -119, -120, -121, -122, -123, -124, -125, -126, -127, -128};
 | |
|     int8_t v_d0[256];
 | |
|     int8_t v_d1[256];
 | |
|     const int8_t *lookup_table = table8;
 | |
| 
 | |
|     
 | |
|     int8_t *p_a = v_a;
 | |
|     const int8_t *p_t = lookup_table;
 | |
| 
 | |
|     for(i=0; i<len; i=i+SIMD_W*4){
 | |
|         vlds_sib(0, p_a, 4)
 | |
|         p_a = p_a + SIMD_W*4;
 | |
|         vlds_sib(1, p_t, 4)
 | |
|         vstx_sib((v_d0+128), 0, 1)
 | |
|         p_t = p_t + SIMD_W*4;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[v_a[i]+128] = *(lookup_table + i);
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
| uint8_t scatter16_test(){
 | |
|     int i;
 | |
|     uint8_t error = 0;
 | |
|     int len = 256;
 | |
|     int16_t v_a[256] = {127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -69, -70, -71, -72, -73, -74, -75, -76, -77, -78, -79, -80, -81, -82, -83, -84, -85, -86, -87, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100, -101, -102, -103, -104, -105, -106, -107, -108, -109, -110, -111, -112, -113, -114, -115, -116, -117, -118, -119, -120, -121, -122, -123, -124, -125, -126, -127, -128};
 | |
|     int16_t v_d0[256];
 | |
|     int16_t v_d1[256];
 | |
|     const int16_t *lookup_table = table16;
 | |
| 
 | |
|     
 | |
|     int16_t *p_a = v_a;
 | |
|     const int16_t *p_t = lookup_table;
 | |
| 
 | |
|     for(i=0; i<len; i=i+SIMD_W*2){
 | |
|         vlds_sih(0, p_a, 4)
 | |
|         p_a = p_a + SIMD_W*2;
 | |
|         vlds_sih(1, p_t, 4)
 | |
|         vadd(0, 0, 0)
 | |
|         vstx_sih((v_d0+128), 0, 1)
 | |
|         p_t = p_t + SIMD_W*2;
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         v_d1[(v_a[i]+128)] = *(lookup_table + i);
 | |
|     }
 | |
| 
 | |
|     for(i=0; i<len; i++){
 | |
|         if(v_d0[i] != v_d1[i]){
 | |
|             error = 1;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return error;
 | |
| }
 | |
| 
 | |
/*
 * matrix_test - smoke test for one matrix-multiply routine.
 *
 * Fills two operand buffers with constants, calls the routine selected by the
 * preprocessor switches below (int8 -> int8 is the active variant), then
 * prints the returned status byte and all 1024 entries of the output buffer.
 *
 * The output buffer is zero-initialised in full: the print loop walks all
 * 1024 entries, so any entry the routine does not write would otherwise be
 * read while still indeterminate.
 *
 * NOTE(review): the trailing numeric arguments (32, 8, 29, ...) are presumably
 * matrix dimensions plus a shift/scale - confirm against simd_matrix_function.h.
 */
void matrix_test() {
#if 0
    int8_t a[256];
    int8_t b[256];
    int32_t out[1024] = {0};
    for (uint32_t i = 0; i < 256; i++) {
        a[i] = 1;
        b[i] = -1;
    }
    uint8_t result = matrix_multi_int8_to_int32(a, b, out, 32, 8, 29);
    iot_printf("result = %d\n", result);
    for (uint32_t i = 0; i < 1024; i++) {
        iot_printf("out[%d] = %d\n", i, out[i]);
    }
#else
#if 1
    int8_t a[512];
    int8_t b[512];
    int8_t out[1024] = {0};
    for (uint32_t i = 0; i < 512; i++) {
        a[i] = 1;
        b[i] = -3;
    }
    uint8_t result = matrix_multi_int8_to_int8(a, b, out, 32, 8, 29, 1);
    iot_printf("result = %d\n", result);
    for (uint32_t i = 0; i < 1024; i++) {
        iot_printf("out[%d] = %d\n", i, out[i]);
    }
#else
    float a[256];
    float b[256];
    float out[1024] = {0};
    for (uint32_t i = 0; i < 256; i++) {
        a[i] = 1;
        b[i] = 1;
    }
    uint8_t result = matrix_multi_float(a, b, out, 32, 8, 32);
    // The float results are printed as raw 32-bit patterns.
    uint32_t *out_bin = (uint32_t *)out;
    iot_printf("result = %d\n", result);
    for (uint32_t i = 0; i < 1024; i++) {
        iot_printf("out[%d] = %08x\n", i, out_bin[i]);
    }
#endif
#endif
}
 | |
| 
 | |
/*
 * vector_add_test_uint8 - check vector_add_const_uint8 and vector_add_uint8
 * on 100-element uint8_t vectors against a scalar reference.
 *
 * Phase 1 adds the constant b[0] to every element of a; phase 2 adds a and b
 * element-wise. With SATURATION defined the reference clamps at 255,
 * otherwise it wraps modulo 256. Each mismatch is logged, and each phase ends
 * with a completion message naming the routine it exercised.
 */
void vector_add_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];
    uint8_t o[100];   // routine output
    uint8_t g[100];   // golden (scalar reference)
    for (uint32_t i = 0; i < 100; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i + 23;   // wraps modulo 256 for larger i
    }

    // Phase 1: vector + constant.
    vector_add_const_uint8(a, b[0], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
    #ifdef SATURATION
        uint16_t gg = a[i] + b[0];
        g[i] = gg > 255 ? 255 : gg;
    #else
        g[i] = a[i] + b[0];
    #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("add const of uint8 test end\n");

    // Phase 2: vector + vector.
    vector_add_uint8(a, b, o, 100);
    for (uint8_t i = 0; i < 100; i++) {
    #ifdef SATURATION
        uint16_t gg = a[i] + b[i];
        g[i] = gg > 255 ? 255 : gg;
    #else
        g[i] = a[i] + b[i];
    #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("add of uint8 test end\n");
}
 | |
| 
 | |
// Checks the three uint8 subtraction kernels against a scalar reference over
// 100 generated bytes and prints every mismatching element:
//   vector_sub_const_uint8  -> a[i] - c
//   vector_const_sub_uint8  -> c - a[i]
//   vector_sub_uint8        -> a[i] - b[i]
// When SATURATION is defined the reference clamps negative differences to 0;
// otherwise the difference is truncated to 8 bits by the uint8_t store.
//
// The const-minus-vector phase logs its own "const sub" label so it can be
// told apart from the vector-minus-const phase in the output.
void vector_sub_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];
    uint8_t o[100];   // output of the function under test
    uint8_t g[100];   // scalar reference ("golden") values
    for (uint32_t i = 0; i < 100; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i + 23;   // exceeds 255 for larger i; only the low byte is stored
    }

    // Phase 1: vector - constant (b[0]).
    vector_sub_const_uint8(a, b[0], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        #ifdef SATURATION
        if (a[i] < b[0]) {
            g[i] = 0;
        } else {
            g[i] = a[i] - b[0];
        }
        #else
            g[i] = a[i] - b[0];
        #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("sub const of uint8 test end\n");

    // Phase 2: constant (b[0]) - vector.
    vector_const_sub_uint8(a, b[0], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        #ifdef SATURATION
        if (a[i] > b[0]) {
            g[i] = 0;
        } else {
            g[i] = b[0] - a[i];
        }
        #else
            g[i] = b[0] - a[i];
        #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, b = %d, a = %d, golden %d, output %d\n", i, b[0], a[i], g[i], o[i]);
        }
    }
    iot_printf("const sub of uint8 test end\n");

    // Phase 3: vector - vector.
    vector_sub_uint8(a, b, o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        #ifdef SATURATION
        if (a[i] < b[i]) {
            g[i] = 0;
        } else {
            g[i] = a[i] - b[i];
        }
        #else
            g[i] = a[i] - b[i];
        #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("sub of uint8 test end\n");
}
 | |
| 
 | |
// Exercises the uint8 multiply kernels on 100 generated bytes and prints every
// element that differs from a scalar reference. Covered, in order:
//   full 16-bit product   (vector*vector, vector*constant)
//   high byte of product  (vector*vector, vector*constant)
//   low byte of product   (vector*vector, vector*constant)
// With SATURATION defined the low-byte reference clamps the product at 255;
// otherwise it keeps the product's low 8 bits.
//
// Fixes over the previous version:
//   - the high-byte mismatch reports print h[i] (the buffer actually being
//     checked) rather than l[i], which had not been written at that point;
//   - the buffers are checked before use.
void vector_mul_test_uint8() {
    uint8_t length = 100;
    uint8_t *a = (uint8_t *)os_mem_malloc(1, length);
    uint8_t *b = (uint8_t *)os_mem_malloc(1, length);
    uint8_t *l = (uint8_t *)os_mem_malloc(1, length);        // low-byte results
    uint8_t *h = (uint8_t *)os_mem_malloc(1, length);        // high-byte results
    uint16_t *w = (uint16_t *)os_mem_malloc(1, length * 2);  // full-width results

    // NOTE(review): assumes os_mem_malloc reports failure by returning NULL - confirm.
    if (a == NULL || b == NULL || l == NULL || h == NULL || w == NULL) {
        iot_printf("vector_mul_test_uint8: buffer allocation failed\n");
        goto cleanup;
    }

    // The expressions go negative for some i; the uint8_t stores keep the low byte.
    for (uint8_t i = 0; i < length; i++) {
        a[i] = -2 * (i - 25);
        b[i] = 3 * (i - 33);
        //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]);
    }

    vector_multiply_uint8(a, b, w, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = a[i] * b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_uint8 finished\n");

    vector_multiply_const_uint8(a, b[1], w, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = a[i] * b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint8 finished\n");

    vector_multiply_uint8_high_8bit(a, b, h, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = (a[i] * b[i]) >> 8;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint8_high finished\n");

    vector_multiply_const_uint8_high_8bit(a, b[1], h, length);
    for (uint8_t i = 0; i < length; i++) {
        uint16_t g = (a[i] * b[1]) >> 8;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint8_high finished\n");

    vector_multiply_uint8_low_8bit(a, b, l, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        uint16_t g = (a[i] * b[i]);
        if (g > 255) {
            g = 255;
        }
        #else
        uint16_t g = (a[i] * b[i]) & 0xff;
        #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint8_low finished\n");

    vector_multiply_const_uint8_low_8bit(a, b[1], l, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        uint16_t g = (a[i] * b[1]);
        if (g > 255) {
            g = 255;
        }
        #else
        uint16_t g = (a[i] * b[1]) & 0xff;
        #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint8_low finished\n");

cleanup:
    // Release only what was obtained; whether os_mem_free tolerates NULL is
    // not visible from here, so each pointer is guarded explicitly.
    if (a != NULL) {
        os_mem_free(a);
    }
    if (b != NULL) {
        os_mem_free(b);
    }
    if (h != NULL) {
        os_mem_free(h);
    }
    if (l != NULL) {
        os_mem_free(l);
    }
    if (w != NULL) {
        os_mem_free(w);
    }
}
 | |
| 
 | |
// Exercises the uint8 multiply-accumulate kernels on 100 generated elements
// and prints every element that differs from a scalar reference:
//   vector_mul_add_uint8            -> j[i] + a[i]*b[i]   (8-bit result)
//   vector_mul_sub_uint8            -> j[i] - a[i]*b[i]   (8-bit result)
//   vector_mul_add_uint8_to_uint32  -> J[i] + a[i]*b[i]   (32-bit result)
//   vector_mul_sub_uint8_to_uint32  -> J[i] - a[i]*b[i]   (32-bit result)
// With SATURATION defined the 8-bit references clamp both the product and the
// final value to 0..255; otherwise they are truncated by the uint8_t store.
//
// All six work buffers are released before returning (they were previously
// leaked), and they are checked before being written.
void vector_madd_msub_test_uint8() {
    uint8_t length = 100;
    uint8_t *a = os_mem_malloc(1, length);
    uint8_t *b = os_mem_malloc(1, length);
    uint8_t *j = os_mem_malloc(1, length);        // 8-bit accumulator input
    uint8_t *o = os_mem_malloc(1, length);        // 8-bit output
    uint32_t *J = os_mem_malloc(1, length * 4);   // 32-bit accumulator input
    uint32_t *O = os_mem_malloc(1, length * 4);   // 32-bit output

    // NOTE(review): assumes os_mem_malloc reports failure by returning NULL - confirm.
    if (a == NULL || b == NULL || j == NULL || o == NULL || J == NULL || O == NULL) {
        iot_printf("vector_madd_msub_test_uint8: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint8_t i = 0; i < length; i++) {
        a[i] = 2 * (i + 25);
        b[i] = 3 * (i + 33);   // exceeds 255 for larger i; only the low byte is stored
        j[i] = i + 50;
        J[i] = i * 100 + 50000;
        //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]);
    }

    vector_mul_add_uint8(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        uint16_t l = a[i] * b[i];
        l = l > 255 ? 255 : l;
        uint16_t g = j[i] + l;
        g = g > 255 ? 255 : g;
        #else
        uint8_t g = j[i] + a[i] * b[i];
        #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, j = %d, a = %d, b = %d, golden %d, output %d\n", i, j[i], a[i], b[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint8 finished\n");

    vector_mul_sub_uint8(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        uint16_t l = a[i] * b[i];
        l = l > 255 ? 255 : l;
        uint8_t g = j[i] > l ? j[i] - l : 0;
        #else
        uint8_t g = j[i] - a[i] * b[i];
        #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, j = %d, a = %d, b = %d, golden %d, output %d\n", i, j[i], a[i], b[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint8 finished\n");

    vector_mul_add_uint8_to_uint32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        uint32_t g = J[i] + (uint32_t)a[i] * (uint32_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_add_uint8_to_uint32 finished\n");

    vector_mul_sub_uint8_to_uint32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        uint32_t g = J[i] - (uint32_t)a[i] * (uint32_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_sub_uint8_to_uint32 finished\n");

cleanup:
    // Release only what was obtained; whether os_mem_free tolerates NULL is
    // not visible from here, so each pointer is guarded explicitly.
    if (a != NULL) {
        os_mem_free(a);
    }
    if (b != NULL) {
        os_mem_free(b);
    }
    if (j != NULL) {
        os_mem_free(j);
    }
    if (o != NULL) {
        os_mem_free(o);
    }
    if (J != NULL) {
        os_mem_free(J);
    }
    if (O != NULL) {
        os_mem_free(O);
    }
}
 | |
| 
 | |
// Runs vector_min_uint8 and vector_min_const_uint8 over 100 generated bytes
// and prints every element whose result differs from the scalar minimum
// computed here.
void vector_min_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Element-wise minimum of the two vectors.
    vector_min_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = rhs[idx];
        if (lhs[idx] < rhs[idx]) {
            expected = lhs[idx];
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Minimum of each element and the constant rhs[0].
    const uint8_t bound = rhs[0];
    vector_min_const_uint8(lhs, bound, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = bound;
        if (lhs[idx] < bound) {
            expected = lhs[idx];
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("min of uint8 test end\n");
}
 | |
| 
 | |
// Runs vector_max_uint8 and vector_max_const_uint8 over 100 generated bytes
// and prints every element whose result differs from the scalar maximum
// computed here.
void vector_max_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Element-wise maximum of the two vectors.
    vector_max_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = rhs[idx];
        if (lhs[idx] > rhs[idx]) {
            expected = lhs[idx];
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Maximum of each element and the constant rhs[0].
    const uint8_t bound = rhs[0];
    vector_max_const_uint8(lhs, bound, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = bound;
        if (lhs[idx] > bound) {
            expected = lhs[idx];
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("max of uint8 test end\n");
}
 | |
| 
 | |
// Compares vector_equal_uint8 and vector_equal_const_uint8 with a scalar
// reference (1 where the operands match, 0 otherwise) over 100 generated
// bytes and prints each disagreement.
void vector_equal_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector == vector.
    vector_equal_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] == rhs[idx]) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Vector == constant (rhs[11]).
    const uint8_t pivot = rhs[11];
    vector_equal_const_uint8(lhs, pivot, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] == pivot) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("equal of uint8 test end\n");
}
 | |
| 
 | |
// Compares vector_not_equal_uint8 and vector_not_equal_const_uint8 with a
// scalar reference (1 where the operands differ, 0 otherwise) over 100
// generated bytes and prints each disagreement.
void vector_not_equal_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector != vector.
    vector_not_equal_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] != rhs[idx]) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Vector != constant (rhs[11]).
    const uint8_t pivot = rhs[11];
    vector_not_equal_const_uint8(lhs, pivot, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] != pivot) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("not equal of uint8 test end\n");
}
 | |
| 
 | |
// Checks the three uint8 "less than" kernels against a scalar reference over
// 100 generated bytes, printing each disagreement. The reference used for
// each call is:
//   vector_less_than_uint8        -> lhs[i] <  rhs[i]
//   vector_less_than_const_uint8  -> lhs[i] <  c
//   vector_const_less_than_uint8  -> c      <  lhs[i]
// where c is rhs[11] and a true comparison is encoded as 1, false as 0.
void vector_less_than_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    vector_less_than_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] < rhs[idx]) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    const uint8_t pivot = rhs[11];

    vector_less_than_const_uint8(lhs, pivot, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] < pivot) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    vector_const_less_than_uint8(lhs, pivot, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] > pivot) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("less than of uint8 test end\n");
}
 | |
| 
 | |
// Checks the three uint8 "greater or equal" kernels against a scalar
// reference over 100 generated bytes, printing each disagreement. The
// reference used for each call is:
//   vector_greater_or_equal_uint8        -> lhs[i] >= rhs[i]
//   vector_greater_or_equal_const_uint8  -> lhs[i] >= c
//   vector_const_greater_or_equal_uint8  -> c      >= lhs[i]
// where c is rhs[11] and a true comparison is encoded as 1, false as 0.
void vector_greater_or_equal_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    vector_greater_or_equal_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] >= rhs[idx]) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    const uint8_t pivot = rhs[11];

    vector_greater_or_equal_const_uint8(lhs, pivot, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] >= pivot) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    vector_const_greater_or_equal_uint8(lhs, pivot, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        uint8_t expected = 0;
        if (lhs[idx] <= pivot) {
            expected = 1;
        }
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("greater or equal of uint8 test end\n");
}
 | |
| 
 | |
// Checks vector_logic_and_uint8 and vector_logic_and_const_uint8 against the
// scalar bitwise AND over 100 generated bytes and prints every mismatching
// element. The closing log line names this test as "logic and" (it previously
// said "logic xor", a copy-paste slip that made the log ambiguous).
void vector_logic_and_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];
    uint8_t o[100];   // output of the function under test
    uint8_t g[100];   // scalar reference ("golden") values
    for (uint32_t i = 0; i < 100; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i - 23;   // out of uint8_t range for some i; only the low byte is stored
    }

    // Vector & vector.
    vector_logic_and_uint8(a, b, o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        g[i] = a[i] & b[i];
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    // Vector & constant (b[11]).
    vector_logic_and_const_uint8(a, b[11], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        g[i] = a[i] & b[11];
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }
    iot_printf("logic and of uint8 test end\n");
}
 | |
| 
 | |
// Runs vector_logic_or_uint8 and vector_logic_or_const_uint8 over 100
// generated bytes and prints every element whose result differs from the
// scalar bitwise OR computed here.
void vector_logic_or_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector | vector.
    vector_logic_or_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = lhs[idx] | rhs[idx];
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Vector | constant (rhs[11]).
    const uint8_t mask = rhs[11];
    vector_logic_or_const_uint8(lhs, mask, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = lhs[idx] | mask;
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("logic or of uint8 test end\n");
}
 | |
| 
 | |
// Runs vector_logic_xor_uint8 and vector_logic_xor_const_uint8 over 100
// generated bytes and prints every element whose result differs from the
// scalar bitwise XOR computed here.
void vector_logic_xor_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    uint8_t actual[100];

    // Generated operands; the uint8_t stores keep only the low 8 bits.
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector ^ vector.
    vector_logic_xor_uint8(lhs, rhs, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = lhs[idx] ^ rhs[idx];
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Vector ^ constant (rhs[11]).
    const uint8_t mask = rhs[11];
    vector_logic_xor_const_uint8(lhs, mask, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = lhs[idx] ^ mask;
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("logic xor of uint8 test end\n");
}
 | |
| 
 | |
// Checks vector_logic_xnor_uint8 and vector_logic_xnor_const_uint8 against
// the scalar reference ~(x ^ y), truncated to 8 bits, over 100 generated
// bytes and prints every mismatching element. The closing log line names this
// test as "logic xnor" (it previously said "logic xor", which is the label of
// a different test).
void vector_logic_xnor_test_uint8() {
    uint8_t a[100];
    uint8_t b[100];
    uint8_t o[100];   // output of the function under test
    uint8_t g[100];   // scalar reference ("golden") values
    for (uint32_t i = 0; i < 100; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i - 23;   // out of uint8_t range for some i; only the low byte is stored
    }

    // Vector XNOR vector. The complement is computed in int and truncated
    // by the uint8_t store.
    vector_logic_xnor_uint8(a, b, o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        g[i] = ~(a[i] ^ b[i]);
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    // Vector XNOR constant (b[11]).
    vector_logic_xnor_const_uint8(a, b[11], o, 100);
    for (uint8_t i = 0; i < 100; i++) {
        g[i] = ~(a[i] ^ b[11]);
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }
    iot_printf("logic xnor of uint8 test end\n");
}
 | |
| 
 | |
// Checks the three uint8 left-shift kernels against a scalar reference over
// 100 generated elements, printing each disagreement. The reference used for
// each call is (result truncated to 8 bits):
//   vector_left_shift_uint8        -> value[i] << amount[i]
//   vector_left_shift_const_uint8  -> value[i] << amount[2]
//   vector_const_left_shift_uint8  -> value[6] << amount[i]
// Shift amounts are generated in the range 1..7.
void vector_left_shift_test_uint8() {
    uint8_t value[100];
    uint8_t amount[100];
    uint8_t actual[100];

    for (uint32_t k = 0; k < 100; k++) {
        value[k] = k + 1;
        amount[k] = k % 7 + 1;
    }

    // Per-element shift amounts.
    vector_left_shift_uint8(value, amount, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = value[idx] << amount[idx];
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Every element shifted by the same amount.
    const uint8_t fixed_amount = amount[2];
    vector_left_shift_const_uint8(value, fixed_amount, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = value[idx] << fixed_amount;
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // One fixed value shifted by each per-element amount.
    const uint8_t fixed_value = value[6];
    vector_const_left_shift_uint8(amount, fixed_value, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = fixed_value << amount[idx];
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("left shift of uint8 test end\n");
}
 | |
| 
 | |
// Checks the three uint8 right-shift kernels against a scalar reference over
// 100 generated elements, printing each disagreement. The reference used for
// each call is:
//   vector_right_shift_uint8        -> value[i] >> amount[i]
//   vector_right_shift_const_uint8  -> value[i] >> amount[2]
//   vector_const_right_shift_uint8  -> value[6] >> amount[i]
// Shift amounts are generated in the range 1..7.
void vector_right_shift_test_uint8() {
    uint8_t value[100];
    uint8_t amount[100];
    uint8_t actual[100];

    for (uint32_t k = 0; k < 100; k++) {
        value[k] = k - 50;   // wraps for k < 50; the uint8_t store keeps the low byte
        amount[k] = k % 7 + 1;
    }

    // Per-element shift amounts.
    vector_right_shift_uint8(value, amount, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = value[idx] >> amount[idx];
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // Every element shifted by the same amount.
    const uint8_t fixed_amount = amount[2];
    vector_right_shift_const_uint8(value, fixed_amount, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = value[idx] >> fixed_amount;
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    // One fixed value shifted by each per-element amount.
    const uint8_t fixed_value = value[6];
    vector_const_right_shift_uint8(amount, fixed_value, actual, 100);
    for (int idx = 0; idx < 100; idx++) {
        const uint8_t expected = fixed_value >> amount[idx];
        if (expected != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, expected, actual[idx]);
        }
    }

    iot_printf("logic right shift of uint8 test end\n");
}
 | |
| 
 | |
// Functional check and cycle-count comparison for the int8 add kernels on
// 128 elements. The four buffers are placed at fixed addresses instead of on
// the stack.
//   1. vector_add_const_int8 is verified element by element.
//   2. vector_add_int8 is run 1024 times and the elapsed cpu_get_mcycle()
//      delta is printed.
//   3. The equivalent scalar loop is run 1024 times, its delta is printed, and
//      its result is then used to verify the output of step 2.
// With SATURATION defined the reference clamps sums to -128..127.
void vector_add_test_int8() {
    int8_t *lhs = (int8_t *)0x10200000;
    int8_t *rhs = (int8_t *)0x10101000;
    int8_t *actual = (int8_t *)0x10102000;
    int8_t *golden = (int8_t *)0x10103000;
    uint64_t begin;
    uint64_t end;

    for (uint32_t k = 0; k < 128; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Step 1: vector + constant (rhs[0]).
    vector_add_const_int8(lhs, rhs[0], actual, 128);
    for (int idx = 0; idx < 128; idx++) {
#ifdef SATURATION
        int16_t sum = lhs[idx] + rhs[0];
        if (sum > 127) {
            golden[idx] = 127;
        } else if (sum < -128) {
            golden[idx] = -128;
        } else {
            golden[idx] = sum;
        }
#else
        golden[idx] = lhs[idx] + rhs[0];
#endif
        if (golden[idx] != actual[idx]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", idx, lhs[idx], rhs[0], golden[idx], actual[idx]);
        }
    }

    // Step 2: time the kernel under test.
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1024; repeat++) {
        vector_add_int8(lhs, rhs, actual, 128);
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Step 3: time the scalar reference; its last pass leaves the golden values.
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1024; repeat++) {
        for (uint8_t i = 0; i < 128; i++) {
#ifdef SATURATION
            int16_t gg = lhs[i] + rhs[i];
            golden[i] = gg > 127 ? 127 : (gg < -128 ? -128 : gg);
#else
            golden[i] = lhs[i] + rhs[i];
#endif
        }
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    for (int idx = 0; idx < 128; idx++) {
        if (golden[idx] != actual[idx]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", idx, lhs[idx], rhs[idx], golden[idx], actual[idx]);
        }
    }

    iot_printf("add const of int8 test end\n");
}
 | |
| 
 | |
// Functional check and cycle-count comparison for the int8 subtract kernels
// on 128 elements.
//   1. vector_sub_const_int8 (lhs[i] - c) is verified element by element.
//   2. vector_const_sub_int8 (c - lhs[i]) is verified element by element.
//   3. vector_sub_int8 is run 1024 times and the elapsed cpu_get_mcycle()
//      delta is printed.
//   4. The equivalent scalar loop is run 1024 times, its delta is printed, and
//      its result is then used to verify the output of step 3.
// c is rhs[0]. With SATURATION defined the reference clamps to -128..127.
void vector_sub_test_int8() {
    int8_t lhs[128];
    int8_t rhs[128];
    int8_t actual[128];
    int8_t golden[128];
    uint64_t begin;
    uint64_t end;

    for (uint32_t k = 0; k < 128; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Step 1: vector - constant.
    vector_sub_const_int8(lhs, rhs[0], actual, 128);
    for (int idx = 0; idx < 128; idx++) {
#ifdef SATURATION
        int16_t diff = lhs[idx] - rhs[0];
        if (diff > 127) {
            golden[idx] = 127;
        } else if (diff < -128) {
            golden[idx] = -128;
        } else {
            golden[idx] = diff;
        }
#else
        golden[idx] = lhs[idx] - rhs[0];
#endif
        if (golden[idx] != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, golden[idx], actual[idx]);
        }
    }

    // Step 2: constant - vector.
    vector_const_sub_int8(lhs, rhs[0], actual, 128);
    for (int idx = 0; idx < 128; idx++) {
#ifdef SATURATION
        int16_t diff = rhs[0] - lhs[idx];
        if (diff > 127) {
            golden[idx] = 127;
        } else if (diff < -128) {
            golden[idx] = -128;
        } else {
            golden[idx] = diff;
        }
#else
        golden[idx] = rhs[0] - lhs[idx];
#endif
        if (golden[idx] != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, golden[idx], actual[idx]);
        }
    }

    // Step 3: time the kernel under test.
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1024; repeat++) {
        vector_sub_int8(lhs, rhs, actual, 128);
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    // Step 4: time the scalar reference; its last pass leaves the golden values.
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1024; repeat++) {
        for (uint8_t i = 0; i < 128; i++) {
#ifdef SATURATION
            int16_t gg = lhs[i] - rhs[i];
            golden[i] = gg > 127 ? 127 : (gg < -128 ? -128 : gg);
#else
            golden[i] = lhs[i] - rhs[i];
#endif
        }
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    for (int idx = 0; idx < 128; idx++) {
        if (golden[idx] != actual[idx]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", idx, golden[idx], actual[idx]);
        }
    }

    iot_printf("sub const of int8 test end\n");
}
 | |
| 
 | |
// Exercises the int8 multiply kernels on 128 generated elements and prints
// every element that differs from a scalar reference. Covered, in order:
//   full 16-bit product   (vector*vector, timed over 1024 runs and compared
//                          with a timed scalar loop; then vector*constant)
//   high byte of product  (vector*vector, vector*constant)
//   low byte of product   (vector*vector, vector*constant)
// With SATURATION defined the low-byte reference clamps the product to
// -128..127; otherwise it keeps the product's low 8 bits.
//
// The buffers are checked before use so an allocation failure is reported
// instead of being written through.
void vector_mul_test_int8() {
    uint8_t length = 128;
    int8_t *a = (int8_t *)os_mem_malloc(1, length);
    int8_t *b = (int8_t *)os_mem_malloc(1, length);
    int8_t *l = (int8_t *)os_mem_malloc(1, length);        // low-byte results
    int8_t *h = (int8_t *)os_mem_malloc(1, length);        // high-byte results
    int16_t *w = (int16_t *)os_mem_malloc(1, length * 2);  // full-width results
    uint64_t begin;
    uint64_t end;

    // NOTE(review): assumes os_mem_malloc reports failure by returning NULL - confirm.
    if (a == NULL || b == NULL || l == NULL || h == NULL || w == NULL) {
        iot_printf("vector_mul_test_int8: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint8_t i = 0; i < length; i++) {
        a[i] = -2 * (i - 25);
        b[i] = 3 * (i - 33);   // leaves the int8_t range for larger i; the store narrows it
        //iot_printf("a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]);
    }

    // Time the kernel under test, then verify its output.
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1024; repeat++) {
        vector_multiply_int8(a, b, w, length);
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    for (uint8_t i = 0; i < length; i++) {
        int16_t g = a[i] * b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }

    // Time the scalar equivalent for comparison (overwrites w).
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1024; repeat++) {
        for (uint8_t i = 0; i < length; i++) {
            w[i] = a[i] * b[i];
        }
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    iot_printf("vector_multiply_int8 finished\n");

    vector_multiply_const_int8(a, b[1], w, length);
    for (uint8_t i = 0; i < length; i++) {
        int16_t g = a[i] * b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_const_int8 finished\n");

    vector_multiply_int8_high_8bit(a, b, h, length);
    for (uint8_t i = 0; i < length; i++) {
        int16_t g = (a[i] * b[i]) >> 8;
        if (g != h[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]);
        }
    }
    iot_printf("vector_multiply_int8_high finished\n");

    vector_multiply_const_int8_high_8bit(a, b[1], h, length);
    for (uint8_t i = 0; i < length; i++) {
        int16_t g = (a[i] * b[1]) >> 8;
        if (g != h[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_int8_high finished\n");

    vector_multiply_int8_low_8bit(a, b, l, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        int16_t g = a[i] * b[i];
        g = g > 127 ? 127 : (g < -128 ? -128 : g);
        #else
        int8_t g = (a[i] * b[i]) & 0xff;
        #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_int8_low finished\n");

    vector_multiply_const_int8_low_8bit(a, b[1], l, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        int16_t g = a[i] * b[1];
        g = g > 127 ? 127 : (g < -128 ? -128 : g);
        #else
        int8_t g = (a[i] * b[1]) & 0xff;
        #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_int8_low finished\n");

cleanup:
    // Release only what was obtained; whether os_mem_free tolerates NULL is
    // not visible from here, so each pointer is guarded explicitly.
    if (a != NULL) {
        os_mem_free(a);
    }
    if (b != NULL) {
        os_mem_free(b);
    }
    if (h != NULL) {
        os_mem_free(h);
    }
    if (l != NULL) {
        os_mem_free(l);
    }
    if (w != NULL) {
        os_mem_free(w);
    }
}
 | |
| 
 | |
// Exercises the int8 multiply-accumulate kernels on 128 generated elements
// and prints every element that differs from a scalar reference:
//   vector_mul_add_int8           -> j[i] + a[i]*b[i]   (8-bit result, timed)
//   vector_mul_sub_int8           -> j[i] - a[i]*b[i]   (8-bit result)
//   vector_mul_add_int8_to_int32  -> J[i] + a[i]*b[i]   (32-bit result)
//   vector_mul_sub_int8_to_int32  -> J[i] - a[i]*b[i]   (32-bit result)
// With SATURATION defined the 8-bit references clamp both the product and the
// final value to -128..127; otherwise they are narrowed by the int8_t store.
//
// All six work buffers are released before returning (they were previously
// leaked), and they are checked before being written.
void vector_madd_msub_test_int8() {
    uint8_t length = 128;
    int8_t *a = os_mem_malloc(1, length);
    int8_t *b = os_mem_malloc(1, length);
    int8_t *j = os_mem_malloc(1, length);        // 8-bit accumulator input
    int8_t *o = os_mem_malloc(1, length);        // 8-bit output
    int32_t *J = os_mem_malloc(1, length * 4);   // 32-bit accumulator input
    int32_t *O = os_mem_malloc(1, length * 4);   // 32-bit output
    uint64_t begin;
    uint64_t end;

    // NOTE(review): assumes os_mem_malloc reports failure by returning NULL - confirm.
    if (a == NULL || b == NULL || j == NULL || o == NULL || J == NULL || O == NULL) {
        iot_printf("vector_madd_msub_test_int8: buffer allocation failed\n");
        goto cleanup;
    }

    for (uint8_t i = 0; i < length; i++) {
        a[i] = -2 * (i - 25);
        b[i] = 3 * (i - 33);   // leaves the int8_t range for larger i; the store narrows it
        j[i] = i - 50;
        J[i] = i * 100 - 500;
        //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]);
    }

    // A single call is timed here (no repeat loop).
    begin = cpu_get_mcycle();
    vector_mul_add_int8(a, b, j, o, length);
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        int16_t m = a[i] * b[i];
        m = m > 127 ? 127 : (m < -128 ? -128 : m);
        int16_t g = (int16_t)j[i] + m;
        g = g > 127 ? 127 : (g < -128 ? -128 : g);
        #else
        int8_t g = j[i] + a[i] * b[i];
        #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_int8 finished\n");

    vector_mul_sub_int8(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
        #ifdef SATURATION
        int16_t m = a[i] * b[i];
        m = m > 127 ? 127 : (m < -128 ? -128 : m);
        int16_t g = (int16_t)j[i] - m;
        g = g > 127 ? 127 : (g < -128 ? -128 : g);
        #else
        int8_t g = j[i] - a[i] * b[i];
        #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_int8 finished\n");

    vector_mul_add_int8_to_int32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        int32_t g = J[i] + a[i] * b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_add_int8_to_int32 finished\n");

    vector_mul_sub_int8_to_int32(a, b, J, O, length);
    for (uint8_t i = 0; i < length; i++) {
        int32_t g = J[i] - a[i] * b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, O[i]);
        }
    }
    iot_printf("vector_mul_sub_int8_to_int32 finished\n");

cleanup:
    // Release only what was obtained; whether os_mem_free tolerates NULL is
    // not visible from here, so each pointer is guarded explicitly.
    if (a != NULL) {
        os_mem_free(a);
    }
    if (b != NULL) {
        os_mem_free(b);
    }
    if (j != NULL) {
        os_mem_free(j);
    }
    if (o != NULL) {
        os_mem_free(o);
    }
    if (J != NULL) {
        os_mem_free(J);
    }
    if (O != NULL) {
        os_mem_free(O);
    }
}
 | |
// Self-check for vector_min_int8 and vector_min_const_int8.
// Fills two 100-element int8_t inputs, runs each kernel, and compares every
// output element with a scalar reference computed here. Mismatches are
// reported through iot_printf; nothing is returned.
void vector_min_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Element-wise minimum of two vectors.
    vector_min_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        int8_t want = rhs[k];
        if (lhs[k] < rhs[k]) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Minimum of each element against a constant (rhs[0]).
    const int8_t bound = rhs[0];
    vector_min_const_int8(lhs, bound, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        int8_t want = bound;
        if (lhs[k] < bound) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("min of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_max_int8 and vector_max_const_int8.
// Each kernel's output is compared element by element with a scalar
// reference; mismatches are printed through iot_printf.
void vector_max_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Element-wise maximum of two vectors.
    vector_max_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        int8_t want = rhs[k];
        if (lhs[k] > rhs[k]) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Maximum of each element against a constant (rhs[0]).
    const int8_t bound = rhs[0];
    vector_max_const_int8(lhs, bound, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        int8_t want = bound;
        if (lhs[k] > bound) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("max of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_equal_int8 and vector_equal_const_int8.
// The reference expects 1 where the operands are equal and 0 otherwise;
// mismatches are printed through iot_printf.
void vector_equal_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector == vector.
    vector_equal_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] == rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector == constant (rhs[11]).
    const int8_t pivot = rhs[11];
    vector_equal_const_int8(lhs, pivot, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] == pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("equal of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_not_equal_int8 and vector_not_equal_const_int8.
// The reference expects 1 where the operands differ and 0 otherwise;
// mismatches are printed through iot_printf.
void vector_not_equal_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector != vector.
    vector_not_equal_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] != rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector != constant (rhs[11]).
    const int8_t pivot = rhs[11];
    vector_not_equal_const_int8(lhs, pivot, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] != pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("not equal of int8 test end\n");
}
 | |
| 
 | |
// Self-check for the int8 less-than kernels: vector < vector,
// vector < constant, and constant < vector. The reference expects 1 where
// the comparison holds and 0 otherwise; mismatches go to iot_printf.
void vector_less_than_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // lhs[k] < rhs[k]
    vector_less_than_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] < rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    const int8_t pivot = rhs[11];

    // lhs[k] < pivot
    vector_less_than_const_int8(lhs, pivot, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] < pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // pivot < lhs[k] (the reference checks lhs[k] > pivot)
    vector_const_less_than_int8(lhs, pivot, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] > pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("less than of int8 test end\n");
}
 | |
| 
 | |
// Self-check for the int8 greater-or-equal kernels: vector >= vector,
// vector >= constant, and constant >= vector. The reference expects 1 where
// the comparison holds and 0 otherwise; mismatches go to iot_printf.
void vector_greater_or_equal_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // lhs[k] >= rhs[k]
    vector_greater_or_equal_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] >= rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    const int8_t pivot = rhs[11];

    // lhs[k] >= pivot
    vector_greater_or_equal_const_int8(lhs, pivot, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] >= pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // pivot >= lhs[k] (the reference checks lhs[k] <= pivot)
    vector_const_greater_or_equal_int8(lhs, pivot, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (lhs[k] <= pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("greater or equal of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_logic_and_int8 and vector_logic_and_const_int8.
// The reference is the bitwise AND of the operands; mismatches are printed
// through iot_printf.
void vector_logic_and_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector & vector.
    vector_logic_and_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(lhs[k] & rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector & constant (rhs[11]).
    const int8_t mask = rhs[11];
    vector_logic_and_const_int8(lhs, mask, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(lhs[k] & mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic and of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_logic_or_int8 and vector_logic_or_const_int8.
// The reference is the bitwise OR of the operands; mismatches are printed
// through iot_printf.
void vector_logic_or_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector | vector.
    vector_logic_or_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(lhs[k] | rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector | constant (rhs[11]).
    const int8_t mask = rhs[11];
    vector_logic_or_const_int8(lhs, mask, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(lhs[k] | mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic or of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_logic_xor_int8 and vector_logic_xor_const_int8.
// The reference is the bitwise XOR of the operands; mismatches are printed
// through iot_printf.
void vector_logic_xor_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector ^ vector.
    vector_logic_xor_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(lhs[k] ^ rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector ^ constant (rhs[11]).
    const int8_t mask = rhs[11];
    vector_logic_xor_const_int8(lhs, mask, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(lhs[k] ^ mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic xor of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_logic_xnor_int8 and vector_logic_xnor_const_int8.
// The reference is the bitwise complement of the XOR of the operands;
// mismatches are printed through iot_printf.
void vector_logic_xnor_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t lhs[N_ELEMS];
    int8_t rhs[N_ELEMS];
    int8_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to int8_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // ~(vector ^ vector)
    vector_logic_xnor_int8(lhs, rhs, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)~(lhs[k] ^ rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // ~(vector ^ constant), constant is rhs[11].
    const int8_t mask = rhs[11];
    vector_logic_xnor_const_int8(lhs, mask, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)~(lhs[k] ^ mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic xnor of int8 test end\n");
}
 | |
| 
 | |
// Self-check for the int8 left-shift kernels: vector << vector,
// vector << constant, and constant << vector. The reference shifts in int
// and narrows the result to int8_t; mismatches go to iot_printf.
void vector_left_shift_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t val[N_ELEMS];
    uint8_t amt[N_ELEMS];
    int8_t out[N_ELEMS];

    // Values are 1..100; shift amounts cycle through 1..7.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        val[k] = k + 1;
        amt[k] = k % 7 + 1;
    }

    // val[k] << amt[k]
    vector_left_shift_int8(val, amt, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(val[k] << amt[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // val[k] << amt[2]
    const uint8_t fixed_amt = amt[2];
    vector_left_shift_const_int8(val, fixed_amt, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(val[k] << fixed_amt);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // val[6] << amt[k]: the shift-amount vector is the array argument here.
    const int8_t fixed_val = val[6];
    vector_const_left_shift_int8(amt, fixed_val, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(fixed_val << amt[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("left shift of int8 test end\n");
}
 | |
| 
 | |
// Self-check for the int8 logical right-shift kernels: vector >> vector,
// vector >> constant, and constant >> vector. The reference reinterprets the
// value as uint8_t before shifting, so zeros are shifted in from the top.
// Mismatches go to iot_printf.
void vector_logic_right_shift_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t val[N_ELEMS];
    uint8_t amt[N_ELEMS];
    int8_t out[N_ELEMS];

    // Values are computed as k - 50 in uint32_t and narrowed to int8_t;
    // shift amounts cycle through 1..7.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        val[k] = k - 50;
        amt[k] = k % 7 + 1;
    }

    // (uint8_t)val[k] >> amt[k]
    vector_logic_right_shift_int8(val, amt, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const uint8_t bits = (uint8_t)val[k];
        const int8_t want = (int8_t)(bits >> amt[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // (uint8_t)val[k] >> amt[2]
    const uint8_t fixed_amt = amt[2];
    vector_logic_right_shift_const_int8(val, fixed_amt, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const uint8_t bits = (uint8_t)val[k];
        const int8_t want = (int8_t)(bits >> fixed_amt);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // (uint8_t)val[6] >> amt[k]: the shift-amount vector is the array argument.
    const int8_t fixed_val = val[6];
    vector_const_logic_right_shift_int8(amt, fixed_val, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const uint8_t bits = (uint8_t)fixed_val;
        const int8_t want = (int8_t)(bits >> amt[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic right shift of int8 test end\n");
}
 | |
| 
 | |
// Self-check for the int8 arithmetic right-shift kernels: vector >> vector,
// vector >> constant, and constant >> vector. The reference applies >> to
// the signed value directly; mismatches go to iot_printf.
void vector_arithmatic_right_shift_test_int8() {
    enum { N_ELEMS = 100 };
    int8_t val[N_ELEMS];
    uint8_t amt[N_ELEMS];
    int8_t out[N_ELEMS];

    // Values are computed as k - 50 in uint32_t and narrowed to int8_t;
    // shift amounts cycle through 1..7.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        val[k] = k - 50;
        amt[k] = k % 7 + 1;
    }

    // val[k] >> amt[k]
    vector_arithmatic_right_shift_int8(val, amt, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(val[k] >> amt[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // val[k] >> amt[2]
    const uint8_t fixed_amt = amt[2];
    vector_arithmatic_right_shift_const_int8(val, fixed_amt, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(val[k] >> fixed_amt);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // val[6] >> amt[k]: the shift-amount vector is the array argument here.
    const int8_t fixed_val = val[6];
    vector_const_arithmatic_right_shift_int8(amt, fixed_val, out, N_ELEMS);
    for (uint8_t k = 0; k < N_ELEMS; k++) {
        const int8_t want = (int8_t)(fixed_val >> amt[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("arithmatic right shift of int8 test end\n");
}
 | |
| 
 | |
// Self-check for vector_add_const_uint16 and vector_add_uint16.
// With SATURATION defined the reference clamps the sum to 65535; otherwise
// the sum is narrowed to uint16_t. Mismatches are printed through iot_printf.
void vector_add_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2000 * k;
        rhs[k] = 3000 * k + 23;
    }

    // Vector + constant (rhs[0]).
    const uint16_t addend = rhs[0];
    vector_add_const_uint16(lhs, addend, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want;
#ifdef SATURATION
        const uint32_t wide = lhs[k] + addend;
        if (wide > 65535) {
            want = 65535;
        } else {
            want = wide;
        }
#else
        want = lhs[k] + addend;
#endif
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector + vector.
    vector_add_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want;
#ifdef SATURATION
        const uint32_t wide = lhs[k] + rhs[k];
        if (wide > 65535) {
            want = 65535;
        } else {
            want = wide;
        }
#else
        want = lhs[k] + rhs[k];
#endif
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("add const of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for the uint16 subtract kernels: vector - constant,
// constant - vector, and vector - vector. With SATURATION defined the
// reference clamps negative differences to 0; otherwise the difference is
// narrowed to uint16_t. Mismatches are printed through iot_printf.
void vector_sub_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2000 * k;
        rhs[k] = 3000 * k + 23;
    }

    const uint16_t fixed = rhs[0];

    // lhs[k] - fixed
    vector_sub_const_uint16(lhs, fixed, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want;
#ifdef SATURATION
        want = (lhs[k] < fixed) ? 0 : lhs[k] - fixed;
#else
        want = lhs[k] - fixed;
#endif
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // fixed - lhs[k]
    vector_const_sub_uint16(lhs, fixed, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want;
#ifdef SATURATION
        want = (lhs[k] > fixed) ? 0 : fixed - lhs[k];
#else
        want = fixed - lhs[k];
#endif
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // lhs[k] - rhs[k]
    vector_sub_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want;
#ifdef SATURATION
        want = (lhs[k] < rhs[k]) ? 0 : lhs[k] - rhs[k];
#else
        want = lhs[k] - rhs[k];
#endif
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("sub const of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for the uint16 multiply kernels: full 32-bit product, high
// 16 bits, and low 16 bits, each in vector*vector and vector*constant form.
// Buffers are obtained from os_mem_malloc and released before returning.
// Mismatches against the scalar reference are printed through iot_printf.
//
// All reference products are formed in uint32_t. Multiplying two uint16_t
// values directly promotes them to signed int, and products above INT_MAX
// (which these inputs do produce) would be signed overflow.
void vector_mul_test_uint16() {
    uint16_t length = 50;
    uint16_t *a = (uint16_t *)os_mem_malloc(1, length * 2);
    uint16_t *b = (uint16_t *)os_mem_malloc(1, length * 2);
    uint16_t *l = (uint16_t *)os_mem_malloc(1, length * 2);
    uint16_t *h = (uint16_t *)os_mem_malloc(1, length * 2);
    uint32_t *w = (uint32_t *)os_mem_malloc(1, length * 2 * 2);

    // Do not touch the buffers if any allocation came back null.
    if (!a || !b || !l || !h || !w) {
        iot_printf("vector_mul_test_uint16: allocation failed\n");
        goto cleanup;
    }

    // Signed expressions narrowed to uint16_t; negative results wrap.
    for (uint16_t i = 0; i < length; i++) {
        a[i] = (uint16_t)(-2000 * (i - 25));
        b[i] = (uint16_t)(3000 * (i - 33));
    }

    // Full 32-bit product, vector * vector.
    vector_multiply_uint16(a, b, w, length);
    for (uint16_t i = 0; i < length; i++) {
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_uint16 finished\n");

    // Full 32-bit product, vector * constant (b[1]).
    vector_multiply_const_uint16(a, b[1], w, length);
    for (uint16_t i = 0; i < length; i++) {
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint16 finished\n");

    // Upper 16 bits of the product, vector * vector.
    vector_multiply_uint16_high_16bit(a, b, h, length);
    for (uint16_t i = 0; i < length; i++) {
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[i]) >> 16);
        if (g != h[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint16_high finished\n");

    // Upper 16 bits of the product, vector * constant (b[1]).
    vector_multiply_const_uint16_high_16bit(a, b[1], h, length);
    for (uint16_t i = 0; i < length; i++) {
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[1]) >> 16);
        if (g != h[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint16_high finished\n");

    // Lower 16 bits of the product (clamped to 65535 when SATURATION is
    // defined), vector * vector.
    vector_multiply_uint16_low_16bit(a, b, l, length);
    for (uint16_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[i];
        if (g > 65535) {
            g = 65535;
        }
    #else
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[i]) & 0xffff);
    #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint16_low finished\n");

    // Lower 16 bits of the product (clamped when SATURATION is defined),
    // vector * constant (b[1]).
    vector_multiply_const_uint16_low_16bit(a, b[1], l, length);
    for (uint16_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint32_t g = (uint32_t)a[i] * (uint32_t)b[1];
        if (g > 65535) {
            g = 65535;
        }
    #else
        uint16_t g = (uint16_t)(((uint32_t)a[i] * (uint32_t)b[1]) & 0xffff);
    #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint16_low finished\n");

cleanup:
    // How os_mem_free treats a null pointer is not visible from here, so
    // only release buffers that were actually obtained.
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (h) {
        os_mem_free(h);
    }
    if (l) {
        os_mem_free(l);
    }
    if (w) {
        os_mem_free(w);
    }
}
 | |
| 
 | |
// Self-check for the uint16 multiply-add / multiply-subtract kernels, in
// both the 16-bit-result and the 64-bit-accumulator forms. Mismatches
// against the scalar reference are printed through iot_printf.
//
// Buffers are obtained from os_mem_malloc and released before returning.
// The 16-bit reference products are formed in uint32_t: multiplying two
// uint16_t values directly promotes them to signed int, and products above
// INT_MAX would be signed overflow.
void vector_madd_msub_test_uint16() {
    uint16_t length = 50;
    uint16_t *a = os_mem_malloc(1, length * 2);
    uint16_t *b = os_mem_malloc(1, length * 2);
    uint16_t *j = os_mem_malloc(1, length * 2);
    uint16_t *o = os_mem_malloc(1, length * 2);
    uint64_t *J = os_mem_malloc(1, length * 2 * 4);
    uint64_t *O = os_mem_malloc(1, length * 2 * 4);

    // Do not touch the buffers if any allocation came back null.
    if (!a || !b || !j || !o || !J || !O) {
        iot_printf("vector_madd_msub_test_uint16: allocation failed\n");
        goto cleanup;
    }

    // a and b are narrowed to uint16_t on store; J holds the 64-bit addends.
    for (uint16_t i = 0; i < length; i++) {
        a[i] = 2000 * (i + 235);
        b[i] = 3000 * (i + 333);
        j[i] = i + 504;
        J[i] = i * 504 + 800000;
    }

    // j + a * b with a 16-bit result.
    vector_mul_add_uint16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint32_t m = (uint32_t)a[i] * (uint32_t)b[i];
        m = m > 65535 ? 65535 : m;
        uint32_t g = j[i] + m;
        g = g > 65535 ? 65535 : g;
    #else
        uint16_t g = (uint16_t)((uint32_t)j[i] + (uint32_t)a[i] * (uint32_t)b[i]);
    #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint16 finished\n");

    // j - a * b with a 16-bit result.
    vector_mul_sub_uint16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint32_t m = (uint32_t)a[i] * (uint32_t)b[i];
        m = m > 65535 ? 65535 : m;
        uint32_t g = j[i] < m ? 0 : j[i] - m;
    #else
        uint16_t g = (uint16_t)((uint32_t)j[i] - (uint32_t)a[i] * (uint32_t)b[i]);
    #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint16 finished\n");

    // J + a * b with a 64-bit accumulator. The 64-bit values are printed as
    // high word then low word, split by shifting rather than by reading the
    // uint64_t storage through a uint32_t pointer.
    vector_mul_add_uint16_to_uint64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        uint64_t g = J[i] + (uint64_t)a[i] * (uint64_t)b[i];
        if (g != O[i]) {
            iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i, (uint32_t)(J[i] >> 32), (uint32_t)J[i], a[i], b[i], (uint32_t)(g >> 32), (uint32_t)g, (uint32_t)(O[i] >> 32), (uint32_t)O[i]);
        }
    }
    // NOTE(review): the message says "uint32" although the kernel under test
    // is the uint64 variant; kept verbatim in case host tooling matches on it.
    iot_printf("vector_mul_add_uint16_to_uint32 finished\n");

    // J - a * b with a 64-bit accumulator.
    vector_mul_sub_uint16_to_uint64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)(J[i] - (uint64_t)a[i] * (uint64_t)b[i]);
        if (g != O[i]) {
            iot_printf("incorrect at %d, J = %x%x, a = %x, b = %x, golden %x%x, output %x%x\n", i, (uint32_t)(J[i] >> 32), (uint32_t)J[i], a[i], b[i], (uint32_t)(g >> 32), (uint32_t)g, (uint32_t)(O[i] >> 32), (uint32_t)O[i]);
        }
    }
    // NOTE(review): same "uint32" wording as above, kept verbatim.
    iot_printf("vector_mul_sub_uint16_to_uint32 finished\n");

cleanup:
    // How os_mem_free treats a null pointer is not visible from here, so
    // only release buffers that were actually obtained.
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (j) {
        os_mem_free(j);
    }
    if (o) {
        os_mem_free(o);
    }
    if (J) {
        os_mem_free(J);
    }
    if (O) {
        os_mem_free(O);
    }
}
 | |
| 
 | |
// Self-check for vector_min_uint16 and vector_min_const_uint16.
// Each kernel's output is compared element by element with a scalar
// reference; mismatches are printed through iot_printf.
void vector_min_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Element-wise minimum of two vectors.
    vector_min_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want = rhs[k];
        if (lhs[k] < rhs[k]) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Minimum of each element against a constant (rhs[0]).
    const uint16_t bound = rhs[0];
    vector_min_const_uint16(lhs, bound, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want = bound;
        if (lhs[k] < bound) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("min of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for vector_max_uint16 and vector_max_const_uint16.
// Each kernel's output is compared element by element with a scalar
// reference; mismatches are printed through iot_printf.
void vector_max_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    // Element-wise maximum of two vectors.
    vector_max_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want = rhs[k];
        if (lhs[k] > rhs[k]) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Maximum of each element against a constant (rhs[0]).
    const uint16_t bound = rhs[0];
    vector_max_const_uint16(lhs, bound, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        uint16_t want = bound;
        if (lhs[k] > bound) {
            want = lhs[k];
        }
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("max of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for vector_equal_uint16 and vector_equal_const_uint16.
// The reference expects 1 where the operands are equal and 0 otherwise;
// mismatches are printed through iot_printf.
void vector_equal_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector == vector.
    vector_equal_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] == rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector == constant (rhs[11]).
    const uint16_t pivot = rhs[11];
    vector_equal_const_uint16(lhs, pivot, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] == pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("equal of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for vector_not_equal_uint16 and vector_not_equal_const_uint16.
// The reference expects 1 where the operands differ and 0 otherwise;
// mismatches are printed through iot_printf.
void vector_not_equal_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector != vector.
    vector_not_equal_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] != rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector != constant (rhs[11]).
    const uint16_t pivot = rhs[11];
    vector_not_equal_const_uint16(lhs, pivot, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] != pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("not equal of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for the uint16 less-than kernels: vector < vector,
// vector < constant, and constant < vector. The reference expects 1 where
// the comparison holds and 0 otherwise; mismatches go to iot_printf.
void vector_less_than_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // lhs[k] < rhs[k]
    vector_less_than_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] < rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    const uint16_t pivot = rhs[11];

    // lhs[k] < pivot
    vector_less_than_const_uint16(lhs, pivot, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] < pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // pivot < lhs[k] (the reference checks lhs[k] > pivot)
    vector_const_less_than_uint16(lhs, pivot, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] > pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("less than of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for the uint16 greater-or-equal kernels: vector >= vector,
// vector >= constant, and constant >= vector. The reference expects 1 where
// the comparison holds and 0 otherwise; mismatches go to iot_printf.
void vector_greater_or_equal_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // lhs[k] >= rhs[k]
    vector_greater_or_equal_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] >= rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    const uint16_t pivot = rhs[11];

    // lhs[k] >= pivot
    vector_greater_or_equal_const_uint16(lhs, pivot, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] >= pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // pivot >= lhs[k] (the reference checks lhs[k] <= pivot)
    vector_const_greater_or_equal_uint16(lhs, pivot, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (lhs[k] <= pivot);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("greater or equal of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for vector_logic_and_uint16 and vector_logic_and_const_uint16.
// The reference is the bitwise AND of the operands; mismatches are printed
// through iot_printf.
void vector_logic_and_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector & vector.
    vector_logic_and_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (uint16_t)(lhs[k] & rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector & constant (rhs[11]).
    const uint16_t mask = rhs[11];
    vector_logic_and_const_uint16(lhs, mask, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (uint16_t)(lhs[k] & mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic and of uint16 test end\n");
}
 | |
| 
 | |
// Self-check for vector_logic_or_uint16 and vector_logic_or_const_uint16.
// The reference is the bitwise OR of the operands; mismatches are printed
// through iot_printf.
void vector_logic_or_test_uint16() {
    enum { N_ELEMS = 50 };
    uint16_t lhs[N_ELEMS];
    uint16_t rhs[N_ELEMS];
    uint16_t out[N_ELEMS];

    // Operands are computed in uint32_t and narrowed to uint16_t on store.
    for (uint32_t k = 0; k < N_ELEMS; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    // Vector | vector.
    vector_logic_or_uint16(lhs, rhs, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (uint16_t)(lhs[k] | rhs[k]);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    // Vector | constant (rhs[11]).
    const uint16_t mask = rhs[11];
    vector_logic_or_const_uint16(lhs, mask, out, N_ELEMS);
    for (uint16_t k = 0; k < N_ELEMS; k++) {
        const uint16_t want = (uint16_t)(lhs[k] | mask);
        if (want != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, want, out[k]);
        }
    }

    iot_printf("logic or of uint16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the uint16 bitwise-XOR kernels.
 *
 * Exercises vector_logic_xor_uint16 and vector_logic_xor_const_uint16
 * (scalar operand rhs[11]) on 50 elements and compares each lane with a
 * plain C reference. Mismatches are printed with iot_printf.
 */
void vector_logic_xor_test_uint16() {
    enum { COUNT = 50 };
    uint16_t lhs[COUNT];
    uint16_t rhs[COUNT];
    uint16_t out[COUNT];
    uint16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector ^ vector */
    vector_logic_xor_uint16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] ^ rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector ^ scalar */
    vector_logic_xor_const_uint16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] ^ rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic xor of uint16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the uint16 bitwise-XNOR kernels.
 *
 * Exercises vector_logic_xnor_uint16 and vector_logic_xnor_const_uint16
 * (scalar operand rhs[11]) on 50 elements. The reference is ~(x ^ y)
 * narrowed back to uint16_t. Mismatches are printed with iot_printf.
 */
void vector_logic_xnor_test_uint16() {
    enum { COUNT = 50 };
    uint16_t lhs[COUNT];
    uint16_t rhs[COUNT];
    uint16_t out[COUNT];
    uint16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector xnor vector */
    vector_logic_xnor_uint16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = ~(lhs[k] ^ rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector xnor scalar */
    vector_logic_xnor_const_uint16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = ~(lhs[k] ^ rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic xnor of uint16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the uint16 left-shift kernels.
 *
 * Three variants are exercised on 50 elements:
 *   - vector_left_shift_uint16:       val[k]  << amount[k]
 *   - vector_left_shift_const_uint16: val[k]  << amount[2]
 *   - vector_const_left_shift_uint16: val[6]  << amount[k]
 * Shift amounts are in the range 1..7. Mismatches against the plain C
 * reference are printed with iot_printf.
 */
void vector_left_shift_test_uint16() {
    enum { COUNT = 50 };
    uint16_t val[COUNT];
    uint16_t amount[COUNT];
    uint16_t out[COUNT];
    uint16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        val[n] = n + 1;
        amount[n] = n % 7 + 1;
    }

    /* per-lane value, per-lane shift */
    vector_left_shift_uint16(val, amount, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] << amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* per-lane value, fixed shift */
    vector_left_shift_const_uint16(val, amount[2], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] << amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* fixed value, per-lane shift */
    vector_const_left_shift_uint16(amount, val[6], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[6] << amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("left shift of uint16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the uint16 right-shift kernels.
 *
 * Three variants are exercised on 50 elements:
 *   - vector_right_shift_uint16:       val[k] >> amount[k]
 *   - vector_right_shift_const_uint16: val[k] >> amount[2]
 *   - vector_const_right_shift_uint16: val[6] >> amount[k]
 * Inputs are n - 50 evaluated in unsigned arithmetic and narrowed to
 * uint16_t. Mismatches against the plain C reference are printed with
 * iot_printf.
 */
void vector_right_shift_test_uint16() {
    enum { COUNT = 50 };
    uint16_t val[COUNT];
    uint16_t amount[COUNT];
    uint16_t out[COUNT];
    uint16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        val[n] = n - 50;
        amount[n] = n % 7 + 1;
    }

    /* per-lane value, per-lane shift */
    vector_right_shift_uint16(val, amount, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* per-lane value, fixed shift */
    vector_right_shift_const_uint16(val, amount[2], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] >> amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* fixed value, per-lane shift */
    vector_const_right_shift_uint16(amount, val[6], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[6] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic right shift of uint16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the uint16 -> uint8 half-extraction kernels.
 *
 * Exercises vector_high_half_bits_uint16, vector_low_half_bits_uint16 and
 * the combined vector_high_and_low_half_bits_uint16 on 50 elements. The
 * reference high half is (x >> 8) and the low half is (x & 0xff). The
 * cycle count around the first kernel call is printed. Mismatches are
 * reported through iot_printf.
 */
void vector_half_bits_test_uint16() {
    enum { COUNT = 50 };
    uint16_t src[COUNT];
    uint8_t hi[COUNT];
    uint8_t lo[COUNT];
    uint8_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        /* low byte = n, high byte = n + 80 */
        src[n] = n + (n + 80) * 256;
    }

    /* high half, timed */
    uint64_t t_start = cpu_get_mcycle();
    vector_high_half_bits_uint16(src, hi, COUNT);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = src[k] >> 8;
        if (expect == hi[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, hi[k]);
    }
    iot_printf("vector_high_half_bits_uint16 finished\n");

    /* low half */
    vector_low_half_bits_uint16(src, lo, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = src[k] & 0xff;
        if (expect == lo[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, lo[k]);
    }
    iot_printf("vector_low_half_bits_uint16 finished\n");

    /* both halves in one call; high is checked before low for each lane */
    vector_high_and_low_half_bits_uint16(src, hi, lo, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = src[k] >> 8;
        if (expect != hi[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expect, hi[k]);
        }
        expect = src[k] & 0xff;
        if (expect != lo[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expect, lo[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_uint16 finished\n");
}
 | |
| 
 | |
/*
 * Self-check and rough timing for the int16 addition kernels.
 *
 * Sequence:
 *   1. vector_add_const_int16 (scalar operand rhs[0]) is verified lane by
 *      lane.
 *   2. vector_add_int16 is called 1000 times between two cpu_get_mcycle()
 *      reads and the elapsed count is printed.
 *   3. The scalar C reference for the vector+vector add is computed 1000
 *      times, timed the same way, then compared with the kernel output.
 * When SATURATION is defined the reference clamps to [-32768, 32767];
 * otherwise the sum is simply narrowed to int16_t.
 */
void vector_add_test_int16() {
    enum { COUNT = 50, REPEATS = 1000 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t ref[COUNT];

    for (uint32_t n = 0; n < COUNT; n++) {
        /* products leave the int16 range for larger n and are narrowed on store */
        lhs[n] = 2000 * n;
        rhs[n] = 3000 * n + 23;
    }

    /* vector + scalar */
    vector_add_const_int16(lhs, rhs[0], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
#ifdef SATURATION
        int32_t wide = lhs[k] + rhs[0];
        ref[k] = wide > 32767 ? 32767 : (wide < -32768 ? -32768 : wide);
#else
        ref[k] = lhs[k] + rhs[0];
#endif
        if (ref[k] == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, ref[k], out[k]);
    }

    /* timed kernel: vector + vector */
    uint64_t t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEATS; pass++) {
        vector_add_int16(lhs, rhs, out, COUNT);
    }
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    /* timed scalar reference for the same operation */
    t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEATS; pass++) {
        for (uint16_t k = 0; k < COUNT; k++) {
#ifdef SATURATION
            int32_t wide = lhs[k] + rhs[k];
            ref[k] = wide > 32767 ? 32767 : (wide < -32768 ? -32768 : wide);
#else
            ref[k] = lhs[k] + rhs[k];
#endif
        }
    }
    t_stop = cpu_get_mcycle();

    /* comparison is kept outside the timed region */
    for (uint16_t k = 0; k < COUNT; k++) {
        if (ref[k] == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, ref[k], out[k]);
    }
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));
    iot_printf("add const of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check and rough timing for the int16 subtraction kernels.
 *
 * Sequence:
 *   1. vector_sub_const_int16: lhs[k] - rhs[0], verified lane by lane.
 *   2. vector_const_sub_int16: rhs[0] - lhs[k], verified lane by lane.
 *   3. vector_sub_int16 is run once between two cpu_get_mcycle() reads.
 *      The scalar reference is then computed once, timed the same way,
 *      and compared with the kernel output.
 * When SATURATION is defined the reference clamps to [-32768, 32767];
 * otherwise the difference is simply narrowed to int16_t.
 */
void vector_sub_test_int16() {
    enum { COUNT = 50, REPEATS = 1 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t ref[COUNT];

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2000 * n;
        rhs[n] = 3000 * n + 23;
    }

    /* vector - scalar */
    vector_sub_const_int16(lhs, rhs[0], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
#ifdef SATURATION
        int32_t wide = lhs[k] - rhs[0];
        ref[k] = wide > 32767 ? 32767 : (wide < -32768 ? -32768 : wide);
#else
        ref[k] = lhs[k] - rhs[0];
#endif
        if (ref[k] == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, ref[k], out[k]);
    }

    /* scalar - vector */
    vector_const_sub_int16(lhs, rhs[0], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
#ifdef SATURATION
        int32_t wide = rhs[0] - lhs[k];
        ref[k] = wide > 32767 ? 32767 : (wide < -32768 ? -32768 : wide);
#else
        ref[k] = rhs[0] - lhs[k];
#endif
        if (ref[k] == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, ref[k], out[k]);
    }

    /* timed kernel: vector - vector */
    uint64_t t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEATS; pass++) {
        vector_sub_int16(lhs, rhs, out, COUNT);
    }
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));

    /* timed scalar reference for the same operation */
    t_start = cpu_get_mcycle();
    for (uint32_t pass = 0; pass < REPEATS; pass++) {
        for (uint16_t k = 0; k < COUNT; k++) {
#ifdef SATURATION
            int32_t wide = lhs[k] - rhs[k];
            ref[k] = wide > 32767 ? 32767 : (wide < -32768 ? -32768 : wide);
#else
            ref[k] = lhs[k] - rhs[k];
#endif
        }
    }
    t_stop = cpu_get_mcycle();

    /* comparison is kept outside the timed region */
    for (uint16_t k = 0; k < COUNT; k++) {
        if (ref[k] == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, ref[k], out[k]);
    }
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));
    iot_printf("sub const of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 multiplication kernels.
 *
 * Working buffers come from os_mem_malloc and are released with
 * os_mem_free at the end. Six kernels are verified against a plain C
 * reference, in this order:
 *   - vector_multiply_int16 / vector_multiply_const_int16 (32-bit product)
 *   - vector_multiply_int16_high_16bit / ..._const_... (product >> 16)
 *   - vector_multiply_int16_low_16bit / ..._const_...  (product & 0xffff,
 *     or the product clamped to int16 when SATURATION is defined)
 * The "const" variants use rhs[1] as the scalar operand. A line is printed
 * after each kernel. Mismatches are reported through iot_printf.
 */
void vector_mul_test_int16() {
    uint16_t length = 50;
    int16_t *lhs = (int16_t *)os_mem_malloc(1, length * 2);
    int16_t *rhs = (int16_t *)os_mem_malloc(1, length * 2);
    int16_t *low = (int16_t *)os_mem_malloc(1, length * 2);
    int16_t *high = (int16_t *)os_mem_malloc(1, length * 2);
    int32_t *wide = (int32_t *)os_mem_malloc(1, length * 2 * 2);
    uint16_t k;

    for (k = 0; k < length; k++) {
        /* results are narrowed to int16_t on store */
        lhs[k] = -2000 * (k - 25);
        rhs[k] = 3000 * (k - 33);
    }

    /* full 32-bit product, vector * vector */
    vector_multiply_int16(lhs, rhs, wide, length);
    for (k = 0; k < length; k++) {
        int32_t want = lhs[k] * rhs[k];
        if (want == wide[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, wide[k]);
    }
    iot_printf("vector_multiply_int16 finished\n");

    /* full 32-bit product, vector * scalar */
    vector_multiply_const_int16(lhs, rhs[1], wide, length);
    for (k = 0; k < length; k++) {
        int32_t want = lhs[k] * rhs[1];
        if (want == wide[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, wide[k]);
    }
    iot_printf("vector_multiply_const_int16 finished\n");

    /* upper 16 bits, vector * vector */
    vector_multiply_int16_high_16bit(lhs, rhs, high, length);
    for (k = 0; k < length; k++) {
        int32_t want = (lhs[k] * rhs[k]) >> 16;
        if (want == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, high[k]);
    }
    iot_printf("vector_multiply_int16_high finished\n");

    /* upper 16 bits, vector * scalar */
    vector_multiply_const_int16_high_16bit(lhs, rhs[1], high, length);
    for (k = 0; k < length; k++) {
        int32_t want = (lhs[k] * rhs[1]) >> 16;
        if (want == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, want, high[k]);
    }
    iot_printf("vector_multiply_const_int16_high finished\n");

    /* lower 16 bits (or saturated product), vector * vector */
    vector_multiply_int16_low_16bit(lhs, rhs, low, length);
    for (k = 0; k < length; k++) {
#ifdef SATURATION
        int32_t product = lhs[k] * rhs[k];
        int32_t want = product > 32767 ? 32767 : (product < -32768 ? -32768 : product);
#else
        int16_t want = (lhs[k] * rhs[k]) & 0xffff;
#endif
        if (want == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", k, lhs[k], rhs[k], want, low[k]);
    }
    iot_printf("vector_multiply_int16_low finished\n");

    /* lower 16 bits (or saturated product), vector * scalar */
    vector_multiply_const_int16_low_16bit(lhs, rhs[1], low, length);
    for (k = 0; k < length; k++) {
#ifdef SATURATION
        int32_t product = lhs[k] * rhs[1];
        int32_t want = product > 32767 ? 32767 : (product < -32768 ? -32768 : product);
#else
        int16_t want = (lhs[k] * rhs[1]) & 0xffff;
#endif
        if (want == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", k, lhs[k], rhs[1], want, low[k]);
    }
    iot_printf("vector_multiply_const_int16_low finished\n");

    os_mem_free(lhs);
    os_mem_free(rhs);
    os_mem_free(high);
    os_mem_free(low);
    os_mem_free(wide);
}
 | |
| 
 | |
/*
 * Self-check for the int16 multiply-add / multiply-subtract kernels.
 *
 * Verified against a plain C reference, in this order:
 *   - vector_mul_add_int16:          j[i] + a[i] * b[i], int16 result
 *   - vector_mul_sub_int16:          j[i] - a[i] * b[i], int16 result
 *   - vector_mul_add_int16_to_int64: J[i] + a[i] * b[i], int64 result
 *   - vector_mul_sub_int16_to_int64: J[i] - a[i] * b[i], int64 result
 * When SATURATION is defined the int16 reference clamps both the product
 * and the final sum/difference to [-32768, 32767].
 *
 * Changes from the previous version:
 *   - The six os_mem_malloc buffers are now released with os_mem_free
 *     before returning. They previously leaked on every call.
 *   - The 64-bit reference is held in an int64_t instead of being
 *     truncated to int32_t before the comparison.
 *   - 64-bit values passed to the "%d" conversions are narrowed
 *     explicitly, so the variadic arguments match the format string.
 */
void vector_madd_msub_test_int16() {
    uint16_t length = 50;
    int16_t *a = os_mem_malloc(1, length * 2);
    int16_t *b = os_mem_malloc(1, length * 2);
    int16_t *j = os_mem_malloc(1, length * 2);
    int16_t *o = os_mem_malloc(1, length * 2);
    int64_t *J = os_mem_malloc(1, length * 2 * 4);
    int64_t *O = os_mem_malloc(1, length * 2 * 4);
    for (uint16_t i = 0; i < length; i++) {
        /* results are narrowed to int16_t on store */
        a[i] = -2000 * (i - 25);
        b[i] = 3000 * (i - 33);
        j[i] = i - 50;
        J[i] = i * 50 - 500;
        //iot_printf("a[%d] = %d, b[%d] = %d, j[%d] = %d, J[%d] = %d\n", i, a[i], i, b[i], i, j[i], i, J[i]);
    }

    /* int16 multiply-add */
    vector_mul_add_int16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        int32_t m = a[i] * b[i];
        m = m > 32767 ? 32767 : (m < -32768 ? -32768 : m);
        int32_t g = j[i] + m;
        g = g > 32767 ? 32767 : (g < -32768 ? -32768 : g);
#else
        int16_t g = j[i] + a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_add_int16 finished\n");

    /* int16 multiply-subtract */
    vector_mul_sub_int16(a, b, j, o, length);
    for (uint16_t i = 0; i < length; i++) {
#ifdef SATURATION
        int32_t m = a[i] * b[i];
        m = m > 32767 ? 32767 : (m < -32768 ? -32768 : m);
        int32_t g = j[i] - m;
        g = g > 32767 ? 32767 : (g < -32768 ? -32768 : g);
#else
        int16_t g = j[i] - a[i] * b[i];
#endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, j = %d, golden %d, output %d\n", i, a[i], b[i], j[i], g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_int16 finished\n");

    /* int64 accumulate: keep the reference at full width for the compare */
    vector_mul_add_int16_to_int64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        int64_t g = J[i] + a[i] * b[i];
        if (g != O[i]) {
            /* "%d" takes an int-sized argument, so only the low 32 bits are shown */
            iot_printf("incorrect at %d, golden %d, output %d\n", i, (int32_t)g, (int32_t)O[i]);
        }
    }
    iot_printf("vector_mul_add_int16_to_int32 finished\n");

    vector_mul_sub_int16_to_int64(a, b, J, O, length);
    for (uint16_t i = 0; i < length; i++) {
        int64_t g = J[i] - a[i] * b[i];
        if (g != O[i]) {
            /* "%d" takes an int-sized argument, so only the low 32 bits are shown */
            iot_printf("incorrect at %d, golden %d, output %d\n", i, (int32_t)g, (int32_t)O[i]);
        }
    }
    iot_printf("vector_mul_sub_int16_to_int32 finished\n");

    /* release the working buffers */
    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(j);
    os_mem_free(o);
    os_mem_free(J);
    os_mem_free(O);
}
 | |
/*
 * Self-check for the int16 element-wise minimum kernels.
 *
 * Exercises vector_min_int16 and vector_min_const_int16 (scalar operand
 * rhs[0]) on 50 elements and compares each lane with a plain C reference.
 * Mismatches are printed with iot_printf.
 */
void vector_min_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    /* min(vector, vector) */
    vector_min_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        if (lhs[k] < rhs[k]) {
            expect = lhs[k];
        } else {
            expect = rhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* min(vector, scalar) */
    vector_min_const_int16(lhs, rhs[0], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        if (lhs[k] < rhs[0]) {
            expect = lhs[k];
        } else {
            expect = rhs[0];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("min of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 element-wise maximum kernels.
 *
 * Exercises vector_max_int16 and vector_max_const_int16 (scalar operand
 * rhs[0]) on 50 elements and compares each lane with a plain C reference.
 * Mismatches are printed with iot_printf.
 */
void vector_max_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    /* max(vector, vector) */
    vector_max_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        if (lhs[k] > rhs[k]) {
            expect = lhs[k];
        } else {
            expect = rhs[k];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* max(vector, scalar) */
    vector_max_const_int16(lhs, rhs[0], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        if (lhs[k] > rhs[0]) {
            expect = lhs[k];
        } else {
            expect = rhs[0];
        }
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("max of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 equality-compare kernels.
 *
 * Exercises vector_equal_int16 and vector_equal_const_int16 (scalar
 * operand rhs[11]) on 50 elements. The reference lane value is 1 where the
 * operands are equal and 0 otherwise. Mismatches are printed with
 * iot_printf.
 */
void vector_equal_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector == vector */
    vector_equal_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] == rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector == scalar */
    vector_equal_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] == rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("equal of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 inequality-compare kernels.
 *
 * Exercises vector_not_equal_int16 and vector_not_equal_const_int16
 * (scalar operand rhs[11]) on 50 elements. The reference lane value is 1
 * where the operands differ and 0 otherwise. Mismatches are printed with
 * iot_printf.
 */
void vector_not_equal_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector != vector */
    vector_not_equal_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] != rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector != scalar */
    vector_not_equal_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] != rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("not equal of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 less-than compare kernels.
 *
 * Three variants are exercised on 50 elements, each producing 1/0 lanes:
 *   - vector_less_than_int16:       lhs[k]  <  rhs[k]
 *   - vector_less_than_const_int16: lhs[k]  <  rhs[11]
 *   - vector_const_less_than_int16: rhs[11] <  lhs[k], which the reference
 *     writes as lhs[k] > rhs[11]
 * Mismatches are printed with iot_printf.
 */
void vector_less_than_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector < vector */
    vector_less_than_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] < rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector < scalar */
    vector_less_than_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] < rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* scalar < vector */
    vector_const_less_than_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] > rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("less than of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 greater-or-equal compare kernels.
 *
 * Three variants are exercised on 50 elements, each producing 1/0 lanes:
 *   - vector_greater_or_equal_int16:       lhs[k]  >= rhs[k]
 *   - vector_greater_or_equal_const_int16: lhs[k]  >= rhs[11]
 *   - vector_const_greater_or_equal_int16: rhs[11] >= lhs[k], which the
 *     reference writes as lhs[k] <= rhs[11]
 * Mismatches are printed with iot_printf.
 */
void vector_greater_or_equal_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector >= vector */
    vector_greater_or_equal_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] >= rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector >= scalar */
    vector_greater_or_equal_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] >= rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* scalar >= vector */
    vector_const_greater_or_equal_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (lhs[k] <= rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("greater or equal of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 bitwise-AND kernels.
 *
 * Exercises vector_logic_and_int16 and vector_logic_and_const_int16
 * (scalar operand rhs[11]) on 50 elements and compares each lane with a
 * plain C reference. Mismatches are printed with iot_printf.
 */
void vector_logic_and_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector & vector */
    vector_logic_and_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] & rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector & scalar */
    vector_logic_and_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] & rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic and of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 bitwise-OR kernels.
 *
 * Exercises vector_logic_or_int16 and vector_logic_or_const_int16 (scalar
 * operand rhs[11]) on 50 elements and compares each lane with a plain C
 * reference. Mismatches are printed with iot_printf.
 */
void vector_logic_or_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector | vector */
    vector_logic_or_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] | rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector | scalar */
    vector_logic_or_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] | rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic or of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 bitwise-XOR kernels.
 *
 * Exercises vector_logic_xor_int16 and vector_logic_xor_const_int16
 * (scalar operand rhs[11]) on 50 elements and compares each lane with a
 * plain C reference. Mismatches are printed with iot_printf.
 */
void vector_logic_xor_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector ^ vector */
    vector_logic_xor_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] ^ rhs[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector ^ scalar */
    vector_logic_xor_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = lhs[k] ^ rhs[11];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic xor of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 bitwise-XNOR kernels.
 *
 * Exercises vector_logic_xnor_int16 and vector_logic_xnor_const_int16
 * (scalar operand rhs[11]) on 50 elements. The reference is ~(x ^ y)
 * narrowed to int16_t. Mismatches are printed with iot_printf.
 */
void vector_logic_xnor_test_int16() {
    enum { COUNT = 50 };
    int16_t lhs[COUNT];
    int16_t rhs[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n - 23;
    }

    /* vector xnor vector */
    vector_logic_xnor_int16(lhs, rhs, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = ~(lhs[k] ^ rhs[k]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* vector xnor scalar */
    vector_logic_xnor_const_int16(lhs, rhs[11], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = ~(lhs[k] ^ rhs[11]);
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic xnor of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 left-shift kernels.
 *
 * Three variants are exercised on 50 elements:
 *   - vector_left_shift_int16:       val[k] << amount[k]
 *   - vector_left_shift_const_int16: val[k] << amount[2]
 *   - vector_const_left_shift_int16: val[6] << amount[k]
 * Values are 1..50 and shift amounts are 1..7. Mismatches against the
 * plain C reference are printed with iot_printf.
 */
void vector_left_shift_test_int16() {
    enum { COUNT = 50 };
    int16_t val[COUNT];
    uint16_t amount[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        val[n] = n + 1;
        amount[n] = n % 7 + 1;
    }

    /* per-lane value, per-lane shift */
    vector_left_shift_int16(val, amount, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] << amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* per-lane value, fixed shift */
    vector_left_shift_const_int16(val, amount[2], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] << amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* fixed value, per-lane shift */
    vector_const_left_shift_int16(amount, val[6], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[6] << amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("left shift of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 logical (zero-filling) right-shift kernels.
 *
 * Three variants are exercised on 50 elements:
 *   - vector_logic_right_shift_int16:       val[k] >> amount[k]
 *   - vector_logic_right_shift_const_int16: val[k] >> amount[2]
 *   - vector_const_logic_right_shift_int16: val[6] >> amount[k]
 * The reference casts the value to uint16_t before shifting, so the sign
 * bit is not replicated. Mismatches are printed with iot_printf.
 */
void vector_logic_right_shift_test_int16() {
    enum { COUNT = 50 };
    int16_t val[COUNT];
    uint16_t amount[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        val[n] = n - 50;
        amount[n] = n % 7 + 1;
    }

    /* per-lane value, per-lane shift */
    vector_logic_right_shift_int16(val, amount, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (uint16_t)val[k] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* per-lane value, fixed shift */
    vector_logic_right_shift_const_int16(val, amount[2], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (uint16_t)val[k] >> amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* fixed value, per-lane shift */
    vector_const_logic_right_shift_int16(amount, val[6], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = (uint16_t)val[6] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("logic right shift of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 arithmetic right-shift kernels.
 *
 * Three variants are exercised on 50 elements:
 *   - vector_arithmatic_right_shift_int16:       val[k] >> amount[k]
 *   - vector_arithmatic_right_shift_const_int16: val[k] >> amount[2]
 *   - vector_const_arithmatic_right_shift_int16: val[6] >> amount[k]
 * The reference uses the C >> operator directly on the signed value.
 * Mismatches are printed with iot_printf.
 */
void vector_arithmatic_right_shift_test_int16() {
    enum { COUNT = 50 };
    int16_t val[COUNT];
    uint16_t amount[COUNT];
    int16_t out[COUNT];
    int16_t expect;

    for (uint32_t n = 0; n < COUNT; n++) {
        val[n] = n - 50;
        amount[n] = n % 7 + 1;
    }

    /* per-lane value, per-lane shift */
    vector_arithmatic_right_shift_int16(val, amount, out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* per-lane value, fixed shift */
    vector_arithmatic_right_shift_const_int16(val, amount[2], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[k] >> amount[2];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    /* fixed value, per-lane shift */
    vector_const_arithmatic_right_shift_int16(amount, val[6], out, COUNT);
    for (uint16_t k = 0; k < COUNT; k++) {
        expect = val[6] >> amount[k];
        if (expect == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, out[k]);
    }

    iot_printf("arithmatic right shift of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for the int16 -> int8 half-extraction kernels.
 *
 * Exercises vector_high_half_bits_int16, vector_low_half_bits_int16 and
 * the combined vector_high_and_low_half_bits_int16 on 50 elements. The
 * reference high half is (x >> 8) and the low half is (x & 0xff), each
 * narrowed to int8_t. Mismatches are reported through iot_printf.
 */
void vector_half_bits_test_int16() {
    enum { COUNT = 50 };
    int16_t src[COUNT];
    int8_t hi[COUNT];
    int8_t lo[COUNT];
    int8_t expect;

    for (int16_t n = 0; n < COUNT; n++) {
        /* value is narrowed to int16_t on store */
        src[n] = n + (n + 80) * 506;
    }

    /* high half */
    vector_high_half_bits_int16(src, hi, COUNT);
    for (int16_t k = 0; k < COUNT; k++) {
        expect = src[k] >> 8;
        if (expect == hi[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, hi[k]);
    }
    iot_printf("vector_high_half_bits_int16 finished\n");

    /* low half */
    vector_low_half_bits_int16(src, lo, COUNT);
    for (int16_t k = 0; k < COUNT; k++) {
        expect = src[k] & 0xff;
        if (expect == lo[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect, lo[k]);
    }
    iot_printf("vector_low_half_bits_int16 finished\n");

    /* both halves in one call; high is checked before low for each lane */
    vector_high_and_low_half_bits_int16(src, hi, lo, COUNT);
    for (int16_t k = 0; k < COUNT; k++) {
        expect = src[k] >> 8;
        if (expect != hi[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expect, hi[k]);
        }
        expect = src[k] & 0xff;
        if (expect != lo[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expect, lo[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_int16 finished\n");
}
 | |
| 
 | |
| 
 | |
/*
 * Compares vector_add_const_uint32 and vector_add_uint32 with a scalar
 * reference over 25 generated uint32 elements.
 *
 * When SATURATION is defined the reference clamps the sum to 0xffffffff;
 * otherwise it uses plain unsigned addition. Each mismatching element is
 * reported through iot_printf.
 */
void vector_add_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector + constant (first element of rhs) */
    vector_add_const_uint32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
#ifdef SATURATION
        uint64_t wide = (uint64_t)lhs[k] + (uint64_t)rhs[0];
        if (wide > 0xffffffff) {
            expect[k] = 0xffffffff;
        } else {
            expect[k] = wide;
        }
#else
        expect[k] = lhs[k] + rhs[0];
#endif
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, a = %x, b = %x, golden %x, output %x\n", k, lhs[k], rhs[0], expect[k], out[k]);
        }
    }

    /* vector + vector */
    vector_add_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
#ifdef SATURATION
        uint64_t wide = (uint64_t)lhs[k] + (uint64_t)rhs[k];
        if (wide > 0xffffffff) {
            expect[k] = 0xffffffff;
        } else {
            expect[k] = wide;
        }
#else
        expect[k] = lhs[k] + rhs[k];
#endif
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, a = %x, b = %x, golden %x, output %x\n", k, lhs[k], rhs[k], expect[k], out[k]);
        }
    }
    iot_printf("add const of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_sub_const_uint32 (a - c), vector_const_sub_uint32 (c - a)
 * and vector_sub_uint32 (a - b) with a scalar reference over 25 generated
 * uint32 elements.
 *
 * When SATURATION is defined the reference yields 0 whenever the subtrahend
 * exceeds the minuend; otherwise it uses plain unsigned subtraction.
 * Mismatches are reported through iot_printf.
 */
void vector_sub_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000001 * k + 23;
        rhs[k] = 300000001 * k;
    }

    /* vector - constant */
    vector_sub_const_uint32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
#ifdef SATURATION
        expect[k] = (lhs[k] < rhs[0]) ? 0 : lhs[k] - rhs[0];
#else
        expect[k] = lhs[k] - rhs[0];
#endif
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* constant - vector */
    vector_const_sub_uint32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
#ifdef SATURATION
        expect[k] = (lhs[k] > rhs[0]) ? 0 : rhs[0] - lhs[k];
#else
        expect[k] = rhs[0] - lhs[k];
#endif
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector - vector */
    vector_sub_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
#ifdef SATURATION
        expect[k] = (lhs[k] < rhs[k]) ? 0 : lhs[k] - rhs[k];
#else
        expect[k] = lhs[k] - rhs[k];
#endif
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("sub const of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Tests the uint32 vector multiply family against scalar references.
 *
 * Covers, over 100 generated elements:
 *   - vector_multiply_uint32 / vector_multiply_const_uint32 (64-bit product)
 *   - vector_multiply_uint32_high_32bit / ..._const_..._high_32bit
 *   - vector_multiply_uint32_low_32bit / ..._const_..._low_32bit
 * The constant operand is always b[1]. With SATURATION defined the low-32
 * reference clamps to 0xffffffff instead of truncating.
 *
 * Mismatches are logged through iot_printf. 64-bit values are printed as two
 * 32-bit hex halves because passing a 64-bit argument to a %d conversion is
 * a format/argument mismatch.
 *
 * If any buffer cannot be allocated the test logs that and returns without
 * running; whatever was allocated is released.
 */
void vector_mul_test_uint32() {
    uint32_t length = 100;
    uint32_t *a = (uint32_t *)os_mem_malloc(1, length * sizeof(uint32_t));
    uint32_t *b = (uint32_t *)os_mem_malloc(1, length * sizeof(uint32_t));
    uint32_t *l = (uint32_t *)os_mem_malloc(1, length * sizeof(uint32_t));
    uint32_t *h = (uint32_t *)os_mem_malloc(1, length * sizeof(uint32_t));
    uint64_t *w = (uint64_t *)os_mem_malloc(1, length * sizeof(uint64_t));

    if (!a || !b || !l || !h || !w) {
        iot_printf("vector_mul_test_uint32: allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = 201 * (103 - i);
        b[i] = 301 * (127 - i);
    }

    vector_multiply_uint32(a, b, w, length);
    for (uint32_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)a[i] * (uint64_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden hi %x lo %x, output hi %x lo %x\n",
                       i, a[i], b[i],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(w[i] >> 32), (uint32_t)w[i]);
        }
    }
    iot_printf("vector_multiply_uint32 finished\n");

    vector_multiply_const_uint32(a, b[1], w, length);
    for (uint32_t i = 0; i < length; i++) {
        uint64_t g = (uint64_t)a[i] * (uint64_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden hi %x lo %x, output hi %x lo %x\n",
                       i, a[i], b[1],
                       (uint32_t)(g >> 32), (uint32_t)g,
                       (uint32_t)(w[i] >> 32), (uint32_t)w[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32 finished\n");

    vector_multiply_uint32_high_32bit(a, b, h, length);
    for (uint32_t i = 0; i < length; i++) {
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[i]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_uint32_high finished\n");

    vector_multiply_const_uint32_high_32bit(a, b[1], h, length);
    for (uint32_t i = 0; i < length; i++) {
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[1]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32_high finished\n");

    vector_multiply_uint32_low_32bit(a, b, l, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
    #else
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[i]) & 0xffffffff;
    #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, l[i]);
        }
    }
    iot_printf("vector_multiply_uint32_low finished\n");

    vector_multiply_const_uint32_low_32bit(a, b[1], l, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[1];
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
    #else
        uint32_t g = ((uint64_t)a[i] * (uint64_t)b[1]) & 0xffffffff;
    #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_uint32_low finished\n");

cleanup:
    /* Guarded because os_mem_free's behaviour on a null pointer is not visible here. */
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (h) {
        os_mem_free(h);
    }
    if (l) {
        os_mem_free(l);
    }
    if (w) {
        os_mem_free(w);
    }
}
 | |
| 
 | |
/*
 * Tests vector_mul_add_uint32 (j + a*b) and vector_mul_sub_uint32 (j - a*b)
 * against scalar references over 100 generated elements.
 *
 * With SATURATION defined the reference clamps the product and the final
 * result to the uint32 range (0 on underflow for the subtract case);
 * otherwise it uses plain unsigned arithmetic. Mismatches are logged through
 * iot_printf.
 *
 * If any buffer cannot be allocated the test logs that and returns without
 * running; whatever was allocated is released.
 */
void vector_madd_msub_test_uint32() {
    uint32_t length = 100;
    uint32_t *a = os_mem_malloc(1, length * sizeof(*a));
    uint32_t *b = os_mem_malloc(1, length * sizeof(*b));
    uint32_t *j = os_mem_malloc(1, length * sizeof(*j));
    uint32_t *o = os_mem_malloc(1, length * sizeof(*o));

    if (!a || !b || !j || !o) {
        iot_printf("vector_madd_msub_test_uint32: allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = 222 * (103 - i);
        b[i] = 333 * (127 - i);
        j[i] = i + 120000000;
    }

    vector_mul_add_uint32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        gg = gg > 0xffffffff ? 0xffffffff : gg;
        gg = (uint64_t)j[i] + gg;
        uint32_t g = gg > 0xffffffff ? 0xffffffff : gg;
    #else
        uint32_t g = j[i] + a[i] * b[i];
    #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_add_uint32 finished\n");

    vector_mul_sub_uint32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        uint64_t gg = (uint64_t)a[i] * (uint64_t)b[i];
        gg = gg > 0xffffffff ? 0xffffffff : gg;
        uint32_t g = j[i] < gg ? 0 : j[i] - gg;
    #else
        uint32_t g = j[i] - a[i] * b[i];
    #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_uint32 finished\n");

cleanup:
    /* Guarded because os_mem_free's behaviour on a null pointer is not visible here. */
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (j) {
        os_mem_free(j);
    }
    if (o) {
        os_mem_free(o);
    }
}
 | |
| 
 | |
/*
 * Compares vector_min_uint32 and vector_min_const_uint32 with a scalar
 * element-wise minimum over 25 generated uint32 elements. The constant
 * operand is the first element of the second input. Mismatches are logged
 * through iot_printf.
 */
void vector_min_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector vs vector */
    vector_min_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        if (lhs[k] < rhs[k]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[k];
        }
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector vs constant */
    vector_min_const_uint32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        if (lhs[k] < rhs[0]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[0];
        }
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("min of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_max_uint32 and vector_max_const_uint32 with a scalar
 * element-wise maximum over 25 generated uint32 elements. The constant
 * operand is the first element of the second input. Mismatches are logged
 * through iot_printf.
 */
void vector_max_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector vs vector */
    vector_max_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        if (lhs[k] > rhs[k]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[k];
        }
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector vs constant */
    vector_max_const_uint32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        if (lhs[k] > rhs[0]) {
            expect[k] = lhs[k];
        } else {
            expect[k] = rhs[0];
        }
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("max of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_equal_uint32 and vector_equal_const_uint32 with a scalar
 * reference that yields 1 for equal elements and 0 otherwise, over 25
 * generated uint32 elements. The constant operand is element 11 of the
 * second input. Mismatches are logged through iot_printf.
 */
void vector_equal_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector vs vector */
    vector_equal_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] == rhs[k]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector vs constant */
    vector_equal_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] == rhs[11]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("equal of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_not_equal_uint32 and vector_not_equal_const_uint32 with a
 * scalar reference that yields 1 for differing elements and 0 otherwise,
 * over 25 generated uint32 elements. The constant operand is element 11 of
 * the second input. Mismatches are logged through iot_printf.
 */
void vector_not_equal_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector vs vector */
    vector_not_equal_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] != rhs[k]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector vs constant */
    vector_not_equal_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] != rhs[11]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("not equal of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares the uint32 "less than" helpers with a scalar 0/1 reference over
 * 25 generated elements:
 *   - vector_less_than_uint32        expects a[i] < b[i]
 *   - vector_less_than_const_uint32  expects a[i] < c
 *   - vector_const_less_than_uint32  expects a[i] > c  (i.e. c < a[i])
 * where c is element 11 of the second input. Mismatches are logged through
 * iot_printf.
 */
void vector_less_than_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector < vector */
    vector_less_than_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] < rhs[k]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector < constant */
    vector_less_than_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] < rhs[11]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* constant < vector */
    vector_const_less_than_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] > rhs[11]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("less than of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares the uint32 "greater or equal" helpers with a scalar 0/1 reference
 * over 25 generated elements:
 *   - vector_greater_or_equal_uint32        expects a[i] >= b[i]
 *   - vector_greater_or_equal_const_uint32  expects a[i] >= c
 *   - vector_const_greater_or_equal_uint32  expects a[i] <= c  (i.e. c >= a[i])
 * where c is element 11 of the second input. Mismatches are logged through
 * iot_printf.
 */
void vector_greater_or_equal_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector >= vector */
    vector_greater_or_equal_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] >= rhs[k]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector >= constant */
    vector_greater_or_equal_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] >= rhs[11]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* constant >= vector */
    vector_const_greater_or_equal_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = (lhs[k] <= rhs[11]);
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("greater or equal of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_logic_and_uint32 and vector_logic_and_const_uint32 with the
 * scalar `&` operator over 25 generated uint32 elements. The constant operand
 * is element 11 of the second input. Mismatches are logged through
 * iot_printf.
 */
void vector_logic_and_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector & vector */
    vector_logic_and_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = lhs[k];
        expect[k] &= rhs[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector & constant */
    vector_logic_and_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = lhs[k];
        expect[k] &= rhs[11];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("logic and of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_logic_or_uint32 and vector_logic_or_const_uint32 with the
 * scalar `|` operator over 25 generated uint32 elements. The constant operand
 * is element 11 of the second input. Mismatches are logged through
 * iot_printf.
 */
void vector_logic_or_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200 * k;
        rhs[k] = 300000000 * k + 23;
    }

    /* vector | vector */
    vector_logic_or_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = lhs[k];
        expect[k] |= rhs[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector | constant */
    vector_logic_or_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = lhs[k];
        expect[k] |= rhs[11];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("logic or of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_logic_xor_uint32 and vector_logic_xor_const_uint32 with the
 * scalar `^` operator over 25 generated uint32 elements. The constant operand
 * is element 11 of the second input. Mismatches are logged through
 * iot_printf.
 */
void vector_logic_xor_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* vector ^ vector */
    vector_logic_xor_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = lhs[k];
        expect[k] ^= rhs[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector ^ constant */
    vector_logic_xor_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = lhs[k];
        expect[k] ^= rhs[11];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("logic xor of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares vector_logic_xnor_uint32 and vector_logic_xnor_const_uint32 with
 * the scalar expression ~(x ^ y) over 25 generated uint32 elements. The
 * constant operand is element 11 of the second input. Mismatches are logged
 * through iot_printf.
 */
void vector_logic_xnor_test_uint32() {
    enum { COUNT = 25 };
    uint32_t lhs[COUNT];
    uint32_t rhs[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k - 23;
    }

    /* vector xnor vector */
    vector_logic_xnor_uint32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        uint32_t diff = lhs[k] ^ rhs[k];
        expect[k] = ~diff;
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* vector xnor constant */
    vector_logic_xnor_const_uint32(lhs, rhs[11], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        uint32_t diff = lhs[k] ^ rhs[11];
        expect[k] = ~diff;
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("logic xnor of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares the uint32 left-shift helpers with the scalar `<<` operator over
 * 25 generated elements (values and shift counts both run 1..25):
 *   - vector_left_shift_uint32        expects a[i] << b[i]
 *   - vector_left_shift_const_uint32  expects a[i] << b[2]
 *   - vector_const_left_shift_uint32  expects a[6] << b[i]
 * Mismatches are logged through iot_printf.
 */
void vector_left_shift_test_uint32() {
    enum { COUNT = 25 };
    uint32_t value[COUNT];
    uint32_t shift[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        value[k] = k + 1;
        shift[k] = k + 1;
    }

    /* per-element value, per-element shift count */
    vector_left_shift_uint32(value, shift, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = value[k] << shift[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* per-element value, constant shift count */
    vector_left_shift_const_uint32(value, shift[2], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = value[k] << shift[2];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* constant value, per-element shift count */
    vector_const_left_shift_uint32(shift, value[6], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = value[6] << shift[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("left shift of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Compares the uint32 right-shift helpers with the scalar `>>` operator over
 * 25 generated elements (shift counts run 1..25):
 *   - vector_right_shift_uint32        expects a[i] >> b[i]
 *   - vector_right_shift_const_uint32  expects a[i] >> b[2]
 *   - vector_const_right_shift_uint32  expects a[6] >> b[i]
 * Mismatches are logged through iot_printf.
 */
void vector_right_shift_test_uint32() {
    enum { COUNT = 25 };
    uint32_t value[COUNT];
    uint32_t shift[COUNT];
    uint32_t out[COUNT];
    uint32_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        value[k] = k * 1000000000 - 25;
        shift[k] = k + 1;
    }

    /* per-element value, per-element shift count */
    vector_right_shift_uint32(value, shift, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = value[k] >> shift[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* per-element value, constant shift count */
    vector_right_shift_const_uint32(value, shift[2], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = value[k] >> shift[2];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }

    /* constant value, per-element shift count */
    vector_const_right_shift_uint32(shift, value[6], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = value[6] >> shift[k];
        if (expect[k] != out[k]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], out[k]);
        }
    }
    iot_printf("logic right shift of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the uint32 "half bits" helpers against a scalar reference.
 *
 * Runs vector_high_half_bits_uint32, vector_low_half_bits_uint32 and
 * vector_high_and_low_half_bits_uint32 over 25 generated inputs. For each
 * element the expected value is `value >> 16` (high half) or
 * `value & 0xffff` (low half), stored as uint16_t; every mismatch is logged
 * with iot_printf.
 */
void vector_half_bits_test_uint32() {
    enum { COUNT = 25 };
    uint32_t src[COUNT];
    uint16_t high[COUNT];
    uint16_t low[COUNT];
    uint16_t expect[COUNT];
    uint32_t k;

    for (k = 0; k < COUNT; k++) {
        src[k] = k + (k + 80) * 65536;
    }

    /* High half only. */
    vector_high_half_bits_uint32(src, high, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = src[k] >> 16;
        if (expect[k] == high[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], high[k]);
    }
    iot_printf("vector_high_half_bits_uint32 finished\n");

    /* Low half only. */
    vector_low_half_bits_uint32(src, low, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = src[k] & 0xffff;
        if (expect[k] == low[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expect[k], low[k]);
    }
    iot_printf("vector_low_half_bits_uint32 finished\n");

    /* Both halves produced by a single call. */
    vector_high_and_low_half_bits_uint32(src, high, low, COUNT);
    for (k = 0; k < COUNT; k++) {
        expect[k] = src[k] >> 16;
        if (expect[k] != high[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expect[k], high[k]);
        }
        expect[k] = src[k] & 0xffff;
        if (expect[k] != low[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expect[k], low[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_uint32 finished\n");
}
 | |
| 
 | |
| 
 | |
/*
 * Tests vector_add_const_int32 and vector_add_int32 against a scalar
 * reference over 25 generated int32 elements, and prints cycle counts
 * (cpu_get_mcycle deltas) for 1000 repetitions of the vector add and of the
 * scalar reference loop.
 *
 * With SATURATION defined the reference clamps the 64-bit sum to the int32
 * range. Otherwise the reference wraps: the sum is computed in uint32_t and
 * converted back, because the generated inputs overflow int32_t and signed
 * overflow is undefined behaviour in C.
 *
 * Mismatches are logged through iot_printf.
 */
void vector_add_test_int32() {
    int32_t a[25];
    int32_t b[25];
    int32_t o[25];
    int32_t g[25];
    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    vector_add_const_int32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] + (int64_t)b[0];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
    #else
        /* Wrap in unsigned arithmetic to avoid signed-overflow UB. */
        g[i] = (int32_t)((uint32_t)a[i] + (uint32_t)b[0]);
    #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }

    /* Time 1000 runs of the vector implementation. */
    uint64_t begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1000; repeat++) {
        vector_add_int32(a, b, o, 25);
    }
    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    /* Time 1000 runs of the scalar reference; the last run leaves g filled. */
    begin = cpu_get_mcycle();
    for (uint32_t repeat = 0; repeat < 1000; repeat++) {
        for (uint32_t i = 0; i < 25; i++) {
        #ifdef SATURATION
            int64_t gg = (int64_t)a[i] + (int64_t)b[i];
            g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        #else
            /* Wrap in unsigned arithmetic to avoid signed-overflow UB. */
            g[i] = (int32_t)((uint32_t)a[i] + (uint32_t)b[i]);
        #endif
        }
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));

    for (uint32_t i = 0; i < 25; i++) {
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("add const of int32 test end\n");
}
 | |
| 
 | |
/*
 * Tests vector_sub_const_int32 (a - c), vector_const_sub_int32 (c - a) and
 * vector_sub_int32 (a - b) against a scalar reference over 25 generated
 * int32 elements, with c = b[0].
 *
 * With SATURATION defined the reference clamps the 64-bit difference to the
 * int32 range. Otherwise the reference wraps: the difference is computed in
 * uint32_t and converted back, because the generated inputs overflow int32_t
 * and signed overflow is undefined behaviour in C.
 *
 * Mismatches are logged through iot_printf; the constant-operand tests report
 * b[0], the operand that was actually used.
 */
void vector_sub_test_int32() {
    int32_t a[25];
    int32_t b[25];
    int32_t o[25];
    int32_t g[25];
    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 200000000 * i;
        b[i] = 300000000 * i + 23;
    }

    vector_sub_const_int32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] - (int64_t)b[0];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
    #else
        /* Wrap in unsigned arithmetic to avoid signed-overflow UB. */
        g[i] = (int32_t)((uint32_t)a[i] - (uint32_t)b[0]);
    #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("sub const of int32 test end\n");

    vector_const_sub_int32(a, b[0], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)b[0] - (int64_t)a[i];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
    #else
        /* Wrap in unsigned arithmetic to avoid signed-overflow UB. */
        g[i] = (int32_t)((uint32_t)b[0] - (uint32_t)a[i]);
    #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[0], g[i], o[i]);
        }
    }
    iot_printf("const sub of int32 test end\n");

    vector_sub_int32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] - (int64_t)b[i];
        g[i] = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
    #else
        /* Wrap in unsigned arithmetic to avoid signed-overflow UB. */
        g[i] = (int32_t)((uint32_t)a[i] - (uint32_t)b[i]);
    #endif
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g[i], o[i]);
        }
    }
    iot_printf("sub of int32 test end\n");
}
 | |
| 
 | |
/*
 * Tests the int32 vector multiply family against scalar references.
 *
 * Covers, over 100 generated elements:
 *   - vector_multiply_int32 / vector_multiply_const_int32 (64-bit product)
 *   - vector_multiply_int32_high_32bit / ..._const_..._high_32bit
 *   - vector_multiply_int32_low_32bit / ..._const_..._low_32bit
 * The constant operand is always b[1], and that is the value reported in the
 * constant-operand diagnostics. With SATURATION defined the low-32 reference
 * clamps to the int32 range instead of truncating.
 *
 * Mismatches are logged through iot_printf. 64-bit values are printed as two
 * 32-bit hex halves because passing a 64-bit argument to a %d conversion is
 * a format/argument mismatch.
 *
 * If any buffer cannot be allocated the test logs that and returns without
 * running; whatever was allocated is released.
 */
void vector_mul_test_int32() {
    uint32_t length = 100;
    int32_t *a = (int32_t *)os_mem_malloc(1, length * sizeof(int32_t));
    int32_t *b = (int32_t *)os_mem_malloc(1, length * sizeof(int32_t));
    int32_t *l = (int32_t *)os_mem_malloc(1, length * sizeof(int32_t));
    int32_t *h = (int32_t *)os_mem_malloc(1, length * sizeof(int32_t));
    int64_t *w = (int64_t *)os_mem_malloc(1, length * sizeof(int64_t));

    if (!a || !b || !l || !h || !w) {
        iot_printf("vector_mul_test_int32: allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = -201 * (i - 14);
        b[i] = 301 * (i - 6);
    }

    vector_multiply_int32(a, b, w, length);
    for (uint32_t i = 0; i < length; i++) {
        int64_t g = (int64_t)a[i] * (int64_t)b[i];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden hi %x lo %x, output hi %x lo %x\n",
                       i, a[i], b[i],
                       (uint32_t)((uint64_t)g >> 32), (uint32_t)(uint64_t)g,
                       (uint32_t)((uint64_t)w[i] >> 32), (uint32_t)(uint64_t)w[i]);
        }
    }
    iot_printf("vector_multiply_int32 finished\n");

    vector_multiply_const_int32(a, b[1], w, length);
    for (uint32_t i = 0; i < length; i++) {
        int64_t g = (int64_t)a[i] * (int64_t)b[1];
        if (g != w[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden hi %x lo %x, output hi %x lo %x\n",
                       i, a[i], b[1],
                       (uint32_t)((uint64_t)g >> 32), (uint32_t)(uint64_t)g,
                       (uint32_t)((uint64_t)w[i] >> 32), (uint32_t)(uint64_t)w[i]);
        }
    }
    iot_printf("vector_multiply_const_int32 finished\n");

    vector_multiply_int32_high_32bit(a, b, h, length);
    for (uint32_t i = 0; i < length; i++) {
        int32_t g = ((int64_t)a[i] * (int64_t)b[i]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, h[i]);
        }
    }
    iot_printf("vector_multiply_int32_high finished\n");

    vector_multiply_const_int32_high_32bit(a, b[1], h, length);
    for (uint32_t i = 0; i < length; i++) {
        int32_t g = ((int64_t)a[i] * (int64_t)b[1]) >> 32;
        if (g != h[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, h[i]);
        }
    }
    iot_printf("vector_multiply_const_int32_high finished\n");

    vector_multiply_int32_low_32bit(a, b, l, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[i];
        int32_t g = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
    #else
        int32_t g = ((int64_t)a[i] * (int64_t)b[i]) & 0xffffffff;
    #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[i], g, l[i]);
        }
    }
    iot_printf("vector_multiply_int32_low finished\n");

    vector_multiply_const_int32_low_32bit(a, b[1], l, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[1];
        int32_t g = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
    #else
        int32_t g = ((int64_t)a[i] * (int64_t)b[1]) & 0xffffffff;
    #endif
        if (g != l[i]) {
            iot_printf("incorrect at %d, a = %d, b = %d, golden %d, output %d\n", i, a[i], b[1], g, l[i]);
        }
    }
    iot_printf("vector_multiply_const_int32_low finished\n");

cleanup:
    /* Guarded because os_mem_free's behaviour on a null pointer is not visible here. */
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (h) {
        os_mem_free(h);
    }
    if (l) {
        os_mem_free(l);
    }
    if (w) {
        os_mem_free(w);
    }
}
 | |
| 
 | |
/*
 * Tests vector_mul_add_int32 (j + a*b) and vector_mul_sub_int32 (j - a*b)
 * against scalar references over 100 generated elements.
 *
 * With SATURATION defined the reference clamps both the product and the
 * final result to the int32 range. Otherwise the reference wraps, computed
 * in uint32_t so that an overflowing input set cannot trigger signed-overflow
 * undefined behaviour. In both modes the expected value is held in an
 * int32_t so that it matches the %d conversion used when reporting.
 *
 * Mismatches are logged through iot_printf. If any buffer cannot be allocated
 * the test logs that and returns without running; whatever was allocated is
 * released.
 */
void vector_madd_msub_test_int32() {
    uint32_t length = 100;
    int32_t *a = os_mem_malloc(1, length * sizeof(*a));
    int32_t *b = os_mem_malloc(1, length * sizeof(*b));
    int32_t *j = os_mem_malloc(1, length * sizeof(*j));
    int32_t *o = os_mem_malloc(1, length * sizeof(*o));

    if (!a || !b || !j || !o) {
        iot_printf("vector_madd_msub_test_int32: allocation failed\n");
        goto cleanup;
    }

    for (uint32_t i = 0; i < length; i++) {
        a[i] = -222 * (i - 25);
        b[i] = 333 * (i - 33);
        j[i] = i - 1200000000;
    }

    vector_mul_add_int32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[i];
        gg = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        gg = (int64_t)j[i] + gg;
        int32_t g = (int32_t)(gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg));
    #else
        int32_t g = (int32_t)((uint32_t)j[i] + (uint32_t)a[i] * (uint32_t)b[i]);
    #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_add_int32 finished\n");

    vector_mul_sub_int32(a, b, j, o, length);
    for (uint32_t i = 0; i < length; i++) {
    #ifdef SATURATION
        int64_t gg = (int64_t)a[i] * (int64_t)b[i];
        gg = gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg);
        gg = (int64_t)j[i] - gg;
        int32_t g = (int32_t)(gg > INT32_MAX ? INT32_MAX : (gg < INT32_MIN ? INT32_MIN : gg));
    #else
        int32_t g = (int32_t)((uint32_t)j[i] - (uint32_t)a[i] * (uint32_t)b[i]);
    #endif
        if (g != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g, o[i]);
        }
    }
    iot_printf("vector_mul_sub_int32 finished\n");

cleanup:
    /* Guarded because os_mem_free's behaviour on a null pointer is not visible here. */
    if (a) {
        os_mem_free(a);
    }
    if (b) {
        os_mem_free(b);
    }
    if (j) {
        os_mem_free(j);
    }
    if (o) {
        os_mem_free(o);
    }
}
 | |
| 
 | |
/*
 * Checks vector_min_int32() and vector_min_const_int32() on 25-element
 * vectors against an element-wise scalar minimum, logging every mismatch
 * through iot_printf().
 */
void vector_min_test_int32() {
    enum { COUNT = 25 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* lhs = 0, 2, 4, ...; rhs = 23, 26, 29, ... */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    /* Vector against vector. */
    vector_min_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = rhs[k];
        if (lhs[k] < rhs[k]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[0]. */
    vector_min_const_int32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = rhs[0];
        if (lhs[k] < rhs[0]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("min of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_max_int32() and vector_max_const_int32() on 25-element
 * vectors against an element-wise scalar maximum, logging every mismatch
 * through iot_printf().
 */
void vector_max_test_int32() {
    enum { COUNT = 25 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* lhs = 0, 2, 4, ...; rhs = 23, 26, 29, ... */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 2 * k;
        rhs[k] = 3 * k + 23;
    }

    /* Vector against vector. */
    vector_max_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = rhs[k];
        if (lhs[k] > rhs[k]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[0]. */
    vector_max_const_int32(lhs, rhs[0], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = rhs[0];
        if (lhs[k] > rhs[0]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("max of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_equal_int32() and vector_equal_const_int32() on 25-element
 * vectors.  The expected output is 1 where the operands are equal and 0
 * otherwise; mismatches are logged through iot_printf().
 */
void vector_equal_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_equal_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] == rhs[k]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[PIVOT]. */
    vector_equal_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] == rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("equal of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_not_equal_int32() and vector_not_equal_const_int32() on
 * 25-element vectors.  The expected output is 1 where the operands differ
 * and 0 otherwise; mismatches are logged through iot_printf().
 */
void vector_not_equal_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_not_equal_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] != rhs[k]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[PIVOT]. */
    vector_not_equal_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] != rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("not equal of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the three int32 "less than" kernels on 25-element vectors:
 *   vector_less_than_int32        a[i] <  b[i]
 *   vector_less_than_const_int32  a[i] <  c
 *   vector_const_less_than_int32  c    <  a[i]
 * The expected output is 1 when the relation holds and 0 otherwise;
 * mismatches are logged through iot_printf().
 */
void vector_less_than_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The products exceed 32 bits for most k and are truncated on store. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 2000000000 * k;
        rhs[k] = 3000000000 * k - 23;
    }

    /* Vector against vector. */
    vector_less_than_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] < rhs[k]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector on the left, scalar rhs[PIVOT] on the right. */
    vector_less_than_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] < rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Scalar on the left: c < a[i] is checked as a[i] > c. */
    vector_const_less_than_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] > rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("less than of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the three int32 "greater or equal" kernels on 25-element vectors:
 *   vector_greater_or_equal_int32        a[i] >= b[i]
 *   vector_greater_or_equal_const_int32  a[i] >= c
 *   vector_const_greater_or_equal_int32  c    >= a[i]
 * The expected output is 1 when the relation holds and 0 otherwise;
 * mismatches are logged through iot_printf().
 */
void vector_greater_or_equal_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_greater_or_equal_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] >= rhs[k]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector on the left, scalar rhs[PIVOT] on the right. */
    vector_greater_or_equal_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] >= rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Scalar on the left: c >= a[i] is checked as a[i] <= c. */
    vector_const_greater_or_equal_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (lhs[k] <= rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("greater or equal of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_logic_and_int32() and vector_logic_and_const_int32() on
 * 25-element vectors against the scalar bitwise AND, logging every
 * mismatch through iot_printf().
 */
void vector_logic_and_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_logic_and_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = lhs[k] & rhs[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[PIVOT]. */
    vector_logic_and_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = lhs[k] & rhs[PIVOT];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("logic and of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_logic_or_int32() and vector_logic_or_const_int32() on
 * 25-element vectors against the scalar bitwise OR, logging every
 * mismatch through iot_printf().
 */
void vector_logic_or_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_logic_or_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = lhs[k] | rhs[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[PIVOT]. */
    vector_logic_or_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = lhs[k] | rhs[PIVOT];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("logic or of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_logic_xor_int32() and vector_logic_xor_const_int32() on
 * 25-element vectors against the scalar bitwise XOR, logging every
 * mismatch through iot_printf().
 */
void vector_logic_xor_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_logic_xor_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = lhs[k] ^ rhs[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[PIVOT]. */
    vector_logic_xor_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = lhs[k] ^ rhs[PIVOT];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("logic xor of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_logic_xnor_int32() and vector_logic_xnor_const_int32() on
 * 25-element vectors against the scalar complement-of-XOR, logging every
 * mismatch through iot_printf().
 */
void vector_logic_xnor_test_int32() {
    enum { COUNT = 25, PIVOT = 11 };
    int32_t lhs[COUNT];
    int32_t rhs[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* The ramps are formed in uint32_t and wrap before being stored. */
    for (k = 0; k < COUNT; k++) {
        lhs[k] = 200000000 * k;
        rhs[k] = 300000000 * k - 23;
    }

    /* Vector against vector. */
    vector_logic_xnor_int32(lhs, rhs, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = ~(lhs[k] ^ rhs[k]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[PIVOT]. */
    vector_logic_xnor_const_int32(lhs, rhs[PIVOT], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = ~(lhs[k] ^ rhs[PIVOT]);
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("logic xnor of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the three int32 left-shift kernels on 25-element vectors:
 *   vector_left_shift_int32        a[i] << b[i]
 *   vector_left_shift_const_int32  a[i] << c
 *   vector_const_left_shift_int32  c    << b[i]
 * Mismatches are logged through iot_printf().
 *
 * The reference shifts are performed on uint32_t and converted back to
 * int32_t: several inputs are negative or lose high bits when shifted, and
 * left-shifting such signed values is undefined behaviour in C.
 */
void vector_left_shift_test_int32() {
    int32_t a[25];
    uint32_t b[25];
    int32_t o[25];
    int32_t g[25];

    /* a wraps in uint32_t before being stored; b = 0..24 (always < 32). */
    for (uint32_t i = 0; i < 25; i++) {
        a[i] = i * 500000000 + 1;
        b[i] = i;
    }

    /* Per-element shift amounts. */
    vector_left_shift_int32(a, b, o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = (int32_t)((uint32_t)a[i] << b[i]);
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    /* One shift amount, b[2], for every element. */
    vector_left_shift_const_int32(a, b[2], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = (int32_t)((uint32_t)a[i] << b[2]);
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    /* One value, a[6], shifted by each b[i]. */
    vector_const_left_shift_int32(b, a[6], o, 25);
    for (uint32_t i = 0; i < 25; i++) {
        g[i] = (int32_t)((uint32_t)a[6] << b[i]);
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %d, output %d\n", i, g[i], o[i]);
        }
    }

    iot_printf("left shift of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the three int32 logical right-shift kernels on 25-element vectors:
 *   vector_logic_right_shift_int32        a[i] >> b[i]
 *   vector_logic_right_shift_const_int32  a[i] >> c
 *   vector_const_logic_right_shift_int32  c    >> b[i]
 * The reference shifts the value as uint32_t, so zeros enter from the top.
 * Mismatches are logged through iot_printf().
 */
void vector_logic_right_shift_test_int32() {
    enum { COUNT = 25 };
    int32_t val[COUNT];
    uint32_t amt[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* val = -25 .. -1 (after wrap); amt cycles through 1..7. */
    for (k = 0; k < COUNT; k++) {
        val[k] = k - 25;
        amt[k] = k % 7 + 1;
    }

    /* Per-element shift amounts. */
    vector_logic_right_shift_int32(val, amt, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (uint32_t)val[k] >> amt[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* One shift amount, amt[2], for every element. */
    vector_logic_right_shift_const_int32(val, amt[2], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (uint32_t)val[k] >> amt[2];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* One value, val[6], shifted by each amt[k]. */
    vector_const_logic_right_shift_int32(amt, val[6], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = (uint32_t)val[6] >> amt[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("logic right shift of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the three int32 arithmetic right-shift kernels on 25-element
 * vectors:
 *   vector_arithmatic_right_shift_int32        a[i] >> b[i]
 *   vector_arithmatic_right_shift_const_int32  a[i] >> c
 *   vector_const_arithmatic_right_shift_int32  c    >> b[i]
 * The reference uses the compiler's signed >> on int32_t.  Mismatches are
 * logged through iot_printf().
 */
void vector_arithmatic_right_shift_test_int32() {
    enum { COUNT = 25 };
    int32_t val[COUNT];
    uint32_t amt[COUNT];
    int32_t out[COUNT];
    uint32_t k;

    /* val = -25 .. -1 (after wrap); amt cycles through 1..7. */
    for (k = 0; k < COUNT; k++) {
        val[k] = k - 25;
        amt[k] = k % 7 + 1;
    }

    /* Per-element shift amounts. */
    vector_arithmatic_right_shift_int32(val, amt, out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = val[k] >> amt[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* One shift amount, amt[2], for every element. */
    vector_arithmatic_right_shift_const_int32(val, amt[2], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = val[k] >> amt[2];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    /* One value, val[6], shifted by each amt[k]. */
    vector_const_arithmatic_right_shift_int32(amt, val[6], out, COUNT);
    for (k = 0; k < COUNT; k++) {
        int32_t expected = val[6] >> amt[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, out[k]);
    }

    iot_printf("arithmatic right shift of int32 test end\n");
}
 | |
| 
 | |
/*
 * Checks the int32 -> int16 half-word extraction kernels on 25 elements:
 *   vector_high_half_bits_int32          upper 16 bits
 *   vector_low_half_bits_int32           lower 16 bits
 *   vector_high_and_low_half_bits_int32  both in one call
 * Each input packs (k + 80) in the upper half and k in the lower half.
 * Mismatches are logged through iot_printf().
 */
void vector_half_bits_test_int32() {
    enum { COUNT = 25 };
    int32_t src[COUNT];
    int16_t hi[COUNT];
    int16_t lo[COUNT];
    int32_t k;

    for (k = 0; k < COUNT; k++) {
        src[k] = k + (k + 80) * 65536;
    }

    /* Upper halves only. */
    vector_high_half_bits_int32(src, hi, COUNT);
    for (k = 0; k < COUNT; k++) {
        int16_t expected = src[k] >> 16;
        if (expected == hi[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, hi[k]);
    }
    iot_printf("vector_high_half_bits_int32 finished\n");

    /* Lower halves only. */
    vector_low_half_bits_int32(src, lo, COUNT);
    for (k = 0; k < COUNT; k++) {
        int16_t expected = src[k] & 0xffff;
        if (expected == lo[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %d, output %d\n", k, expected, lo[k]);
    }
    iot_printf("vector_low_half_bits_int32 finished\n");

    /* Both halves from a single call; high is reported before low. */
    vector_high_and_low_half_bits_int32(src, hi, lo, COUNT);
    for (k = 0; k < COUNT; k++) {
        int16_t expected_hi = src[k] >> 16;
        int16_t expected_lo = src[k] & 0xffff;
        if (expected_hi != hi[k]) {
            iot_printf("incorrect higt at %d, golden %d, output %d\n", k, expected_hi, hi[k]);
        }
        if (expected_lo != lo[k]) {
            iot_printf("incorrect low at %d, golden %d, output %d\n", k, expected_lo, lo[k]);
        }
    }
    iot_printf("vector_high_and_low_half_bits_int32 finished\n");
}
 | |
| 
 | |
| 
 | |
/*
 * Checks vector_add_const_float() and vector_add_float() on 25-element
 * vectors against a scalar float addition, logging every mismatch through
 * iot_printf().  The comparison is exact (bit-for-bit equal values).
 */
void vector_add_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float out[COUNT];

    /* lhs = 0, 2, 4, ...; rhs = 23, 26, 29, ... */
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    /* Vector plus the scalar rhs[0]. */
    vector_add_const_float(lhs, rhs[0], out, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        float expected = lhs[k] + rhs[0];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %f, output %f\n", k, expected, out[k]);
    }

    /* Vector plus vector. */
    vector_add_float(lhs, rhs, out, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        float expected = lhs[k] + rhs[k];
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %f, output %f\n", k, expected, out[k]);
    }

    iot_printf("add const of float test end\n");
}
 | |
| 
 | |
/*
 * Checks the three float subtraction kernels on 25-element vectors:
 *   vector_sub_const_float  a[i] - c
 *   vector_const_sub_float  c    - a[i]
 *   vector_sub_float        a[i] - b[i]
 * Mismatches are logged through iot_printf() with both the value and its
 * raw 32-bit pattern.
 *
 * The bit pattern is obtained through a union rather than by casting the
 * float arrays to uint32_t *, which would violate the aliasing rules.
 */
void vector_sub_test_float() {
    union float_bits { float f; uint32_t u; };
    float a[25];
    float b[25];
    float o[25];
    float g[25];

    /* a = 0, 2, 4, ...; b = 23, 26, 29, ... */
    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 2 * i;
        b[i] = 3 * i + 23;
    }

    /* Vector minus the scalar b[0]. */
    vector_sub_const_float(a, b[0], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] - b[0];
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }

    /* The scalar b[0] minus vector. */
    vector_const_sub_float(a, b[0], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = b[0] - a[i];
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }

    /* Vector minus vector. */
    vector_sub_float(a, b, o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] - b[i];
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }

    iot_printf("sub const of float test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_multiply_float() and vector_multiply_const_float() on
 * 25-element vectors against a scalar float multiplication.  Mismatches
 * are logged through iot_printf() together with the raw 32-bit patterns
 * of the operands and results.
 *
 * Bit patterns are read through a union instead of casting the float
 * arrays to uint32_t *, which would violate the aliasing rules.
 */
void vector_mul_test_float() {
    union float_bits { float f; uint32_t u; };
    uint8_t length = 25;
    float a[25];
    float b[25];
    float g[25];
    float w[25];

    /* Evaluated in double, then rounded to float on store. */
    for (uint8_t i = 0; i < length; i++) {
        a[i] = -0.2 * (i - 2.5);
        b[i] = 0.3 * (i - 3.3);
    }

    /* Vector times vector. */
    vector_multiply_float(a, b, w, length);
    for (uint8_t i = 0; i < length; i++) {
        g[i] = a[i] * b[i];
        if (g[i] != w[i]) {
            union float_bits abits = { .f = a[i] };
            union float_bits bbits = { .f = b[i] };
            union float_bits gbits = { .f = g[i] };
            union float_bits wbits = { .f = w[i] };

            /* The summary line is emitted nine times in total, as before;
             * presumably so a failure stands out in the log - confirm. */
            for (uint8_t rep = 0; rep < 8; rep++) {
                iot_printf("golden %08x, output %08x\n", gbits.u, wbits.u);
            }
            iot_printf("golden %08x, output %08x\n\n", gbits.u, wbits.u);

            iot_printf("incorrect at %d, a = 0x%08x, b = 0x%08x, golden 0x%08x, output 0x%08x\n", i, abits.u, bbits.u, gbits.u, wbits.u);
            iot_printf("golden %f, output %f\n", g[i], w[i]);
        }
    }

    /* Vector times the scalar b[1]. */
    vector_multiply_const_float(a, b[1], w, length);
    for (uint8_t i = 0; i < length; i++) {
        g[i] = a[i] * b[1];
        if (g[i] != w[i]) {
            union float_bits abits = { .f = a[i] };
            union float_bits bbits = { .f = b[1] };
            union float_bits gbits = { .f = g[i] };
            union float_bits wbits = { .f = w[i] };
            iot_printf("incorrect at %d, a = %f(0x%08x), b = %f(0x%08x), golden %f(0x%08x), output %f(0x%08x)\n", i, a[i], abits.u, b[1], bbits.u, g[i], gbits.u, w[i], wbits.u);
        }
    }
}
 | |
| 
 | |
/*
 * Checks vector_mul_add_float() (j + a * b) and vector_mul_sub_float()
 * (j - a * b) on 25-element vectors.  Mismatches are logged through
 * iot_printf() with both the value and its raw 32-bit pattern.
 *
 * The reference rounds the product to float before the add/subtract (two
 * separate assignments).  Bit patterns are read through a union instead of
 * casting the float arrays to uint32_t *, which would violate the aliasing
 * rules.
 */
void vector_madd_msub_test_float() {
    union float_bits { float f; uint32_t u; };
    uint8_t length = 25;
    float a[25];
    float b[25];
    float j[25];
    float o[25];
    float g[25];

    /* i promotes to int, so the differences below are negative as written. */
    for (uint8_t i = 0; i < length; i++) {
        a[i] = -0.2 * (i - 25);
        b[i] = 0.3 * (i - 33);
        j[i] = i - 50;
    }

    /* o = j + a * b */
    vector_mul_add_float(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
        g[i] = a[i] * b[i];
        g[i] = g[i] + j[i];
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }
    iot_printf("vector_mul_add_float finished\n");

    /* o = j - a * b */
    vector_mul_sub_float(a, b, j, o, length);
    for (uint8_t i = 0; i < length; i++) {
        g[i] = a[i] * b[i];
        g[i] = j[i] - g[i];
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(0x%08x), output %f(0x%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }
    iot_printf("vector_mul_sub_float finished\n");
}
 | |
/*
 * Checks vector_min_float() and vector_min_const_float() on 25-element
 * vectors against an element-wise scalar minimum, logging every mismatch
 * through iot_printf().
 */
void vector_min_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float out[COUNT];

    /* lhs = 0, 2, 4, ...; rhs = 23, 26, 29, ... */
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    /* Vector against vector. */
    vector_min_float(lhs, rhs, out, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        float expected = rhs[k];
        if (lhs[k] < rhs[k]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %f, output %f\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[0]. */
    vector_min_const_float(lhs, rhs[0], out, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        float expected = rhs[0];
        if (lhs[k] < rhs[0]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %f, output %f\n", k, expected, out[k]);
    }

    iot_printf("min of float test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_max_float() and vector_max_const_float() on 25-element
 * vectors against an element-wise scalar maximum, logging every mismatch
 * through iot_printf().
 */
void vector_max_test_float() {
    enum { COUNT = 25 };
    float lhs[COUNT];
    float rhs[COUNT];
    float out[COUNT];

    /* lhs = 0, 2, 4, ...; rhs = 23, 26, 29, ... */
    for (uint32_t n = 0; n < COUNT; n++) {
        lhs[n] = 2 * n;
        rhs[n] = 3 * n + 23;
    }

    /* Vector against vector. */
    vector_max_float(lhs, rhs, out, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        float expected = rhs[k];
        if (lhs[k] > rhs[k]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %f, output %f\n", k, expected, out[k]);
    }

    /* Vector against the scalar rhs[0]. */
    vector_max_const_float(lhs, rhs[0], out, COUNT);
    for (uint8_t k = 0; k < COUNT; k++) {
        float expected = rhs[0];
        if (lhs[k] > rhs[0]) {
            expected = lhs[k];
        }
        if (expected == out[k]) {
            continue;
        }
        iot_printf("incorrect at %d, golden %f, output %f\n", k, expected, out[k]);
    }

    iot_printf("max of float test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_equal_float() and vector_equal_const_float() on 25-element
 * vectors.  The expected output is 1.0 where the operands are equal and
 * 0.0 otherwise; mismatches are logged through iot_printf().
 *
 * Inputs: a[i] = 2i and b[i] = 3i - 23, so a[23] == b[23] and
 * a[5] == b[11], giving each kernel at least one "equal" lane.
 */
void vector_equal_test_float() {
    float a[25];
    float b[25];
    float o[25];
    float g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 2 * i;
        /* Evaluate in float: with an unsigned index, 3 * i - 23 would wrap
         * to about 4.29e9 for i < 8 instead of going negative. */
        b[i] = 3.0f * (float)i - 23.0f;
    }

    /* Vector against vector. */
    vector_equal_float(a, b, o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] == b[i] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    /* Vector against the scalar b[11]. */
    vector_equal_const_float(a, b[11], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] == b[11] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    iot_printf("equal of float test end\n");
}
 | |
| 
 | |
/*
 * Checks vector_not_equal_float() and vector_not_equal_const_float() on
 * 25-element vectors.  The expected output is 1.0 where the operands
 * differ and 0.0 otherwise; mismatches are logged through iot_printf().
 *
 * Inputs: a[i] = 2i and b[i] = 3i - 23, so a[23] == b[23] and
 * a[5] == b[11], giving each kernel at least one "equal" lane.
 */
void vector_not_equal_test_float() {
    float a[25];
    float b[25];
    float o[25];
    float g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 2 * i;
        /* Evaluate in float: with an unsigned index, 3 * i - 23 would wrap
         * to about 4.29e9 for i < 8 instead of going negative. */
        b[i] = 3.0f * (float)i - 23.0f;
    }

    /* Vector against vector. */
    vector_not_equal_float(a, b, o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] != b[i] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    /* Vector against the scalar b[11]. */
    vector_not_equal_const_float(a, b[11], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] != b[11] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    iot_printf("not equal of float test end\n");
}
 | |
| 
 | |
/*
 * Checks the three float "less than" kernels on 25-element vectors:
 *   vector_less_than_float        a[i] <  b[i]
 *   vector_less_than_const_float  a[i] <  c
 *   vector_const_less_than_float  c    <  a[i]
 * The expected output is 1.0 when the relation holds and 0.0 otherwise;
 * mismatches are logged through iot_printf().
 *
 * Raw bit patterns in the second report are read through a union instead
 * of a uint32_t * cast, which would violate the aliasing rules.
 */
void vector_less_than_test_float() {
    union float_bits { float f; uint32_t u; };
    float a[25];
    float b[25];
    float o[25];
    float g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 2 * i;
        /* Evaluate in float: with an unsigned index, 3 * i - 23 would wrap
         * to about 4.29e9 for i < 8 instead of going negative. */
        b[i] = 3.0f * (float)i - 23.0f;
    }

    /* Vector against vector. */
    vector_less_than_float(a, b, o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] < b[i] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    /* Vector on the left, scalar b[11] on the right. */
    vector_less_than_const_float(a, b[11], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] < b[11] ? 1 : 0;
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(%08x), output %f(%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }

    /* Scalar on the left: c < a[i] is checked as a[i] > c. */
    vector_const_less_than_float(a, b[11], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] > b[11] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    iot_printf("less than of float test end\n");
}
 | |
| 
 | |
/*
 * Checks the three float "greater or equal" kernels on 25-element vectors:
 *   vector_greater_or_equal_float        a[i] >= b[i]
 *   vector_greater_or_equal_const_float  a[i] >= c
 *   vector_const_greater_or_equal_float  c    >= a[i]
 * The expected output is 1.0 when the relation holds and 0.0 otherwise;
 * mismatches are logged through iot_printf().
 *
 * Raw bit patterns in the first report are read through a union instead
 * of a uint32_t * cast, which would violate the aliasing rules.
 */
void vector_greater_or_equal_test_float() {
    union float_bits { float f; uint32_t u; };
    float a[25];
    float b[25];
    float o[25];
    float g[25];

    for (uint32_t i = 0; i < 25; i++) {
        a[i] = 2 * i;
        /* Evaluate in float: with an unsigned index, 3 * i - 23 would wrap
         * to about 4.29e9 for i < 8 instead of going negative. */
        b[i] = 3.0f * (float)i - 23.0f;
    }

    /* Vector against vector. */
    vector_greater_or_equal_float(a, b, o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] >= b[i] ? 1 : 0;
        if (g[i] != o[i]) {
            union float_bits gold = { .f = g[i] };
            union float_bits got = { .f = o[i] };
            iot_printf("incorrect at %d, golden %f(%08x), output %f(%08x)\n", i, gold.f, gold.u, got.f, got.u);
        }
    }

    /* Vector on the left, scalar b[11] on the right. */
    vector_greater_or_equal_const_float(a, b[11], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] >= b[11] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    /* Scalar on the left: c >= a[i] is checked as a[i] <= c. */
    vector_const_greater_or_equal_float(a, b[11], o, 25);
    for (uint8_t i = 0; i < 25; i++) {
        g[i] = a[i] <= b[11] ? 1 : 0;
        if (g[i] != o[i]) {
            iot_printf("incorrect at %d, golden %f, output %f\n", i, g[i], o[i]);
        }
    }

    iot_printf("greater or equal of float test end\n");
}
 | |
| 
 | |
| 
 | |
| void float_greater_equal_test(float *a, float *b, float *o, float *g, uint32_t length) {
 | |
|     uint32_t length_tail = length & (SIMD_W - 1);
 | |
|     uint32_t length_body = length - length_tail;
 | |
|     uint32_t *ua = (uint32_t *)a;
 | |
|     uint32_t *ub = (uint32_t *)b;
 | |
|     float *pa = a;
 | |
|     float *pb = b;
 | |
|     float *po = o;
 | |
|     for (uint32_t i = 0; i < length; i++) {
 | |
|         iot_printf("a[%d] = %f(0x%08x), b[%d] = %f(0x%08x)\n", i, a[i], ua[i], i, b[i], ub[i]);
 | |
|         g[i] = a[i] >= b[i] ? 1.0 : 0.0;
 | |
|     }
 | |
|     for (uint32_t i = 0; i < length_body; i += SIMD_W) {
 | |
|         vld_fpw(0, pa, 1);
 | |
|         vld_fpw(1, pb, 1);
 | |
|         vsge(2, 0, 1);
 | |
|         vst_fpw(po, 1, 2);
 | |
|         pa += SIMD_W;
 | |
|         pb += SIMD_W;
 | |
|         po += SIMD_W;
 | |
|         asm("fence");
 | |
|     }
 | |
|     for (uint32_t i = 0; i < length; i++) {
 | |
|         if (g[i] != o[i]) {
 | |
|             iot_printf("g[%d] = %f, o[%d] = %f\n", i, g[i], i, o[i]);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
/*
 * Feeds sigmoid_int8() the 256 inputs -128..127, prints every output on its
 * own line, then prints the cpu_get_mcycle() difference measured around the
 * call (truncated to 32 bits).
 */
void sigmoid_int8_test() {
    enum { SAMPLES = 256 };
    int8_t input_[SAMPLES];
    int8_t output[SAMPLES];
    uint32_t n;

    for (n = 0; n < SAMPLES; n++) {
        input_[n] = n - 128;
    }

    /* Only the kernel call sits between the two counter reads. */
    uint64_t start = cpu_get_mcycle();
    sigmoid_int8(input_, output, SAMPLES);
    uint64_t stop = cpu_get_mcycle();

    for (n = 0; n < SAMPLES; n++) {
        iot_printf("%d\n", output[n]);
    }

    uint32_t elapsed = (uint32_t)(stop - start);
    iot_printf("time = %d\n", elapsed);
}
 | |
| 
 | |
/*
 * Feeds tanh_int8() the 256 inputs -128..127, prints every output on its
 * own line, then prints the cpu_get_mcycle() difference measured around the
 * call (truncated to 32 bits).
 */
void tanh_int8_test() {
    enum { SAMPLES = 256 };
    int8_t input_[SAMPLES];
    int8_t output[SAMPLES];
    uint32_t n;

    for (n = 0; n < SAMPLES; n++) {
        input_[n] = n - 128;
    }

    /* Only the kernel call sits between the two counter reads. */
    uint64_t start = cpu_get_mcycle();
    tanh_int8(input_, output, SAMPLES);
    uint64_t stop = cpu_get_mcycle();

    for (n = 0; n < SAMPLES; n++) {
        iot_printf("%d\n", output[n]);
    }

    uint32_t elapsed = (uint32_t)(stop - start);
    iot_printf("time = %d\n", elapsed);
}
 | |
| 
 | |
| void half_bits_test_16_to_8() {
 | |
|     //uint16_t a[256] = {44641,49129,26256,16088,58270,14244,30327,37995,6931,56640,5774,49320,7276,17402,32136,45320,36031,19644,19435,37152,49723,63428,61882,14223,23149,54102,831,36226,55644,19419,36099,7483,30896,18941,25805,23607,48792,282,48783,54897,49426,6762,20780,53309,24469,8539,61695,19415,59186,50222,32014,25999,41713,64471,33742,23461,4465,38142,60141,23549,24896,15021,3281,41924,38058,7016,24388,14059,4248,54782,40738,39345,58949,38653,25051,58363,19180,3206,3344,40722,33240,16872,34385,65101,62755,61266,28302,12659,40628,21913,4935,63079,31480,25878,21467,21352,64551,55641,38244,30774,5504,48349,57711,14148,54166,25793,3399,23600,39731,57112,45684,38384,31193,33232,63416,7174,52058,22787,60669,44254,32499,36905,43035,60968,26860,60485,3310,48805,45049,37903,143,41464,60646,4187,63860,55819,30387,40709,64641,39538,23352,21361,36043,63552,2583,54181,12272,18361,50277,20570,23835,46469,61349,53900,63130,34784,6593,11239,11974,52307,41069,23393,25954,22119,63136,7816,55537,44185,14606,36887,48995,63029,48235,43904,43939,3273,291,54180,40612,64664,16495,24803,47522,45992,9694,37811,49634,30017,6245,33255,32350,65516,31063,3328,36368,56021,23334,61044,770,43954,56915,8246,59888,19817,44941,61740,51312,30965,54138,54642,4226,32393,48326,25609,54200,32173,49050,14796,54710,51153,28930,54744,56167,47714,31678,64915,61073,64506,30663,17403,37980,12405,8087,58728,65146,45550,56799,38387,38227,44411,18989,61993,48043,54784,40966,25067,16106,29127,19598,19525,43595,36213,65384,26661,39739,41330};
 | |
|     uint16_t a[256];
 | |
|     for (uint32_t i = 0; i < 256; i++) {
 | |
|         a[i] = ((i - 128) << 8) | i;
 | |
|         //iot_printf("a[%d] = 0x%04x\n", i, a[i]);
 | |
|     }
 | |
|     uint8_t h[256];
 | |
|     uint8_t l[256];
 | |
|     uint16_t *pa = a;
 | |
|     uint8_t *ph = h;
 | |
|     uint8_t *pl = l;
 | |
|     for (uint16_t i = 0; i < 256; i += SIMD_W << 2) {
 | |
|         vld_uih(0, pa, 1);
 | |
|         pa += SIMD_W << 1;
 | |
|         vld_uih(1, pa, 1);
 | |
|         pa += SIMD_W << 1;
 | |
|         vnwh(2, 0, 1);
 | |
|         vnwl(3, 0, 1);
 | |
|         vst_uib(ph, 1, 2);
 | |
|         vst_uib(pl, 1, 3);
 | |
|         ph += SIMD_W << 2;
 | |
|         pl += SIMD_W << 2;
 | |
|         asm("fence");
 | |
|     }
 | |
|     for (uint16_t i = 0; i < 16; i++) {
 | |
|         uint8_t hi = a[i] >> 8;
 | |
|         if (hi != h[i]) {
 | |
|             iot_printf("golden_h[%d] = %d, output_h[%d] = %d\n", i, hi, i, h[i]);
 | |
|         }
 | |
|     }
 | |
|     for (uint16_t i = 0; i < 16; i++) {
 | |
|         uint8_t lo = a[i] & 0xff;
 | |
|         if (lo != l[i]) {
 | |
|             iot_printf("golden_l[%d] = %d, output_l[%d] = %d\n", i, lo, i, l[i]);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| void half_bits_test_32_to_16() {
 | |
|     uint32_t number_elem = 256;
 | |
|     int32_t *a = (int32_t *)os_mem_malloc(1, number_elem * sizeof(int32_t));
 | |
|     int16_t *h = (int16_t *)os_mem_malloc(1, number_elem * sizeof(int16_t));
 | |
|     int16_t *l = (int16_t *)os_mem_malloc(1, number_elem * sizeof(int16_t));
 | |
|     int32_t *pa = a;
 | |
|     int16_t *ph = h;
 | |
|     int16_t *pl = l;
 | |
|     for (uint32_t i = 0; i < number_elem; i++) {
 | |
|         a[i] = ((i - 128) << 16) | i;
 | |
|         //iot_printf("a[%d] = 0x%08x\n", i, a[i]);
 | |
|     }
 | |
|     for (uint16_t i = 0; i < number_elem; i += SIMD_WH) {
 | |
|         vld_siw(0, pa, 1);
 | |
|         pa += SIMD_W;
 | |
|         vld_siw(1, pa, 1);
 | |
|         pa += SIMD_W;
 | |
|         vnwh(2, 0, 1);
 | |
|         vnwl(3, 0, 1);
 | |
|         vst_sih(ph, 1, 2);
 | |
|         vst_sih(pl, 1, 3);
 | |
|         ph += SIMD_WH;
 | |
|         pl += SIMD_WH;
 | |
|         asm("fence");
 | |
|     }
 | |
|     for (uint16_t i = 0; i < number_elem; i++) {
 | |
|         int16_t hi = a[i] >> 16;
 | |
|         if (hi != h[i]) {
 | |
|             iot_printf("golden_h[%d] = 0x%04x, output_h[%d] = 0x%04x\n", i, hi, i, h[i]);
 | |
|         }
 | |
|     }
 | |
|     for (uint16_t i = 0; i < number_elem; i++) {
 | |
|         int16_t lo = a[i] & 0xffff;
 | |
|         if (lo != l[i]) {
 | |
|             iot_printf("golden_l[%d] = 0x%04x, output_l[%d] = 0x%04x\n", i, lo, i, l[i]);
 | |
|         }
 | |
|     }
 | |
|     os_mem_free(a);
 | |
|     os_mem_free(h);
 | |
|     os_mem_free(l);
 | |
| }
 | |
| 
 | |
| void half_bits_test_32_to_16u() {
 | |
|     uint32_t number_elem = 256;
 | |
|     uint32_t *a = (uint32_t *)os_mem_malloc(1, number_elem * sizeof(uint32_t));
 | |
|     uint16_t *h = (uint16_t *)os_mem_malloc(1, number_elem * sizeof(uint16_t));
 | |
|     uint16_t *l = (uint16_t *)os_mem_malloc(1, number_elem * sizeof(uint16_t));
 | |
|     uint32_t *pa = a;
 | |
|     uint16_t *ph = h;
 | |
|     uint16_t *pl = l;
 | |
|     for (uint32_t i = 0; i < number_elem; i++) {
 | |
|         a[i] = ((i - 128) << 16) | i;
 | |
|         //iot_printf("a[%d] = 0x%08x\n", i, a[i]);
 | |
|     }
 | |
|     for (uint16_t i = 0; i < number_elem; i += SIMD_WH) {
 | |
|         vld_uiw(0, pa, 1);
 | |
|         pa += SIMD_W;
 | |
|         vld_uiw(1, pa, 1);
 | |
|         pa += SIMD_W;
 | |
|         vnwh(2, 0, 1);
 | |
|         vnwl(3, 0, 1);
 | |
|         vst_uih(ph, 1, 2);
 | |
|         vst_uih(pl, 1, 3);
 | |
|         ph += SIMD_WH;
 | |
|         pl += SIMD_WH;
 | |
|         asm("fence");
 | |
|     }
 | |
|     for (uint16_t i = 0; i < number_elem; i++) {
 | |
|         int16_t hi = a[i] >> 16;
 | |
|         if (hi != h[i]) {
 | |
|             iot_printf("golden_h[%d] = 0x%04x, output_h[%d] = 0x%04x\n", i, hi, i, h[i]);
 | |
|         }
 | |
|     }
 | |
|     for (uint16_t i = 0; i < number_elem; i++) {
 | |
|         int16_t lo = a[i] & 0xffff;
 | |
|         if (lo != l[i]) {
 | |
|             iot_printf("golden_l[%d] = 0x%04x, output_l[%d] = 0x%04x\n", i, lo, i, l[i]);
 | |
|         }
 | |
|     }
 | |
|     os_mem_free(a);
 | |
|     os_mem_free(h);
 | |
|     os_mem_free(l);
 | |
| }
 | |
| 
 | |
// Runs one fully-connected test case received through
// read_case_from_python().
//
// The payload is placed at 0x10200000 and laid out back to back as weight,
// bias, input, golden; cfg[52..55] give the sizes used as offsets between
// those regions, and the output is written right after the golden data.
// cfg[4] selects the element width in bytes (1 or 2), cfg[5] is the batch
// argument and cfg[6] selects the bias-shift kernel variant.  The elapsed
// cycle count is printed and the output is compared with the golden data
// through verify_8bits().
void fc_test_from_pc() {
    uint8_t raw_cfg[120];
    uint32_t cfg[60];
    const uint32_t base = 0x10200000;

    read_case_from_python(raw_cfg, cfg, (uint8_t *)base);

    const uint32_t weight_at = base;
    const uint32_t bias_at = weight_at + cfg[52];
    const uint32_t input_at = bias_at + cfg[53];
    const uint32_t golden_at = input_at + cfg[54];
    const uint32_t output_at = golden_at + cfg[55];

    const uint32_t width = cfg[4];
    const uint32_t n_batch = cfg[5];
    const uint32_t use_bias_shift = cfg[6];

    uint64_t t_start = cpu_get_mcycle();
    if (width == 1) {
        // 8-bit kernels
        if (use_bias_shift == 0) {
            fc_int8_to_int8_weight_8bit_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int32_t *)bias_at, cfg[0], cfg[1], cfg[2], n_batch);
        } else if (use_bias_shift == 1) {
            fc_int8_to_int8_weight_8bit_bias_shift_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int8_t *)bias_at, cfg[0], cfg[1], cfg[2], cfg[3], n_batch);
        }
    } else if (width == 2) {
        // 16-bit kernels
        if (use_bias_shift == 0) {
            fc_int16_to_int16_weight_16bit_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int64_t *)bias_at, cfg[0], cfg[1], cfg[2], n_batch);
        } else if (use_bias_shift == 1) {
            fc_int16_to_int16_weight_16bit_bias_shift_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int16_t *)bias_at, cfg[0], cfg[1], cfg[2], cfg[3], n_batch);
        }
    }
    uint64_t t_stop = cpu_get_mcycle();

    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));
    verify_8bits(output_at, golden_at, cfg[55]);
}
 | |
| 
 | |
// Runs one "depth" fully-connected test case received through
// read_case_from_python().
//
// Memory layout and configuration words match fc_test_from_pc(): the
// payload sits at 0x10200000 as weight, bias, input, golden (sizes in
// cfg[52..55]) with the output placed after the golden data; cfg[4] is the
// element width in bytes, cfg[5] the batch argument, cfg[6] the bias-shift
// selector.  The depth kernels take cfg[1] and cfg[2] (plus cfg[3] for the
// bias-shift variants) but not cfg[0].  Prints the elapsed cycles and
// compares output with golden via verify_8bits().
//
// NOTE: a size guard against 0x10008000 exists in the original only as
// commented-out code; no bound check is performed.
void depth_fc_test_from_pc() {
    uint8_t raw_cfg[120];
    uint32_t cfg[60];
    const uint32_t base = 0x10200000;

    read_case_from_python(raw_cfg, cfg, (uint8_t *)base);

    const uint32_t weight_at = base;
    const uint32_t bias_at = weight_at + cfg[52];
    const uint32_t input_at = bias_at + cfg[53];
    const uint32_t golden_at = input_at + cfg[54];
    const uint32_t output_at = golden_at + cfg[55];

    const uint32_t width = cfg[4];
    const uint32_t n_batch = cfg[5];
    const uint32_t use_bias_shift = cfg[6];

    uint64_t t_start = cpu_get_mcycle();
    if (width == 1) {
        // 8-bit kernels
        if (use_bias_shift == 0) {
            fc_depth_int8_to_int8_weight_8bit_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int32_t *)bias_at, cfg[1], cfg[2], n_batch);
        } else if (use_bias_shift == 1) {
            fc_depth_int8_to_int8_weight_8bit_bias_shift_batch((int8_t *)input_at, (int8_t *)output_at, (int8_t *)weight_at, (int8_t *)bias_at, cfg[1], cfg[2], cfg[3], n_batch);
        }
    } else if (width == 2) {
        // 16-bit kernels
        if (use_bias_shift == 0) {
            fc_depth_int16_to_int16_weight_16bit_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int64_t *)bias_at, cfg[1], cfg[2], n_batch);
        } else if (use_bias_shift == 1) {
            fc_depth_int16_to_int16_weight_16bit_bias_shift_batch((int16_t *)input_at, (int16_t *)output_at, (int16_t *)weight_at, (int16_t *)bias_at, cfg[1], cfg[2], cfg[3], n_batch);
        }
    }
    uint64_t t_stop = cpu_get_mcycle();

    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));
    verify_8bits(output_at, golden_at, cfg[55]);
}
 | |
| 
 | |
| void dsp_init(){
 | |
|     asm("csrs mstatus,%0"::"r"(0x18000));
 | |
|     int32_t *p_reg = (int32_t *)0x02000148;
 | |
|     *p_reg = 0x3;
 | |
| }
 | |
| 
 | |
| void vmaddw_test() {
 | |
|     int8_t a[16];
 | |
|     int8_t b[16];
 | |
|     int32_t in[16];
 | |
|     int32_t out[16];
 | |
|     int32_t *pi = in;
 | |
|     int32_t *po = out;
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         a[i] = i - 8;
 | |
|         b[i] = i - 8;
 | |
|         in[i] = 0;
 | |
|     }
 | |
|     vld_sib(0, a, 1);
 | |
|     vld_sib(1, b, 1);
 | |
|     vld_siw(4, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_siw(5, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_siw(6, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_siw(7, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vmaddw(4, 0, 1);
 | |
|     vst_siw(po, 1, 4);
 | |
|     po += SIMD_WW;
 | |
|     vst_siw(po, 1, 5);
 | |
|     po += SIMD_WW;
 | |
|     vst_siw(po, 1, 6);
 | |
|     po += SIMD_WW;
 | |
|     vst_siw(po, 1, 7);
 | |
|     po += SIMD_WW;
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         int32_t g = in[i] + a[i] * b[i];
 | |
|         iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| void vmsubw_testu() {
 | |
|     uint8_t a[16];
 | |
|     uint8_t b[16];
 | |
|     uint32_t in[16];
 | |
|     uint32_t out[16];
 | |
|     uint32_t *pi = in;
 | |
|     uint32_t *po = out;
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         a[i] = 16 - i;
 | |
|         b[i] = 17 - i;
 | |
|         in[i] = 500;
 | |
|     }
 | |
|     vld_uib(0, a, 1);
 | |
|     vld_uib(1, b, 1);
 | |
|     vld_uiw(4, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_uiw(5, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_uiw(6, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_uiw(7, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vmsubw(4, 0, 1);
 | |
|     vst_uiw(po, 1, 4);
 | |
|     po += SIMD_WW;
 | |
|     vst_uiw(po, 1, 5);
 | |
|     po += SIMD_WW;
 | |
|     vst_uiw(po, 1, 6);
 | |
|     po += SIMD_WW;
 | |
|     vst_uiw(po, 1, 7);
 | |
|     po += SIMD_WW;
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         uint32_t g = in[i] - a[i] * b[i];
 | |
|         iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| void vmsubw_test() {
 | |
|     int8_t a[16];
 | |
|     int8_t b[16];
 | |
|     int32_t in[16];
 | |
|     int32_t out[16];
 | |
|     int32_t *pi = in;
 | |
|     int32_t *po = out;
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         a[i] = 16 - i;
 | |
|         b[i] = 17 - i;
 | |
|         in[i] = 500;
 | |
|     }
 | |
|     vld_sib(0, a, 1);
 | |
|     vld_sib(1, b, 1);
 | |
|     vld_siw(4, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_siw(5, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_siw(6, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vld_siw(7, pi, 1);
 | |
|     pi += SIMD_WW;
 | |
|     vmsubw(4, 0, 1);
 | |
|     vst_siw(po, 1, 4);
 | |
|     po += SIMD_WW;
 | |
|     vst_siw(po, 1, 5);
 | |
|     po += SIMD_WW;
 | |
|     vst_siw(po, 1, 6);
 | |
|     po += SIMD_WW;
 | |
|     vst_siw(po, 1, 7);
 | |
|     po += SIMD_WW;
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 16; i++) {
 | |
|         int32_t g = in[i] - a[i] * b[i];
 | |
|         iot_printf("golden[%d] = %d, out[%d] = %d\n", i, g, i, out[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
// Exercises vmax and vmin on two 4-lane float vectors and prints both
// results per lane.  The result buffers start out as 1.0 / 1.1 so that a
// store which never lands is visible in the printout.
void max_min_test_float() {
    float lhs[4] = {-0.1, 0.0, 0.1, 0.2};
    float rhs[4] = {0.2, 0.1, 0.0, -0.1};
    float max_out[4] = {1.0, 1.0, 1.0, 1.0};
    float min_out[4] = {1.1, 1.1, 1.1, 1.1};

    vld_fpw(0, lhs, 1);
    vld_fpw(1, rhs, 1);
    vmax(2, 0, 1);
    vmin(3, 0, 1);
    vst_fpw(max_out, 1, 2);
    vst_fpw(min_out, 1, 3);
    asm("fence");

    for (int lane = 0; lane < 4; lane++) {
        iot_printf("max[%d] = %f, min[%d] = %f\n", lane, max_out[lane], lane, min_out[lane]);
    }
}
 | |
| 
 | |
| void compare_test_float() {
 | |
|     float a[4] = {1.0, 2.0, 3.0, 4.0};
 | |
|     float b[4] = {5.0, 4.0, 3.0, 2.0};
 | |
|     uint32_t ne[4];
 | |
|     uint32_t eq[4];
 | |
|     uint32_t lt[4];
 | |
|     uint32_t ge[4];
 | |
|     vld_fpw(0, a, 1)
 | |
|     vld_fpw(1, b, 1)
 | |
|     vseq(2, 0, 1)
 | |
|     vsne(3, 0, 1)
 | |
|     vslt(4, 0, 1)
 | |
|     vsge(5, 0, 1)
 | |
|     vst_fpw(eq, 1, 2)
 | |
|     vst_fpw(ne, 1, 3)
 | |
|     vst_fpw(lt, 1, 4)
 | |
|     vst_fpw(ge, 1, 5)
 | |
|     asm("fence");
 | |
|     for (uint8_t i = 0; i < 4; i++) {
 | |
|         iot_printf("eq[%d] = %08x, ne[%d] = %08x, lt[%d] = %08x, ge[%d] = %08x\n", i, eq[i], i, ne[i], i, lt[i], i, ge[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
// Exercises vmull on two 4-lane float vectors.
//
// For every lane the inputs, the scalar product a * b and the vector
// result are printed both as floats and as raw IEEE-754 bit patterns, so
// differences in the last bits are visible.
void mul_test_float() {
    // Reinterprets a float as its bit pattern.  Going through a union
    // avoids reading a float object through a uint32_t pointer, which
    // violates the strict-aliasing rule.
    union float_bits {
        float f;
        uint32_t u;
    };
    float a[4];
    float b[4];
    float w[4];
    for (uint8_t i = 0; i < 4; i++) {
        a[i] = -0.2 * (i - 2.5);
        b[i] = 0.3 * (i - 3.3);
    }
    vld_fpw(0, a, 1);
    vld_fpw(1, b, 1);
    vmull(2, 0, 1);
    vst_fpw(w, 1, 2);
    asm("fence");
    for (uint8_t i = 0; i < 4; i++) {
        float g = a[i] * b[i];
        union float_bits bits_a = { .f = a[i] };
        union float_bits bits_b = { .f = b[i] };
        union float_bits bits_g = { .f = g };
        union float_bits bits_w = { .f = w[i] };
        iot_printf("a[%d] = %f(0x%08x), b[%d] = %f(0x%08x), g[%d] = %f(0x%08x), o[%d] = %f(0x%08x)\n", i, a[i], bits_a.u, i, b[i], bits_b.u, i, g, bits_g.u, i, w[i], bits_w.u);
    }
}
 | |
| 
 | |
| void vmadd_test_float() {
 | |
|     float a[4] = {0.1, 0.2, 0.3, 0.4};
 | |
|     float b[4] = {1.0, 2.0, 3.0, 4.0};
 | |
|     float j[4] = {1.0, 2.0, 3.0, 4.0};
 | |
|     float p[4] = {0.0, 0.0, 0.0, 0.0};
 | |
|     float s[4] = {0.0, 0.0, 0.0, 0.0};
 | |
|     uint32_t *up = (uint32_t *)p;
 | |
|     uint32_t *us = (uint32_t *)s;
 | |
|     vld_fpw(0, a, 1)
 | |
|     vld_fpw(1, b, 1)
 | |
|     vld_fpw(2, j, 1)
 | |
|     vld_fpw(3, j, 1)
 | |
|     vmadd(2, 0, 1)
 | |
|     vmsub(3, 0, 1)
 | |
|     vst_fpw(p, 1, 2)
 | |
|     vst_fpw(s, 1, 3)
 | |
|     for (uint32_t i = 0; i < 4; i++) {
 | |
|         iot_printf("add result[%d] = %f(0x%08x), sub result[%d] = %f(0x%08x)\n", i, p[i], up[i], i, s[i], us[i]);
 | |
|     }
 | |
| }
 | |
| 
 | |
// Sweeps sigmoid_int16() over every int16_t input, -32768..32767, in
// batches of 16, and prints each output value on its own line.
void sigmoid_int16_test() {
    int16_t batch_in[16];
    int16_t batch_out[16];

    for (int32_t first = -32768; first < 32768; first += 16) {
        for (int32_t k = 0; k < 16; k++) {
            batch_in[k] = (int16_t)(first + k);
        }

        sigmoid_int16(batch_in, batch_out, 16);

        for (int32_t k = 0; k < 16; k++) {
            iot_printf("%d\n", batch_out[k]);
        }
    }
}
 | |
| 
 | |
// Sweeps tanh_int16() over every int16_t input, -32768..32767, in batches
// of 16, and prints each output value on its own line.
void tanh_int16_test() {
    int16_t batch_in[16];
    int16_t batch_out[16];

    for (int32_t first = -32768; first < 32768; first += 16) {
        for (int32_t k = 0; k < 16; k++) {
            batch_in[k] = (int16_t)(first + k);
        }

        tanh_int16(batch_in, batch_out, 16);

        for (int32_t k = 0; k < 16; k++) {
            iot_printf("%d\n", batch_out[k]);
        }
    }
}
 | |
| 
 | |
| void vld_stride_test() {
 | |
|     uint32_t data_len = 2048;
 | |
|     uint32_t *data = (uint32_t *)os_mem_malloc(1, data_len * 4);
 | |
|     uint8_t *data8 = (uint8_t *)data;
 | |
|     for (uint32_t i = 0; i < data_len * 4; i++) {
 | |
|         data8[i] = i;
 | |
|     }
 | |
|     uint32_t load[4];
 | |
|     uint8_t *load8 = (uint8_t *)load;
 | |
|     for (uint32_t s = 0; s < 512; s++) {
 | |
|         iot_printf("stride = %d\n", s);
 | |
|         vlds_uib(0, data, (s * 4))
 | |
|         vst_uib(load8, 1, 0)
 | |
|         asm("fence");
 | |
|         for (uint32_t i = 0; i < SIMD_WB; i++) {
 | |
|             iot_printf("load[%d] = %d\n", i, load8[i]);
 | |
|         }
 | |
|     }
 | |
|     os_mem_free(data);
 | |
| }
 | |
| 
 | |
| void vst_stride_test() {
 | |
|     uint32_t store_len = 2048;
 | |
|     uint32_t *store = (uint32_t *)os_mem_malloc(1, store_len * 4);
 | |
|     uint32_t load[4] = {0x12345678, 0x23456789, 0x34567890, 0x45678901};
 | |
|     vld_uiw(0, load, 1);
 | |
|     for (uint32_t s = 0; s < 512; s++) {
 | |
|         for (uint32_t i = 0; i < 2048; i++) {
 | |
|             store[i] = 0;
 | |
|         }
 | |
|         vsts_uiw(store, (s * 4), 0)
 | |
|         for (uint32_t i = 0; i < 2048; i++) {
 | |
|             if (i == 0) {
 | |
|                 if (store[i] != load[0]) {
 | |
|                     iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[0]);
 | |
|                 }
 | |
|             } else if (i == s) {
 | |
|                 if (store[i] != load[1]) {
 | |
|                     iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[1]);
 | |
|                 }
 | |
|             } 
 | |
|            // else if (i == s * 2) {
 | |
|            //     if (store[i] != load[2]) {
 | |
|            //         iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[2]);
 | |
|            //     }
 | |
|            // } else if (i == s * 3) {
 | |
|            //     if (store[i] != load[3]) {
 | |
|            //         iot_printf("store incorrect at stride %d, store[%d] = %x, should be %x\n", s, i, store[i], load[3]);
 | |
|            //     }
 | |
|            // }
 | |
|             else {
 | |
|                 if (store[i] != 0) {
 | |
|                     iot_printf("store incorrect at stride %d, store[%d] = %x, should be 0\n", s, i, store[i]);
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     os_mem_free(store);
 | |
| }
 | |
| 
 | |
// Checks and times vector_uint8_to_float() on the inputs 0..255.
//
// The call is made with arguments 1 and 2, and each output is compared
// with the reference (in + 1) * 2; mismatches are printed.  The cycle
// count of the vector call is printed, followed by the cycle count of an
// 8-way unrolled scalar uint8 -> float conversion as a baseline.
void uint8_to_float_test() {
    uint8_t *in = (uint8_t *)os_mem_malloc(1 ,256);
    float *out = (float *)os_mem_malloc(1, 256 * 4);
    float *golden = (float *)os_mem_malloc(1, 256 * 4);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("uint8_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (uint32_t i = 0; i < 256; i++) {
        in[i] = i;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_uint8_to_float(in, out, 1, 2, 256);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 256; i++) {
        golden[i] = (i + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            iot_printf("%f, %f, %d\n", golden[i], out[i], i);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline (plain conversion, no offset or scale applied).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 256; i+=8) {
        out[i + 0] = (float)in[i + 0];
        out[i + 1] = (float)in[i + 1];
        out[i + 2] = (float)in[i + 2];
        out[i + 3] = (float)in[i + 3];
        out[i + 4] = (float)in[i + 4];
        out[i + 5] = (float)in[i + 5];
        out[i + 6] = (float)in[i + 6];
        out[i + 7] = (float)in[i + 7];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);
    iot_printf("uint8_to_float_test_end\n");
}
 | |
| 
 | |
// Checks and times vector_int8_to_float() on the inputs -128..127.
//
// The call is made with arguments 1.0 and 2.0, and each output is compared
// with the reference (in + 1) * 2; on a mismatch the output is printed
// together with the raw bit patterns of output and reference.  The cycle
// count of the vector call is printed, followed by the cycle count of a
// scalar int8 -> float conversion loop as a baseline.
void int8_to_float_test() {
    // Reinterprets a float as its bit pattern without reading a float
    // object through a uint32_t pointer (strict-aliasing rule).
    union float_bits {
        float f;
        uint32_t u;
    };
    int8_t *in = (int8_t *)os_mem_malloc(1 ,256);
    float *out = (float *)os_mem_malloc(1, 256 * 4);
    float *golden = (float *)os_mem_malloc(1, 256 * 4);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("int8_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (int32_t i = 0; i < 256; i++) {
        in[i] = i - 128;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_int8_to_float(in, out, 1.0, 2.0, 256);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 256; i++) {
        golden[i] = (i - 128.0 + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            union float_bits bits_out = { .f = out[i] };
            union float_bits bits_golden = { .f = golden[i] };
            iot_printf("%f, 0x%08x, 0x%08x\n", out[i], bits_out.u, bits_golden.u);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline: convert the int8 input itself (not the loop index)
    // so the timing is comparable with the vector call above.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 256; i++) {
        out[i] = (float)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(golden);
    os_mem_free(out);
    iot_printf("int8_to_float_test_end\n");
}
 | |
| 
 | |
| 
 | |
// Checks and times vector_uint16_to_float() on the inputs 0..4095.
//
// The call is made with arguments 1 and 2, and each output is compared
// with the reference (in + 1) * 2; mismatches are printed.  The cycle
// count of the vector call is printed, followed by the cycle count of an
// 8-way unrolled scalar uint16 -> float conversion as a baseline.
void uint16_to_float_test() {
    uint16_t *in = (uint16_t *)os_mem_malloc(1, 4096 * 2);
    float *out = (float *)os_mem_malloc(1, 4096 * 4);
    float *golden = (float *)os_mem_malloc(1, 4096 * 4);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("uint16_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (uint32_t i = 0; i < 4096; i++) {
        in[i] = i;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_uint16_to_float(in, out, 1, 2, 4096);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 4096; i++) {
        golden[i] = (i + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            iot_printf("%f, %f, %d\n", golden[i], out[i], i);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline (plain conversion, no offset or scale applied).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 4096; i+=8) {
        out[i + 0] = (float)in[i + 0];
        out[i + 1] = (float)in[i + 1];
        out[i + 2] = (float)in[i + 2];
        out[i + 3] = (float)in[i + 3];
        out[i + 4] = (float)in[i + 4];
        out[i + 5] = (float)in[i + 5];
        out[i + 6] = (float)in[i + 6];
        out[i + 7] = (float)in[i + 7];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(golden);
    os_mem_free(out);
    iot_printf("uint16_to_float_test_end\n");
}
 | |
| 
 | |
// Checks and times vector_int16_to_float() on the 4096 inputs
// -32768..-28673.
//
// The call is made with arguments 1 and 2, and each output is compared
// with the reference (in + 1) * 2; on a mismatch the output is printed
// together with the raw bit patterns of output and reference.  The cycle
// count of the vector call is printed, followed by the cycle count of a
// scalar int16 -> float conversion loop as a baseline.
void int16_to_float_test() {
    // Reinterprets a float as its bit pattern without reading a float
    // object through a uint32_t pointer (strict-aliasing rule).
    union float_bits {
        float f;
        uint32_t u;
    };
    int16_t *in = (int16_t *)os_mem_malloc(1, 4096 * 2);
    float *out = (float *)os_mem_malloc(1, 4096 * 4);
    float *golden = (float *)os_mem_malloc(1, 4096 * 4);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("int16_to_float_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (int32_t i = 0; i < 4096; i++) {
        in[i] = i - 32768;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_int16_to_float(in, out, 1, 2, 4096);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 4096; i++) {
        golden[i] = (i - 32768.0 + 1.0) * 2.0;
        if (out[i] != golden[i]) {
            union float_bits bits_out = { .f = out[i] };
            union float_bits bits_golden = { .f = golden[i] };
            iot_printf("%f, 0x%08x, 0x%08x\n", out[i], bits_out.u, bits_golden.u);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline: convert the int16 input itself (not the loop index)
    // so the timing is comparable with the vector call above.
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 4096; i++) {
        out[i] = (float)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(golden);
    os_mem_free(out);
    iot_printf("int16_to_float_test_end\n");
}
 | |
| 
 | |
// Checks and times vector_float_to_int8() on 2560 inputs spaced 0.1 apart
// starting at -128.0.
//
// The call is made with arguments 1.0 and 2.0.  The reference is
// 2 * (in - 1), computed with a +128 bias, clamped to 0..255, truncated
// and un-biased again; mismatching outputs are printed.  The cycle count
// of the vector call is printed, followed by the cycle count of a scalar
// float -> int8 cast loop as a baseline.
void float_to_int8_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    int8_t *out = (int8_t *)os_mem_malloc(1, 2560);
    int8_t *golden = (int8_t *)os_mem_malloc(1, 2560);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_int8_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (int32_t i = 0; i < 2560; i++) {
        in[i] = (i - 1280) * 0.1f;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_float_to_int8(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        // Bias into the unsigned range so that clamping and truncation
        // happen on non-negative values, then remove the bias.
        float gf = 2.0 * (in[i] - 1.0) + 128.0;
        gf = (gf > 255.0) ? 255.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint8_t)gf - 128;
        if (out[i] != golden[i]) {
            iot_printf("%f, %d\n", in[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline (plain cast, no offset or scale applied).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        out[i] = (int8_t)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);
    iot_printf("float_to_int8_test end\n");
}
 | |
| 
 | |
// Checks and times vector_float_to_uint8() on 2560 inputs spaced 0.1
// apart starting at 0.0.
//
// The call is made with arguments 1.0 and 2.0.  The reference is
// 2 * (in - 1) clamped to 0..255 and truncated; mismatches are printed
// with index, input, reference and output.  The cycle count of the vector
// call is printed, followed by the cycle count of an 8-way unrolled scalar
// float -> uint8 cast as a baseline.
void float_to_uint8_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    uint8_t *out = (uint8_t *)os_mem_malloc(1, 2560);
    uint8_t *golden = (uint8_t *)os_mem_malloc(1, 2560);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_uint8_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (int32_t i = 0; i < 2560; i++) {
        in[i] = i * 0.1f;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_float_to_uint8(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        float gf = 2.0 * (in[i] - 1.0);
        gf = (gf > 255.0) ? 255.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint8_t)gf;
        if (out[i] != golden[i]) {
            iot_printf("at %d, %f, %d, %d\n", i, in[i], golden[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline (plain cast, no offset or scale applied).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i += 8) {
        out[i] = (uint8_t)in[i];
        out[i + 1] = (uint8_t)in[i + 1];
        out[i + 2] = (uint8_t)in[i + 2];
        out[i + 3] = (uint8_t)in[i + 3];
        out[i + 4] = (uint8_t)in[i + 4];
        out[i + 5] = (uint8_t)in[i + 5];
        out[i + 6] = (uint8_t)in[i + 6];
        out[i + 7] = (uint8_t)in[i + 7];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);
    iot_printf("float_to_uint8_test end\n");
}
 | |
| 
 | |
// Checks and times vector_float_to_int16() on 2560 inputs spaced 10.125
// apart and centred on zero.
//
// The call is made with arguments 1.0 and 2.0.  The reference is
// 2 * (in - 1), computed with a +32768 bias, clamped to 0..65535,
// truncated and un-biased again; mismatches are printed with input,
// reference and output.  The cycle count of the vector call is printed,
// followed by the cycle count of a scalar float -> int16 cast loop as a
// baseline.
void float_to_int16_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    int16_t *out = (int16_t *)os_mem_malloc(1, 2560 * 2);
    int16_t *golden = (int16_t *)os_mem_malloc(1, 2560 * 2);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_int16_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (int32_t i = 0; i < 2560; i++) {
        in[i] = (i - 1280) * 10.125f;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_float_to_int16(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        // Bias into the unsigned range so that clamping and truncation
        // happen on non-negative values, then remove the bias.
        float gf = 2.0 * (in[i] - 1.0) + 32768.0;
        gf = (gf > 65535.0) ? 65535.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint16_t)gf - 32768;
        if (out[i] != golden[i]) {
            iot_printf("%f, %d, %d\n", in[i], golden[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline (plain cast, no offset or scale applied).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        out[i] = (int16_t)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);
    iot_printf("float_to_int16_test end\n");
}
 | |
| 
 | |
// Checks and times vector_float_to_uint16() on 2560 inputs spaced 10.1
// apart starting at 0.0.
//
// The call is made with arguments 1.0 and 2.0.  The reference is
// 2 * (in - 1) clamped to 0..65535 and truncated; mismatches are printed
// with input, reference and output.  The cycle count of the vector call is
// printed, followed by the cycle count of a scalar float -> uint16 cast
// loop as a baseline.
void float_to_uint16_test() {
    float *in = (float *)os_mem_malloc(1, 2560 * 4);
    uint16_t *out = (uint16_t *)os_mem_malloc(1, 2560 * 2);
    uint16_t *golden = (uint16_t *)os_mem_malloc(1, 2560 * 2);
    // Assumes os_mem_malloc reports failure by returning NULL.
    if (in == NULL || out == NULL || golden == NULL) {
        iot_printf("float_to_uint16_test: os_mem_malloc failed\n");
        if (in != NULL) {
            os_mem_free(in);
        }
        if (out != NULL) {
            os_mem_free(out);
        }
        if (golden != NULL) {
            os_mem_free(golden);
        }
        return;
    }
    for (int32_t i = 0; i < 2560; i++) {
        in[i] = i * 10.1f;
    }
    uint64_t begin = cpu_get_mcycle();
    vector_float_to_uint16(in, out, 1.0, 2.0, 2560);
    uint64_t end = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        float gf = 2.0 * (in[i] - 1.0);
        gf = (gf > 65535.0) ? 65535.0 : (gf < 0.0) ? 0.0 : gf;
        golden[i] = (uint16_t)gf;
        if (out[i] != golden[i]) {
            iot_printf("%f, %d, %d\n", in[i], golden[i], out[i]);
        }
    }
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    // Scalar baseline (plain cast, no offset or scale applied).
    begin = cpu_get_mcycle();
    for (uint32_t i = 0; i < 2560; i++) {
        out[i] = (uint16_t)in[i];
    }
    end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    os_mem_free(in);
    os_mem_free(out);
    os_mem_free(golden);
    iot_printf("float_to_uint16_test end\n");
}
 | |
| 
 | |
// Checks and times vector_inner_product_uint8() on two 100-element
// vectors.  Prints the cycle count of the vector call, then the cycle
// count of a scalar reference loop, and reports both sums if they differ.
void vector_inner_product_test_uint8() {
    uint8_t lhs[100];
    uint8_t rhs[100];
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = k + 50;
        rhs[k] = k * 7 + 1;   // wraps modulo 256 when stored
    }

    uint32_t simd_sum = 0;
    uint32_t ref_sum = 0;

    uint64_t t_start = cpu_get_mcycle();
    simd_sum = vector_inner_product_uint8(lhs, rhs, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));    

    // Scalar reference, timed the same way.
    t_start = cpu_get_mcycle();
    for (uint8_t k = 0; k < 100; k++) {
        ref_sum += lhs[k] * rhs[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));    

    if (ref_sum != simd_sum) {
        iot_printf("incorrect, golden = %d, output = %d\n", ref_sum, simd_sum);
    }
    iot_printf("inner product of uint8 test end\n");
}
 | |
| 
 | |
// Checks and times vector_inner_product_int8() on two 100-element vectors
// (a[i] = i, b[i] = i + 1).  Prints the cycle count of the vector call,
// then the cycle count of a scalar reference loop, and reports both sums
// if they differ.
void vector_inner_product_test_int8() {
    int8_t lhs[100];
    int8_t rhs[100];
    for (uint32_t k = 0; k < 100; k++) {
        lhs[k] = k;
        rhs[k] = k + 1;
    }

    int32_t simd_sum = 0;
    int32_t ref_sum = 0;

    uint64_t t_start = cpu_get_mcycle();
    simd_sum = vector_inner_product_int8(lhs, rhs, 100);
    uint64_t t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));    

    // Scalar reference, timed the same way.
    t_start = cpu_get_mcycle();
    for (uint8_t k = 0; k < 100; k++) {
        ref_sum += lhs[k] * rhs[k];
    }
    t_stop = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t_stop - t_start));    

    if (ref_sum != simd_sum) {
        iot_printf("incorrect, golden = %d, output = %d\n", ref_sum, simd_sum);
    }
    iot_printf("inner product of int8 test end\n");
}
 | |
| 
 | |
| 
 | |
/*
 * Self-check for vector_inner_product_uint16() on two 100-element vectors.
 *
 * Prints the cpu_get_mcycle() delta around the library call, then the delta
 * around an equivalent scalar loop, and reports a mismatch when the two
 * 64-bit results differ.
 */
void vector_inner_product_test_uint16() {
    enum { LEN = 100 };
    uint16_t lhs[LEN];
    uint16_t rhs[LEN];
    uint32_t k;

    for (k = 0; k < LEN; k++) {
        lhs[k] = k + 50;
        rhs[k] = k * 7 + 1;
    }

    /* Routine under test. */
    uint64_t t0 = cpu_get_mcycle();
    uint64_t result = vector_inner_product_uint16(lhs, rhs, LEN);
    uint64_t t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    /* Scalar reference; widen before multiplying so the product is never
     * formed in (signed) int after integer promotion. */
    uint64_t golden = 0;
    t0 = cpu_get_mcycle();
    for (k = 0; k < LEN; k++) {
        golden += (uint64_t)lhs[k] * rhs[k];
    }
    t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    if (golden != result) {
        /* The comparison is 64-bit, so print both halves; printing only the
         * low word would hide a mismatch confined to the high word. */
        iot_printf("incorrect, golden = 0x%08x%08x, output = 0x%08x%08x\n",
                   (uint32_t)(golden >> 32), (uint32_t)golden,
                   (uint32_t)(result >> 32), (uint32_t)result);
    }
    iot_printf("inner product of uint16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for vector_inner_product_int16() on two 100-element vectors.
 *
 * Prints the cpu_get_mcycle() delta around the library call, then the delta
 * around an equivalent scalar loop, and reports a mismatch when the two
 * 64-bit results differ.
 */
void vector_inner_product_test_int16() {
    enum { LEN = 100 };
    int16_t lhs[LEN];
    int16_t rhs[LEN];
    uint64_t k;

    /* k * 700 - 200 leaves the int16_t range for larger k and is narrowed on
     * store; presumably intentional so that rhs covers both signs -- TODO confirm. */
    for (k = 0; k < LEN; k++) {
        lhs[k] = k * 100 + 1;
        rhs[k] = k * 700 - 200;
    }

    /* Routine under test. */
    uint64_t t0 = cpu_get_mcycle();
    int64_t result = vector_inner_product_int16(lhs, rhs, LEN);
    uint64_t t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    /* Scalar reference, accumulated in 64 bits. */
    int64_t golden = 0;
    t0 = cpu_get_mcycle();
    for (k = 0; k < LEN; k++) {
        golden += (int64_t)lhs[k] * rhs[k];
    }
    t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    if (golden != result) {
        /* The comparison is 64-bit, so print both halves; printing only the
         * low word would hide a mismatch confined to the high word. */
        iot_printf("incorrect, golden = 0x%08x%08x, output = 0x%08x%08x\n",
                   (uint32_t)((uint64_t)golden >> 32), (uint32_t)golden,
                   (uint32_t)((uint64_t)result >> 32), (uint32_t)result);
    }
    iot_printf("inner product of int16 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for vector_inner_product_uint32() on two 100-element vectors.
 *
 * Prints the cpu_get_mcycle() delta around the library call, then the delta
 * around an equivalent scalar loop, and reports a mismatch when the two
 * results differ.
 */
void vector_inner_product_test_uint32() {
    enum { LEN = 100 };
    uint32_t lhs[LEN];
    uint32_t rhs[LEN];
    uint32_t k;

    for (k = 0; k < LEN; k++) {
        lhs[k] = k + 50;
        rhs[k] = k * 7 + 1;
    }

    /* Routine under test. */
    uint64_t t0 = cpu_get_mcycle();
    uint32_t result = vector_inner_product_uint32(lhs, rhs, LEN);
    uint64_t t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    /* Scalar reference, timed the same way. */
    uint32_t golden = 0;
    t0 = cpu_get_mcycle();
    for (k = 0; k < LEN; k++) {
        golden += lhs[k] * rhs[k];
    }
    t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    if (golden != result) {
        iot_printf("incorrect, golden = %d, output = %d\n", golden, result);
    }
    iot_printf("inner product of uint32 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for vector_inner_product_int32() on two 100-element vectors
 * (a[k] = 100k + 1, b[k] = 700k - 200).
 *
 * Prints the cpu_get_mcycle() delta around the library call, then the delta
 * around an equivalent scalar loop, and reports a mismatch when the two
 * results differ.
 *
 * The exact inner product of this data (22,888,945,000) does not fit in
 * 32 bits, so the reference is accumulated with unsigned modular arithmetic
 * and compared against the bit pattern of the routine's result. Accumulating
 * in int32_t would be signed overflow, which is undefined behaviour.
 * NOTE(review): this assumes the routine under test wraps rather than
 * saturates -- confirm against its specification.
 */
void vector_inner_product_test_int32() {
    enum { LEN = 100 };
    int32_t lhs[LEN];
    int32_t rhs[LEN];
    int32_t k;

    for (k = 0; k < LEN; k++) {
        lhs[k] = k * 100 + 1;
        rhs[k] = k * 700 - 200;
    }

    /* Routine under test. */
    uint64_t t0 = cpu_get_mcycle();
    int32_t result = vector_inner_product_int32(lhs, rhs, LEN);
    uint64_t t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    /* Scalar reference: two's-complement wraparound done in uint32_t, where
     * overflow is well defined. */
    uint32_t golden = 0;
    t0 = cpu_get_mcycle();
    for (k = 0; k < LEN; k++) {
        golden += (uint32_t)lhs[k] * (uint32_t)rhs[k];
    }
    t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    if (golden != (uint32_t)result) {
        iot_printf("incorrect, golden = %d, output = %d\n", golden, (uint32_t)result);
    }
    iot_printf("inner product of int32 test end\n");
}
 | |
| 
 | |
/*
 * Self-check for vector_inner_product_float() on two 100-element vectors
 * (a[k] = k + 50, b[k] = 7k + 1).
 *
 * Prints the cpu_get_mcycle() delta around the library call, then the delta
 * around an equivalent scalar loop, and reports a mismatch when the two
 * results are not exactly equal. Mismatching values are printed after
 * conversion to uint32_t.
 */
void vector_inner_product_test_float() {
    enum { LEN = 100 };
    float lhs[LEN];
    float rhs[LEN];
    uint32_t k;

    for (k = 0; k < LEN; k++) {
        lhs[k] = k + 50.0;
        rhs[k] = k * 7.0 + 1.0;
    }

    /* Routine under test. */
    uint64_t t0 = cpu_get_mcycle();
    float result = vector_inner_product_float(lhs, rhs, LEN);
    uint64_t t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    /* Scalar reference, timed the same way. */
    float golden = 0;
    t0 = cpu_get_mcycle();
    for (k = 0; k < LEN; k++) {
        golden += lhs[k] * rhs[k];
    }
    t1 = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(t1 - t0));

    if (golden != result) {
        iot_printf("incorrect, golden = %d, output = %d\n", (uint32_t)golden, (uint32_t)result);
    }
    iot_printf("inner product of float test end\n");
}
 | |
| 
 | |
/*
 * Repeated check of the SIMD macro sequence vmaddw / vmsubw / vmax / vmin
 * against a scalar 64-bit model.
 *
 * Setup seeds operands through vld_siw/vmulw/vld_sih and seeds the scalar
 * model (gold_a/gold_b) with init * 4. Each of the 100000 rounds then:
 *   1. advances the scalar model and computes its max/min (gold_g/gold_l),
 *   2. issues vmaddw/vmsubw,
 *   3. stores operands 0..3 into curr_a, 4..7 into curr_b, and the vmax/vmin
 *      results (via operand 11) into curr_g/curr_l,
 *   4. prints every element where curr_* differs from gold_*.
 *
 * NOTE(review): the macros are defined outside this block; what each operand
 * number denotes (presumably a vector register) and the exact width of each
 * operation should be confirmed against their definitions.
 */
| void max_min_test_int64() {
 | |
|     int32_t init_a[8] = {0xe0000000, 0xe0000001, 0xe0000002, 0xe0000003, 0xe0000004, 0xe0000005, 0xe0000006, 0xe0000007};
 | |
|     int32_t init_b[8] = {0x7fffffff, 0x7ffffffe, 0x7ffffffd, 0x7ffffffc, 0x7ffffffb, 0x7ffffffa, 0x7ffffff9, 0x7ffffff8};
 | |
// Despite its name this holds 4; the scalar model below multiplies the seeds by 4 as well.
|     int32_t one[] = {4};
 | |
|     int64_t curr_a[8];
 | |
|     int64_t curr_b[8];
 | |
// 32-bit views of the 64-bit arrays: used as store targets and to print values as two words.
|     int32_t *in32_a = (int32_t *)curr_a;
 | |
|     int32_t *in32_b = (int32_t *)curr_b;
 | |
|     int64_t gold_a[8];
 | |
|     int64_t gold_b[8];
 | |
|     int32_t *in32ga = (int32_t *)gold_a;
 | |
|     int32_t *in32gb = (int32_t *)gold_b;
 | |
// Per-round multiplicands: sub1*sub2 (= 2) is added to a, sub3*sub4 (= 3) is subtracted from b.
|     int16_t sub1[] = {1, 1};
 | |
|     int16_t sub2[] = {2, 2};
 | |
|     int16_t sub3[] = {1, 1};
 | |
|     int16_t sub4[] = {3, 3};
 | |
|     int64_t gold_g[8];
 | |
|     int64_t gold_l[8];
 | |
|     int64_t curr_g[8];
 | |
|     int64_t curr_l[8];
 | |
|     int32_t *in32_g = (int32_t *)curr_g;
 | |
|     int32_t *in32_l = (int32_t *)curr_l;
 | |
|     int32_t *in32gg = (int32_t *)gold_g;
 | |
|     int32_t *in32gl = (int32_t *)gold_l;
 | |
// NOTE(review): presumably loads `one` and the init arrays and forms widened products
// init * 4 in operands 0/2 (from init_a) and 4/6 (from init_b) -- confirm macro semantics.
|     vld_siw(10, one, 0)
 | |
|     vld_siw(8, init_a, 1)
 | |
|     vld_siw(9, init_b, 1)
 | |
|     vmulw(0, 10, 8)
 | |
|     vmulw(4, 10, 9)
 | |
|     vld_siw(8, (init_a + SIMD_WW), 1)
 | |
|     vld_siw(9, (init_b + SIMD_WW), 1)
 | |
|     vmulw(2, 10, 8)
 | |
|     vmulw(6, 10, 9)
 | |
|     vld_sih(12, sub1, 0)
 | |
|     vld_sih(13, sub2, 0)
 | |
|     vld_sih(14, sub3, 0)
 | |
|     vld_sih(15, sub4, 0)
 | |
// Scalar model of the seed: each 32-bit init value widened to 64 bits and multiplied by 4.
|     for (uint32_t i = 0; i < 8; i++) {
 | |
|         gold_a[i] = (int64_t)init_a[i] * 4;
 | |
|         gold_b[i] = (int64_t)init_b[i] * 4;
 | |
|     }
 | |
|     for (uint32_t t = 0; t < 100000; t++) {
 | |
|         iot_printf("add and sub round %d\n", t);
 | |
|         in32ga = (int32_t *)gold_a;
 | |
|         in32gb = (int32_t *)gold_b;
 | |
// Scalar model of one round, plus the expected element-wise max (gold_g) and min (gold_l).
|         for (uint32_t i = 0; i < 8; i++) {
 | |
|             gold_a[i] = gold_a[i] + sub1[0] * sub2[0];
 | |
|             gold_b[i] = gold_b[i] - sub3[0] * sub4[0];
 | |
|             gold_g[i] = gold_a[i] > gold_b[i] ? gold_a[i] : gold_b[i];
 | |
|             gold_l[i] = gold_a[i] < gold_b[i] ? gold_a[i] : gold_b[i];
 | |
|         }
 | |
// NOTE(review): presumably the vector counterparts of the two updates above; only operands
// 0 and 4 are named while 0..3 and 4..7 are stored below -- confirm the macros cover the group.
|         vmaddw(0, 12, 13)
 | |
|         vmsubw(4, 14, 15)
 | |
| 
 | |
|         in32_a = (int32_t *)curr_a;
 | |
|         in32_b = (int32_t *)curr_b;
 | |
|         in32_g = (int32_t *)curr_g;
 | |
|         in32_l = (int32_t *)curr_l;
 | |
| 
 | |
// Chunk 0: store operands 0 (a) and 4 (b), then their vmax/vmin via operand 11;
// every destination pointer then advances by SIMD_WW 32-bit words.
|         vst_siw(in32_a, 1, 0)
 | |
|         vst_siw(in32_b, 1, 4)
 | |
|         vmax(11, 0, 4)
 | |
|         vst_siw(in32_g, 1, 11)
 | |
|         vmin(11, 0, 4)
 | |
|         vst_siw(in32_l, 1, 11)
 | |
|         in32_a += SIMD_WW;
 | |
|         in32_b += SIMD_WW;
 | |
|         in32_g += SIMD_WW;
 | |
|         in32_l += SIMD_WW;
 | |
| 
 | |
// Chunk 1: operands 1 and 5.
|         vst_siw(in32_a, 1, 1)
 | |
|         vst_siw(in32_b, 1, 5)
 | |
|         vmax(11, 1, 5)
 | |
|         vst_siw(in32_g, 1, 11)
 | |
|         vmin(11, 1, 5)
 | |
|         vst_siw(in32_l, 1, 11)
 | |
|         in32_a += SIMD_WW;
 | |
|         in32_b += SIMD_WW;
 | |
|         in32_g += SIMD_WW;
 | |
|         in32_l += SIMD_WW;
 | |
| 
 | |
// Chunk 2: operands 2 and 6.
|         vst_siw(in32_a, 1, 2)
 | |
|         vst_siw(in32_b, 1, 6)
 | |
|         vmax(11, 2, 6)
 | |
|         vst_siw(in32_g, 1, 11)
 | |
|         vmin(11, 2, 6)
 | |
|         vst_siw(in32_l, 1, 11)
 | |
|         in32_a += SIMD_WW;
 | |
|         in32_b += SIMD_WW;
 | |
|         in32_g += SIMD_WW;
 | |
|         in32_l += SIMD_WW;
 | |
| 
 | |
// Chunk 3: operands 3 and 7. The increments that follow are overwritten by the reset below.
|         vst_siw(in32_a, 1, 3)
 | |
|         vst_siw(in32_b, 1, 7)
 | |
|         vmax(11, 3, 7)
 | |
|         vst_siw(in32_g, 1, 11)
 | |
|         vmin(11, 3, 7)
 | |
|         vst_siw(in32_l, 1, 11)
 | |
|         in32_a += SIMD_WW;
 | |
|         in32_b += SIMD_WW;
 | |
|         in32_g += SIMD_WW;
 | |
|         in32_l += SIMD_WW;
 | |
| 
 | |
// Rewind the 32-bit views to the start of each array for the word-wise printing below.
|         in32_a = (int32_t *)curr_a;
 | |
|         in32_b = (int32_t *)curr_b;
 | |
|         in32_g = (int32_t *)curr_g;
 | |
|         in32_l = (int32_t *)curr_l;
 | |
| 
 | |
// Compare as 64-bit values; mismatches are printed as two 32-bit words, word 2*i+1 first
// (the high word if the target is little-endian -- TODO confirm).
|         for (uint32_t i = 0; i < 8; i++) {
 | |
|             if(curr_a[i] != gold_a[i]) {
 | |
|                 iot_printf("incorrect at %d, gold_a[%d] = 0x%08x%08x, curr_a[%d] = 0x%08x%08x\n", i, i, in32ga[2 * i + 1], in32ga[2 * i], i, in32_a[2 * i + 1], in32_a[2 * i]);
 | |
|             }
 | |
|             if(curr_b[i] != gold_b[i]) {
 | |
|                 iot_printf("incorrect at %d, gold_b[%d] = 0x%08x%08x, curr_b[%d] = 0x%08x%08x\n", i, i, in32gb[2 * i + 1], in32gb[2 * i], i, in32_b[2 * i + 1], in32_b[2 * i]);
 | |
|             }
 | |
|             if(curr_g[i] != gold_g[i]) {
 | |
|                 iot_printf("incorrect at %d, gold_g[%d] = 0x%08x%08x, curr_g[%d] = 0x%08x%08x\n", i, i, in32gg[2 * i + 1], in32gg[2 * i], i, in32_g[2 * i + 1], in32_g[2 * i]);
 | |
|             }
 | |
|             if(curr_l[i] != gold_l[i]) {
 | |
|                 iot_printf("incorrect at %d, gold_l[%d] = 0x%08x%08x, curr_l[%d] = 0x%08x%08x\n", i, i, in32gl[2 * i + 1], in32gl[2 * i], i, in32_l[2 * i + 1], in32_l[2 * i]);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
/*
 * Run one host-supplied matrix transpose case and verify its output.
 *
 * read_case_from_python() fills config_get[] and places the case data at
 * data_begin. Fields used here:
 *   config_get[0], config_get[1]  passed through as the two size arguments
 *   config_get[2]                 element selector: 1 -> 8-bit, 2 -> 16-bit, 4 -> 32-bit
 *   config_get[3]                 0 -> unsigned routines, 1 -> signed routines
 *   config_get[54]                byte offset from the input to the golden data
 *   config_get[55]                byte offset from the golden data to the output
 *                                 buffer, and the length handed to verify_8bits()
 *
 * Unrecognised selector values run no transpose; verify_8bits() is still called.
 */
void matrix_transpose_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t input_addr = data_begin;
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    const uint32_t sign_sel = config_get[3];
    const uint32_t width_sel = config_get[2];

    if (sign_sel == 0) {
        /* Unsigned element types. */
        if (width_sel == 1) {
            matrix_transpose_uint8((uint8_t *)input_addr, (uint8_t *)output_addr, config_get[0], config_get[1]);
        } else if (width_sel == 2) {
            matrix_transpose_uint16((uint16_t *)input_addr, (uint16_t *)output_addr, config_get[0], config_get[1]);
        } else if (width_sel == 4) {
            matrix_transpose_uint32((uint32_t *)input_addr, (uint32_t *)output_addr, config_get[0], config_get[1]);
        }
    } else if (sign_sel == 1) {
        /* Signed element types. */
        if (width_sel == 1) {
            matrix_transpose_int8((int8_t *)input_addr, (int8_t *)output_addr, config_get[0], config_get[1]);
        } else if (width_sel == 2) {
            matrix_transpose_int16((int16_t *)input_addr, (int16_t *)output_addr, config_get[0], config_get[1]);
        } else if (width_sel == 4) {
            matrix_transpose_int32((int32_t *)input_addr, (int32_t *)output_addr, config_get[0], config_get[1]);
        }
    }

    verify_8bits(output_addr, golden_addr, config_get[55]);
}
 | |
| 
 | |
| void vector_test_from_pc() {
 | |
|     uint8_t config_read[120];
 | |
|     uint32_t config_get[60];
 | |
|     uint32_t data_begin = 0x10200000;
 | |
|     //uint8_t *data = os_mem_malloc(1, 512 * 1024);
 | |
|     //uint32_t data_begin = (uint32_t)data;
 | |
|     read_case_from_python(config_read, config_get, (uint8_t *)data_begin);
 | |
|     uint32_t weight_addr = data_begin;
 | |
|     uint32_t bias_addr = weight_addr + config_get[52];
 | |
|     uint32_t input_addr = bias_addr + config_get[53];
 | |
|     uint32_t golden_addr = input_addr + config_get[54];
 | |
|     uint32_t output_addr = golden_addr + config_get[55];
 | |
|     if (config_get[4]) {
 | |
|         uint32_t data = 0x1071f;
 | |
|         vcsrw(data,data,0)
 | |
|     } else {
 | |
|         uint32_t data = 0x1061f;
 | |
|         vcsrw(data,data,0)
 | |
|     }
 | |
|     switch (config_get[3])
 | |
|     {
 | |
|         case 0: // unsigned
 | |
|             switch (config_get[2])
 | |
|             {
 | |
|                 case 1: // 8bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 1:
 | |
|                             vector_sub_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 3:
 | |
|                             vector_multiply_uint8_high_8bit((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4:
 | |
|                             vector_multiply_uint8_low_8bit((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)weight_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 6:
 | |
|                             vector_mul_add_uint8_to_uint32((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)weight_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 8:
 | |
|                             vector_mul_sub_uint8_to_uint32((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 15:
 | |
|                             vector_logic_and_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 16:
 | |
|                             vector_logic_or_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 17:
 | |
|                             vector_logic_xor_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 18:
 | |
|                             vector_logic_xnor_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 19:
 | |
|                             vector_left_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 20:
 | |
|                             vector_right_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 21:
 | |
|                             vector_right_shift_uint8((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 2: // 16 bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 1:
 | |
|                             vector_sub_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 3:
 | |
|                             vector_multiply_uint16_high_16bit((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4:
 | |
|                             vector_multiply_uint16_low_16bit((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)weight_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 6:
 | |
|                             vector_mul_add_uint16_to_uint64((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint64_t *)weight_addr, (uint64_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)weight_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 8:
 | |
|                             vector_mul_sub_uint16_to_uint64((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint64_t *)weight_addr, (uint64_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 15:
 | |
|                             vector_logic_and_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 16:
 | |
|                             vector_logic_or_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 17:
 | |
|                             vector_logic_xor_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 18:
 | |
|                             vector_logic_xnor_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 19:
 | |
|                             vector_left_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 20:
 | |
|                             vector_right_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 21:
 | |
|                             vector_right_shift_uint16((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 22:
 | |
|                             vector_high_half_bits_uint16((uint16_t *)input_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 23:
 | |
|                             vector_low_half_bits_uint16((uint16_t *)input_addr, (uint8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 3: // float
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
| 
 | |
|                         case 1:
 | |
|                             vector_sub_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_float((float *)input_addr, (float *)bias_addr, (float *)weight_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_float((float *)input_addr, (float *)bias_addr, (float *)weight_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_float((float *)input_addr, (float *)bias_addr, (float *)output_addr, config_get[0]);
 | |
|                             break;
 | |
| 
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|                 case 4: // 32 bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 1:
 | |
|                             vector_sub_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint64_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 3:
 | |
|                             vector_multiply_uint32_high_32bit((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4:
 | |
|                             vector_multiply_uint32_low_32bit((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)weight_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 15:
 | |
|                             vector_logic_and_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 16:
 | |
|                             vector_logic_or_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 17:
 | |
|                             vector_logic_xor_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 18:
 | |
|                             vector_logic_xnor_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 19:
 | |
|                             vector_left_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 20:
 | |
|                             vector_right_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 21:
 | |
|                             vector_right_shift_uint32((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 22:
 | |
|                             vector_high_half_bits_uint32((uint32_t *)input_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 23:
 | |
|                             vector_low_half_bits_uint32((uint32_t *)input_addr, (uint16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
|     
 | |
|         case 1: // signed
 | |
|             switch (config_get[2])
 | |
|             {
 | |
|                 case 1: // 8bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 1:
 | |
|                             vector_sub_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 3:
 | |
|                             vector_multiply_int8_high_8bit((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4:
 | |
|                             vector_multiply_int8_low_8bit((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)weight_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 6:
 | |
|                             vector_mul_add_int8_to_int32((int8_t *)input_addr, (int8_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)weight_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 8:
 | |
|                             vector_mul_sub_int8_to_int32((int8_t *)input_addr, (int8_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 15:
 | |
|                             vector_logic_and_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 16:
 | |
|                             vector_logic_or_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 17:
 | |
|                             vector_logic_xor_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 18:
 | |
|                             vector_logic_xnor_int8((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 19:
 | |
|                             vector_left_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 20:
 | |
|                             vector_arithmatic_right_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 21:
 | |
|                             vector_logic_right_shift_int8((int8_t *)input_addr, (uint8_t *)bias_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 2: // 16 bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 1:
 | |
|                             vector_sub_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 3:
 | |
|                             vector_multiply_int16_high_16bit((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4:
 | |
|                             vector_multiply_int16_low_16bit((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)weight_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 6:
 | |
|                             vector_mul_add_int16_to_int64((int16_t *)input_addr, (int16_t *)bias_addr, (int64_t *)weight_addr, (int64_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)weight_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 8:
 | |
|                             vector_mul_sub_int16_to_int64((int16_t *)input_addr, (int16_t *)bias_addr, (int64_t *)weight_addr, (int64_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 15:
 | |
|                             vector_logic_and_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 16:
 | |
|                             vector_logic_or_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 17:
 | |
|                             vector_logic_xor_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 18:
 | |
|                             vector_logic_xnor_int16((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 19:
 | |
|                             vector_left_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 20:
 | |
|                             vector_arithmatic_right_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 21:
 | |
|                             vector_logic_right_shift_int16((int16_t *)input_addr, (uint16_t *)bias_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 22:
 | |
|                             vector_high_half_bits_int16((int16_t *)input_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 23:
 | |
|                             vector_low_half_bits_int16((int16_t *)input_addr, (int8_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 4: // 32 bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 0:
 | |
|                             vector_add_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 1:
 | |
|                             vector_sub_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 2:
 | |
|                             vector_multiply_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int64_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 3:
 | |
|                             vector_multiply_int32_high_32bit((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4:
 | |
|                             vector_multiply_int32_low_32bit((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 5:
 | |
|                             vector_mul_add_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 7:
 | |
|                             vector_mul_sub_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)weight_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 9:
 | |
|                             vector_max_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 10:
 | |
|                             vector_min_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 11:
 | |
|                             vector_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 12:
 | |
|                             vector_not_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 13:
 | |
|                             vector_less_than_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 14:
 | |
|                             vector_greater_or_equal_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 15:
 | |
|                             vector_logic_and_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 16:
 | |
|                             vector_logic_or_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 17:
 | |
|                             vector_logic_xor_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 18:
 | |
|                             vector_logic_xnor_int32((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 19:
 | |
|                             vector_left_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 20:
 | |
|                             vector_arithmatic_right_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 21:
 | |
|                             vector_logic_right_shift_int32((int32_t *)input_addr, (uint32_t *)bias_addr, (int32_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 22:
 | |
|                             vector_high_half_bits_int32((int32_t *)input_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 23:
 | |
|                             vector_low_half_bits_int32((int32_t *)input_addr, (int16_t *)output_addr, config_get[0]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
|     uint32_t conf;
 | |
|     vcsrr(conf, 0)
 | |
|     iot_printf("conf = %x\n", conf);
 | |
|     //if (config_get[52] + config_get[53] + config_get[54] + config_get[55] * 2 <= 0x400000) {
 | |
|     if (config_get[2] != 3) {
 | |
|         verify_8bits(output_addr, golden_addr, config_get[55]);
 | |
|     } else {
 | |
|         verify_float(output_addr, golden_addr, config_get[55] / 4);
 | |
|     }
 | |
|     //os_mem_free(data);
 | |
| }
 | |
| 
 | |
| 
 | |
| void vector_saturation_test_from_pc() {
 | |
|     uint8_t config_read[120];
 | |
|     uint32_t config_get[60];
 | |
|     uint32_t data_begin = 0x10200000;
 | |
|     read_case_from_python(config_read, config_get, (uint8_t *)data_begin);
 | |
|     uint32_t weight_addr = data_begin;
 | |
|     uint32_t bias_addr = weight_addr + config_get[52];
 | |
|     uint32_t input_addr = bias_addr + config_get[53];
 | |
|     uint32_t golden_addr = input_addr + config_get[54];
 | |
|     uint32_t output_addr = golden_addr + config_get[55];
 | |
|     uint32_t with_const = config_get[4];
 | |
|     uint32_t right_shift = config_get[5];
 | |
|     switch (config_get[3])
 | |
|     {
 | |
|         case 0: // unsigned
 | |
|             switch (config_get[2])
 | |
|             {
 | |
|                 case 1: // 8bit
 | |
|                     switch (config_get[1])
 | |
|                     {        
 | |
|                         case 2:
 | |
|                             if (with_const) {
 | |
|                                 vector_multiply_const_uint8_right_shift_saturation((uint8_t *)input_addr, *((uint8_t *)bias_addr), (uint8_t *)output_addr, right_shift, config_get[0]);
 | |
|                             } else {
 | |
|                                 vector_multiply_uint8_right_shift_saturation((uint8_t *)input_addr, (uint8_t *)bias_addr, (uint8_t *)output_addr, right_shift, config_get[0]);
 | |
|                             }
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 2: // 16 bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 2:
 | |
|                             if (with_const) {
 | |
|                                 vector_multiply_const_uint16_right_shift_saturation((uint16_t *)input_addr, *((uint16_t *)bias_addr), (uint16_t *)output_addr, right_shift, config_get[0]);
 | |
|                             } else {
 | |
|                                 vector_multiply_uint16_right_shift_saturation((uint16_t *)input_addr, (uint16_t *)bias_addr, (uint16_t *)output_addr, right_shift, config_get[0]);
 | |
|                             }
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 4: // 32 bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 2:
 | |
|                             if (with_const) {
 | |
|                                 vector_multiply_const_uint32_right_shift_saturation((uint32_t *)input_addr, *((uint32_t *)bias_addr), (uint32_t *)output_addr, right_shift, config_get[0]);
 | |
|                             } else {
 | |
|                                 vector_multiply_uint32_right_shift_saturation((uint32_t *)input_addr, (uint32_t *)bias_addr, (uint32_t *)output_addr, right_shift, config_get[0]);
 | |
|                             }
 | |
|                             break;
 | |
| 
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
|     
 | |
|         case 1: // signed
 | |
|             switch (config_get[2])
 | |
|             {
 | |
|                 case 1: // 8bit
 | |
|                     switch (config_get[1])
 | |
|                     {
 | |
|                         case 2:
 | |
|                             if (with_const) {
 | |
|                                 vector_multiply_const_int8_right_shift_saturation((int8_t *)input_addr, *((int8_t *)bias_addr), (int8_t *)output_addr, right_shift, config_get[0]);
 | |
|                             } else {
 | |
|                                 vector_multiply_int8_right_shift_saturation((int8_t *)input_addr, (int8_t *)bias_addr, (int8_t *)output_addr, right_shift, config_get[0]);
 | |
|                             }
 | |
|                             break;
 | |
|         
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 2: // 16 bit
 | |
|                     switch (config_get[1])
 | |
|                     {            
 | |
|                         case 2:
 | |
|                             if (with_const) {
 | |
|                                 vector_multiply_const_int16_right_shift_saturation((int16_t *)input_addr, *((int16_t *)bias_addr), (int16_t *)output_addr, right_shift, config_get[0]);
 | |
|                             } else {
 | |
|                                 vector_multiply_int16_right_shift_saturation((int16_t *)input_addr, (int16_t *)bias_addr, (int16_t *)output_addr, right_shift, config_get[0]);
 | |
|                             }
 | |
|                             break;
 | |
|                 
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 case 4: // 32 bit
 | |
|                     switch (config_get[1])
 | |
|                     {    
 | |
|                         case 2:
 | |
|                             if (with_const) {
 | |
|                                 vector_multiply_const_int32_right_shift_saturation((int32_t *)input_addr, *((int32_t *)bias_addr), (int32_t *)output_addr, right_shift, config_get[0]);
 | |
|                             } else {
 | |
|                                 vector_multiply_int32_right_shift_saturation((int32_t *)input_addr, (int32_t *)bias_addr, (int32_t *)output_addr, right_shift, config_get[0]);
 | |
|                             }
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|         
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
|     uint32_t conf;
 | |
|     vcsrr(conf, 0)
 | |
|     iot_printf("conf = %x\n", conf);
 | |
|     if (config_get[52] + config_get[53] + config_get[54] + config_get[55] * 2 <= 0x8000) {
 | |
|         verify_8bits(output_addr, golden_addr, config_get[55]);
 | |
|     } else {
 | |
|         iot_printf("too large\nfinished~");
 | |
|     }
 | |
| }
 | |
| 
 | |
| void matrix_multi_test_from_pc() {
 | |
|     uint8_t config_read[120];
 | |
|     uint32_t config_get[60];
 | |
|     uint8_t *data_begin = (uint8_t *)0x10200000;
 | |
|     read_case_from_python(config_read, config_get, data_begin);
 | |
|     uint32_t matrixAaddr = (uint32_t)data_begin;
 | |
|     uint32_t matrixBaddr = matrixAaddr + config_get[53];
 | |
|     uint32_t golden_addr = matrixBaddr + config_get[54];
 | |
|     uint32_t output_addr = golden_addr + config_get[55];
 | |
|     uint64_t begin = cpu_get_mcycle();
 | |
|     uint32_t elem_bytes = config_get[3];
 | |
|     uint32_t signed_ = config_get[4];
 | |
|     uint32_t w4b = config_get[5];
 | |
|     switch (elem_bytes)
 | |
|     {
 | |
|         case 1: // 8bits
 | |
|             switch (signed_)
 | |
|             {
 | |
|                 case 0: // unsigned
 | |
|                     switch (w4b)
 | |
|                     {
 | |
|                         case 1: // vmadd
 | |
|                             matrix_multi_uint8_to_uint8((uint8_t *)matrixAaddr, (uint8_t *)matrixBaddr, (uint8_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4: // vmaddw
 | |
|                             matrix_multi_uint8_to_uint32((uint8_t *)matrixAaddr, (uint8_t *)matrixBaddr, (uint32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|             
 | |
|                 case 1: // signed
 | |
|                     switch (w4b)
 | |
|                     {
 | |
|                         case 1: // vmadd
 | |
|                             matrix_multi_int8_to_int8((int8_t *)matrixAaddr, (int8_t *)matrixBaddr, (int8_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4: // vmaddw
 | |
|                             matrix_multi_int8_to_int32((int8_t *)matrixAaddr, (int8_t *)matrixBaddr, (int32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|             
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
|     
 | |
|         case 2: // 16bits
 | |
|             switch (signed_)
 | |
|             {
 | |
|                 case 0: // unsigned
 | |
|                     switch (w4b)
 | |
|                     {
 | |
|                         case 1: // vmadd
 | |
|                             matrix_multi_uint16_to_uint16((uint16_t *)matrixAaddr, (uint16_t *)matrixBaddr, (uint16_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4: // vmaddw
 | |
|                             matrix_multi_uint16_to_uint64((uint16_t *)matrixAaddr, (uint16_t *)matrixBaddr, (uint64_t *)output_addr, config_get[0], config_get[1], config_get[2]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|             
 | |
|                 case 1: // signed
 | |
|                     switch (w4b)
 | |
|                     {
 | |
|                         case 1: // vmadd
 | |
|                             matrix_multi_int16_to_int16((int16_t *)matrixAaddr, (int16_t *)matrixBaddr, (int16_t *)output_addr, config_get[0], config_get[1], config_get[2], config_get[6]);
 | |
|                             break;
 | |
|                     
 | |
|                         case 4: // vmaddw
 | |
|                             matrix_multi_int16_to_int64((int16_t *)matrixAaddr, (int16_t *)matrixBaddr, (int64_t *)output_addr, config_get[0], config_get[1], config_get[2]);
 | |
|                             break;
 | |
|                     
 | |
|                         default:
 | |
|                             break;
 | |
|                     }
 | |
|                     break;
 | |
|             
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
|     
 | |
|         case 4: // 32bits
 | |
|             switch (signed_)
 | |
|             {
 | |
|                 case 0: // signed
 | |
|                     matrix_multi_uint32_to_uint32((uint32_t *)matrixAaddr, (uint32_t *)matrixBaddr, (uint32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
 | |
|                     break;
 | |
|             
 | |
|                 case 1: // unsigned
 | |
|                     matrix_multi_int32_to_int32((int32_t *)matrixAaddr, (int32_t *)matrixBaddr, (int32_t *)output_addr, config_get[0], config_get[1], config_get[2]);
 | |
|                     break;
 | |
|             
 | |
|                 default:
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
|     
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
|     uint64_t end = cpu_get_mcycle();
 | |
|     iot_printf("time = %d\n", (uint32_t)(end - begin));
 | |
|     verify_8bits(output_addr, golden_addr, config_get[55]);
 | |
| }
 | |
| 
 | |
/*
 * PC-driven test for the matrix_transpose_multi_* kernels.
 *
 * A test case is fetched with read_case_from_python() into the buffer at
 * 0x10200000.  The buffers are laid out back to back:
 *   matrix A | matrix B | golden result | kernel output
 * with config_get[53], config_get[54] and config_get[55] giving the byte
 * offsets between consecutive regions.
 *
 * Kernel selection:
 *   config_get[3]  element size in bytes (1, 2 or 4; any other value runs
 *                  the float kernel)
 *   config_get[4]  0 = unsigned, 1 = signed
 *   config_get[5]  1 = output has the input width, 4 = widened output
 *                  (ignored for 4-byte elements)
 * config_get[0], [1], [2] (and [6] for the same-width 8/16-bit kernels) are
 * handed to the kernel unchanged; presumably the matrix dimensions and a
 * scaling parameter -- confirm against the kernel prototypes.
 *
 * The elapsed cycle count is printed and the output is compared byte-wise
 * against the golden data over config_get[55] bytes.
 */
void matrix_transpose_multi_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    /* Regions are packed consecutively starting at data_begin. */
    uint32_t mat_a = data_begin;
    uint32_t mat_b = mat_a + config_get[53];
    uint32_t golden = mat_b + config_get[54];
    uint32_t out = golden + config_get[55];

    uint64_t begin = cpu_get_mcycle();

    const uint32_t elem_size = config_get[3];
    const uint32_t is_signed = config_get[4];
    const uint32_t out_sel = config_get[5];

    /* Arguments forwarded verbatim to the selected kernel. */
    const uint32_t p0 = config_get[0];
    const uint32_t p1 = config_get[1];
    const uint32_t p2 = config_get[2];
    const uint32_t p6 = config_get[6];

    if (elem_size == 1) {
        /* 8-bit elements */
        if (is_signed == 0) {
            if (out_sel == 1) {
                matrix_transpose_multi_uint8_to_uint8((uint8_t *)mat_a, (uint8_t *)mat_b, (uint8_t *)out, p0, p1, p2, p6);
            } else if (out_sel == 4) {
                matrix_transpose_multi_uint8_to_uint32((uint8_t *)mat_a, (uint8_t *)mat_b, (uint32_t *)out, p0, p1, p2);
            }
        } else if (is_signed == 1) {
            if (out_sel == 1) {
                matrix_transpose_multi_int8_to_int8((int8_t *)mat_a, (int8_t *)mat_b, (int8_t *)out, p0, p1, p2, p6);
            } else if (out_sel == 4) {
                matrix_transpose_multi_int8_to_int32((int8_t *)mat_a, (int8_t *)mat_b, (int32_t *)out, p0, p1, p2);
            }
        }
    } else if (elem_size == 2) {
        /* 16-bit elements */
        if (is_signed == 0) {
            if (out_sel == 1) {
                matrix_transpose_multi_uint16_to_uint16((uint16_t *)mat_a, (uint16_t *)mat_b, (uint16_t *)out, p0, p1, p2, p6);
            } else if (out_sel == 4) {
                matrix_transpose_multi_uint16_to_uint64((uint16_t *)mat_a, (uint16_t *)mat_b, (uint64_t *)out, p0, p1, p2);
            }
        } else if (is_signed == 1) {
            if (out_sel == 1) {
                matrix_transpose_multi_int16_to_int16((int16_t *)mat_a, (int16_t *)mat_b, (int16_t *)out, p0, p1, p2, p6);
            } else if (out_sel == 4) {
                matrix_transpose_multi_int16_to_int64((int16_t *)mat_a, (int16_t *)mat_b, (int64_t *)out, p0, p1, p2);
            }
        }
    } else if (elem_size == 4) {
        /* 32-bit elements: only same-width kernels exist, out_sel is not consulted. */
        if (is_signed == 0) {
            /* unsigned */
            matrix_transpose_multi_uint32_to_uint32((uint32_t *)mat_a, (uint32_t *)mat_b, (uint32_t *)out, p0, p1, p2);
        } else if (is_signed == 1) {
            /* signed */
            matrix_transpose_multi_int32_to_int32((int32_t *)mat_a, (int32_t *)mat_b, (int32_t *)out, p0, p1, p2);
        }
    } else {
        /* Every other element-size code selects the float kernel. */
        matrix_transpose_multi_float((float *)mat_a, (float *)mat_b, (float *)out, p0, p1, p2);
    }

    uint64_t end = cpu_get_mcycle();
    iot_printf("time = %d\n", (uint32_t)(end - begin));
    verify_8bits(out, golden, config_get[55]);
}
 | |
| 
 | |
/*
 * Ad-hoc benchmark for the int8 "maximum element" kernels.
 *
 * Three measurements are printed, each as a cycle count taken with
 * cpu_get_mcycle():
 *   1. vector_maximum_element_int8_small() on a 256-entry stack array,
 *   2. a plain C reference loop over the same array,
 *   3. vector_maximum_element_int8() on a 128 x 256 pattern written to the
 *      buffer at 0x10200000.
 * The index/value found by the kernels is printed after runs 1 and 3.
 */
void maximum_test() {
    int8_t v[256];
    for (uint32_t i = 0; i < 256; i++) {
        /* 3 * i exceeds the int8_t range for most i; the store narrows it. */
        v[i] = 3 * i;
    }
    for (uint32_t i = 0; i < 256; i++) {
        iot_printf("v[%d] = %d\n", i, v[i]);
    }

    /*
     * Results are zero-initialised so the prints below never read an
     * indeterminate value should a kernel return without writing them.
     */
    uint8_t max_index = 0;
    int8_t max_value = 0;
    uint64_t begin;
    uint64_t end;

    /*
     * NOTE(review): the length argument is 0 although v holds 256 entries
     * (256 does not fit the kernel's uint8_t len parameter) -- confirm the
     * kernel interprets 0 as 256.
     */
    begin = cpu_get_mcycle();
    vector_maximum_element_int8_small(v, &max_index, &max_value, 0);
    end = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(end - begin));
    iot_printf("max_index = %d\n", max_index);
    iot_printf("max_value = %d\n", max_value);

    /* Scalar reference implementation, timed for comparison. */
    begin = cpu_get_mcycle();
    max_index = 0;
    max_value = v[0];
    for (uint32_t i = 1; i < 256; i++) {
        if (max_value < v[i]) {
            max_value = v[i];
            max_index = i;
        }
    }
    end = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(end - begin));

    /* Large-vector case: 128 rows of 256 values in the external buffer. */
    int8_t *in = (int8_t *)0x10200000;
    for (uint32_t i = 0; i < 128; i++) {
        /* Row 0 would give a zero divisor, so it uses 1 instead. */
        uint32_t divisor = (5 * i != 0) ? 5 * i : 1;
        for (uint32_t j = 0; j < 256; j++) {
            /*
             * Clamp in a wide type BEFORE narrowing to int8_t.  Clamping the
             * already-stored int8_t only ever caught the value 127 and let
             * 128..255 wrap to negative numbers.
             */
            uint32_t value = (3 * j) % divisor;
            if (value > 126) {
                value = 126;
            }
            in[i * 256 + j] = (int8_t)value;
        }
    }

    uint16_t max_index16 = 0;
    begin = cpu_get_mcycle();
    vector_maximum_element_int8(in, &max_index16, &max_value, 128 * 256);
    end = cpu_get_mcycle();
    iot_printf("cycle = %d\n", (uint32_t)(end - begin));
    iot_printf("max_index = %d\n", max_index16);
    iot_printf("max_value = %d\n", max_value);
}
 | |
| 
 | |
/*
 * Helper for vector_maxi_mini_test_from_pc(): runs one min/max kernel and
 * checks its result.  Expands inside that function and uses its locals
 * input_addr, output_addr, golden_addr and length.
 *
 *   elem_t    element type of the input vector, result and golden value
 *   index_t   type of the index the kernel writes
 *   kernel    kernel to call as kernel(input, &index, &result, length)
 *   elem_fmt  printf conversion used to print an element ("%d" or "%f")
 *
 * The kernel writes the extreme value to output_addr and its index to
 * output_addr + 4.  The value is compared with the golden value first; only
 * when it matches is the index cross-checked by reading the input at that
 * index.  The message strings are kept exactly as the host side sees them.
 */
#define MAXI_MINI_RUN_CASE(elem_t, index_t, kernel, elem_fmt)                              \
    do {                                                                                   \
        elem_t *mm_in = (elem_t *)input_addr;                                              \
        index_t *mm_idx = (index_t *)(output_addr + 4);                                    \
        elem_t *mm_out = (elem_t *)output_addr;                                            \
        elem_t *mm_gold = (elem_t *)golden_addr;                                           \
        kernel(mm_in, mm_idx, mm_out, length);                                             \
        if (*mm_out != *mm_gold) {                                                         \
            iot_printf("elem incorrent, golden = " elem_fmt ", output = " elem_fmt "\n",   \
                       *mm_gold, *mm_out);                                                 \
            iot_printf("not all correct!\n");                                              \
        } else if (mm_in[*mm_idx] != *mm_out) {                                            \
            iot_printf("elem index incorrent, input at index %d is " elem_fmt "\n",        \
                       *mm_idx, mm_in[*mm_idx]);                                           \
            iot_printf("not all correct!\n");                                              \
        }                                                                                  \
        iot_printf("finished~");                                                           \
    } while (0)

/*
 * PC-driven test for the vector_minimum_element_* / vector_maximum_element_*
 * kernels.
 *
 * A test case is fetched with read_case_from_python() into the buffer at
 * 0x10200000.  Regions are packed back to back in the order
 *   weight | bias | input | golden | output
 * with config_get[52..55] giving the byte offsets between them.
 *
 *   config_get[0]  vector length passed to the kernel
 *   config_get[1]  operation: 0 = minimum, 1 = maximum
 *   config_get[2]  element kind: 1 = 8-bit, 2 = 16-bit, 3 = float, 4 = 32-bit
 *   config_get[3]  0 = unsigned, 1 = signed (not consulted for float)
 *
 * Any other combination runs nothing and prints nothing.
 *
 * Fix relative to the previous revision: the float branches printed
 * input[*index_f], but no `input` is declared in this function; the float
 * input pointer is used now.
 */
void vector_maxi_mini_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;
    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weight_addr = data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];

    uint32_t length = config_get[0];
    uint32_t to_do = config_get[1];
    uint32_t elem_bytes = config_get[2];
    uint32_t signed_ = config_get[3];

    /* Only 0 (min) and 1 (max) are valid operations for every element kind. */
    if (to_do != 0 && to_do != 1) {
        return;
    }
    const int find_max = (to_do == 1);

    switch (elem_bytes) {
        case 1: /* 8-bit elements, 16-bit index */
            if (signed_ == 0) {
                if (find_max) {
                    MAXI_MINI_RUN_CASE(uint8_t, uint16_t, vector_maximum_element_uint8, "%d");
                } else {
                    MAXI_MINI_RUN_CASE(uint8_t, uint16_t, vector_minimum_element_uint8, "%d");
                }
            } else if (signed_ == 1) {
                if (find_max) {
                    MAXI_MINI_RUN_CASE(int8_t, uint16_t, vector_maximum_element_int8, "%d");
                } else {
                    MAXI_MINI_RUN_CASE(int8_t, uint16_t, vector_minimum_element_int8, "%d");
                }
            }
            break;

        case 2: /* 16-bit elements, 16-bit index */
            if (signed_ == 0) {
                if (find_max) {
                    MAXI_MINI_RUN_CASE(uint16_t, uint16_t, vector_maximum_element_uint16, "%d");
                } else {
                    MAXI_MINI_RUN_CASE(uint16_t, uint16_t, vector_minimum_element_uint16, "%d");
                }
            } else if (signed_ == 1) {
                if (find_max) {
                    MAXI_MINI_RUN_CASE(int16_t, uint16_t, vector_maximum_element_int16, "%d");
                } else {
                    MAXI_MINI_RUN_CASE(int16_t, uint16_t, vector_minimum_element_int16, "%d");
                }
            }
            break;

        case 3: /* float elements, 32-bit index; signedness is not consulted */
            if (find_max) {
                MAXI_MINI_RUN_CASE(float, uint32_t, vector_maximum_element_float, "%f");
            } else {
                MAXI_MINI_RUN_CASE(float, uint32_t, vector_minimum_element_float, "%f");
            }
            break;

        case 4: /* 32-bit elements, 32-bit index */
            if (signed_ == 0) {
                if (find_max) {
                    MAXI_MINI_RUN_CASE(uint32_t, uint32_t, vector_maximum_element_uint32, "%d");
                } else {
                    MAXI_MINI_RUN_CASE(uint32_t, uint32_t, vector_minimum_element_uint32, "%d");
                }
            } else if (signed_ == 1) {
                if (find_max) {
                    MAXI_MINI_RUN_CASE(int32_t, uint32_t, vector_maximum_element_int32, "%d");
                } else {
                    MAXI_MINI_RUN_CASE(int32_t, uint32_t, vector_minimum_element_int32, "%d");
                }
            }
            break;

        default:
            break;
    }
}

#undef MAXI_MINI_RUN_CASE
 | |
| 
 | |
/*
 * PC-driven test for the logsoftmax kernels.
 *
 * A test case is fetched with read_case_from_python() into the buffer at
 * 0x10200000.  Regions are packed back to back in the order
 *   weight | bias | input | golden | output
 * with config_get[52..55] giving the byte offsets between them.
 *
 *   config_get[0]  length passed to the kernel
 *   config_get[1]  element size selector: 1 runs the int8 kernel, every
 *                  other value runs the int16 kernel
 *   config_get[2]  batch passed to the kernel
 *
 * Two scratch areas are placed after the output region: the max-index array
 * at output + config_get[55], and the max-value array 4 * length bytes
 * further on.  The output is then compared byte-wise against the golden data
 * over config_get[55] bytes.
 */
void logsoftmax_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    uint32_t data_begin = 0x10200000;

    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    uint32_t weights = data_begin;
    uint32_t biases = weights + config_get[52];
    uint32_t in_addr = biases + config_get[53];
    uint32_t gold_addr = in_addr + config_get[54];
    uint32_t out_addr = gold_addr + config_get[55];

    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];
    uint32_t batch = config_get[2];

    /* Scratch buffers handed to the kernel, located behind the output. */
    uint32_t idx_scratch = out_addr + config_get[55];
    uint32_t max_scratch = out_addr + config_get[55] + 4 * length;
    uint16_t *max_indice = (uint16_t *)idx_scratch;

    if (elem_bytes == 1) {
        logsoftmax_int8((int8_t *)in_addr, (int8_t *)out_addr, max_indice, (int8_t *)max_scratch, length, batch);
    } else {
        logsoftmax_int16((int16_t *)in_addr, (int16_t *)out_addr, max_indice, (int16_t *)max_scratch, length, batch);
    }

    verify_8bits(out_addr, gold_addr, config_get[55]);
}
 | |
| 
 | |
/*
 * Receive one softmax test case from the PC, run the kernel that matches the
 * element width, and compare the output with the golden data.
 *
 * With AI_USING_PSRAM defined the case is loaded into a 131072-byte buffer
 * obtained from os_mem_malloc; otherwise it is loaded at the fixed address
 * 0x10200000. config_get[52..55] are used as the byte offsets that separate
 * the consecutive weight, bias, input, golden and output regions.
 */
void softmax_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    #ifdef AI_USING_PSRAM
    uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072);
    /* Assumes os_mem_malloc reports failure with NULL; never load a case
     * through a null buffer. */
    if (data_begin == NULL) {
        iot_printf("softmax test: buffer allocation failed\n");
        return;
    }
    #else
    uint32_t data_begin = 0x10200000;
    #endif
    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    /* Regions are laid out back to back, starting at data_begin. */
    uint32_t weight_addr = (uint32_t)data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];
    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];

    switch (elem_bytes)
    {
        case 1:
            softmax_int8((int8_t *)input_addr, (int8_t *)output_addr, length);
            break;

        default:
            /* Every other element width is handled by the 16-bit kernel. */
            softmax_int16((int16_t *)input_addr, (int16_t *)output_addr, length);
            break;
    }

    verify_8bits(output_addr, golden_addr, config_get[55]);
    #ifdef AI_USING_PSRAM
    os_mem_free(data_begin);
    #endif
}
 | |
| 
 | |
/*
 * Receive one dequantization test case from the PC, convert the integer input
 * vector to float with the kernel selected by element width and signedness,
 * and compare the output with the golden data.
 *
 * With AI_USING_PSRAM defined the case is loaded into a 131072-byte buffer
 * obtained from os_mem_malloc; otherwise it is loaded at the fixed address
 * 0x10200000. config_get[52..55] are used as the byte offsets that separate
 * the consecutive weight, bias, input, golden and output regions.
 *
 * The scale is built as (config_get[4] - 128) * 2^config_get[5] and then
 * divided by 256 for 8-bit input or 65536 for any other width. For signed
 * input the center is shifted down by 128 (8-bit) or 32768 (other widths).
 */
void dequantize_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    #ifdef AI_USING_PSRAM
    uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072);
    /* Assumes os_mem_malloc reports failure with NULL; never load a case
     * through a null buffer. */
    if (data_begin == NULL) {
        iot_printf("dequantize test: buffer allocation failed\n");
        return;
    }
    #else
    uint32_t data_begin = 0x10200000;
    #endif
    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    /* Regions are laid out back to back, starting at data_begin. */
    uint32_t weight_addr = (uint32_t)data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];
    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];
    uint32_t signed_ = config_get[2];
    int32_t center = config_get[3];
    float scale = (float)((int32_t)config_get[4] - 128);
    uint32_t scale_power = config_get[5];

    /* Multiplying by a power of two is exact, so single precision suffices. */
    for (uint32_t i = 0; i < scale_power; i++) {
        scale = scale * 2.0f;
    }

    switch (elem_bytes)
    {
        case 1:
            scale = scale / 256.0f;
            switch (signed_)
            {
                case 0:
                    vector_uint8_to_float((uint8_t *)input_addr, (float *)output_addr, (float)(center), scale, length);
                    break;

                case 1:
                    vector_int8_to_float((int8_t *)input_addr, (float *)output_addr, (float)(center - 128), scale, length);
                    break;

                default:
                    break;
            }
            break;

        default:
            /* Every other element width is treated as 16-bit data. */
            scale = scale / 65536.0f;
            switch (signed_)
            {
                case 0:
                    vector_uint16_to_float((uint16_t *)input_addr, (float *)output_addr, (float)(center), scale, length);
                    break;

                case 1:
                    vector_int16_to_float((int16_t *)input_addr, (float *)output_addr, (float)(center - 32768), scale, length);
                    break;

                default:
                    break;
            }
            break;
    }

    verify_float(output_addr, golden_addr, length);
    #ifdef AI_USING_PSRAM
    os_mem_free(data_begin);
    #endif
}
 | |
| 
 | |
/*
 * Receive one quantization test case from the PC, convert the float input
 * vector to integers with the kernel selected by element width and
 * signedness, and compare the output with the golden data.
 *
 * With AI_USING_PSRAM defined the case is loaded into a 131072-byte buffer
 * obtained from os_mem_malloc; otherwise it is loaded at the fixed address
 * 0x10200000. config_get[52..55] are used as the byte offsets that separate
 * the consecutive weight, bias, input, golden and output regions.
 *
 * The scale is read as a float from the start of the weight region and the
 * center as a float from the start of the bias region.
 */
void quantize_test_from_pc() {
    uint8_t config_read[120];
    uint32_t config_get[60];
    #ifdef AI_USING_PSRAM
    uint8_t *data_begin = (uint8_t *)os_mem_malloc(1, 131072);
    /* Assumes os_mem_malloc reports failure with NULL; never load a case
     * through a null buffer. */
    if (data_begin == NULL) {
        iot_printf("quantize test: buffer allocation failed\n");
        return;
    }
    #else
    uint32_t data_begin = 0x10200000;
    #endif
    read_case_from_python(config_read, config_get, (uint8_t *)data_begin);

    /* Regions are laid out back to back, starting at data_begin. */
    uint32_t weight_addr = (uint32_t)data_begin;
    uint32_t bias_addr = weight_addr + config_get[52];
    uint32_t input_addr = bias_addr + config_get[53];
    uint32_t golden_addr = input_addr + config_get[54];
    uint32_t output_addr = golden_addr + config_get[55];
    uint32_t length = config_get[0];
    uint32_t elem_bytes = config_get[1];
    uint32_t signed_ = config_get[2];
    float *center = (float *)bias_addr;
    float *scale = (float *)weight_addr;

    switch (elem_bytes)
    {
        case 1:
            switch (signed_)
            {
                case 0:
                    vector_float_to_uint8((float *)input_addr, (uint8_t *)output_addr, *center, *scale, length);
                    break;

                case 1:
                    vector_float_to_int8((float *)input_addr, (int8_t *)output_addr, *center, *scale, length);
                    break;

                default:
                    break;
            }
            break;

        default:
            /* Every other element width is treated as 16-bit data. */
            switch (signed_)
            {
                case 0:
                    vector_float_to_uint16((float *)input_addr, (uint16_t *)output_addr, *center, *scale, length);
                    break;

                case 1:
                    vector_float_to_int16((float *)input_addr, (int16_t *)output_addr, *center, *scale, length);
                    break;

                default:
                    break;
            }
            break;
    }

    verify_8bits(output_addr, golden_addr, config_get[55]);
    #ifdef AI_USING_PSRAM
    os_mem_free(data_begin);
    #endif
}
 | |
| 
 | |
/*
 * Write/read-back check over 1024 * 512 32-bit words starting at 0x10200000.
 *
 * Word i is filled with i * 3; a second pass re-reads every word and prints
 * one line for each word that does not hold the value written to it.
 */
void psram_test() {
    const uint32_t base_addr = 0x10200000;
    const uint32_t word_count = 1024 * 512;
    uint32_t *const words = (uint32_t *)base_addr;
    uint32_t idx;

    /* Pass 1: fill the whole region with the pattern. */
    for (idx = 0; idx < word_count; idx++) {
        words[idx] = idx * 3;
    }

    /* Pass 2: read back and report every mismatch. */
    idx = 0;
    while (idx < word_count) {
        uint32_t expected = idx * 3;
        if (words[idx] != expected) {
            iot_printf("incorrect at %08x, should be %08x, in fact %08x\n", base_addr + 4 * idx, expected, words[idx]);
        }
        idx++;
    }
}
 | |
| 
 | |
/*
 * Cycle-count a 64x64 by 64x64 int8 matrix multiply together with the
 * allocation and release of its three buffers.
 *
 * Three cycle deltas are printed: allocation, multiplication, release. The
 * operand buffers are not initialised, so only the timing is meaningful.
 * If any allocation fails the multiply is skipped and whatever was
 * allocated is released.
 */
void temp_test() {
    uint32_t h_a = 64;
    uint32_t w_b = 64;
    uint32_t w_a = 64;
    uint64_t begin;
    uint64_t end;

    begin = cpu_get_mcycle();
    int8_t *a = (int8_t *)os_mem_malloc(1, h_a * w_a);
    int8_t *b = (int8_t *)os_mem_malloc(1, w_a * w_b);
    int8_t *o = (int8_t *)os_mem_malloc(1, h_a * w_b);
    end = cpu_get_mcycle();
    iot_printf("cycle~~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin));

    /* Assumes os_mem_malloc reports failure with NULL; the kernel must not
     * be handed a null operand or output buffer. */
    if (a == NULL || b == NULL || o == NULL) {
        iot_printf("temp test: buffer allocation failed\n");
        if (a != NULL) {
            os_mem_free(a);
        }
        if (b != NULL) {
            os_mem_free(b);
        }
        if (o != NULL) {
            os_mem_free(o);
        }
        return;
    }

    begin = cpu_get_mcycle();
    matrix_multi_int8_to_int8(a, b, o, h_a, w_a, w_b, 0);
    end = cpu_get_mcycle();
    iot_printf("cycle~~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin));

    begin = cpu_get_mcycle();
    os_mem_free(a);
    os_mem_free(b);
    os_mem_free(o);
    end = cpu_get_mcycle();
    iot_printf("cycle~~~~~~~~~~~~~~~~~ = %d\n", (uint32_t)(end - begin));
    iot_printf("temp test finished~~~~~~~~~~~~~~~~~\n");
}
 | |
| 
 | |
| #ifndef AI_OS_TASK
 | |
| int main(void) {
 | |
|     dbg_uart_init();
 | |
|     test_uart_init();
 | |
|     iot_dbg_uart_set_port(0, 115200 * 1, 0, 8, 1);
 | |
|     iot_printf("main start!\n");
 | |
|     dsp_init();
 | |
|     //while(1){
 | |
|     //test_uart0_getc();}
 | |
| #ifdef SATURATION
 | |
|     uint32_t data = 0x1071f;
 | |
|     vcsrw(data,data,0)
 | |
| #else
 | |
|     uint32_t data = 0x1061f;
 | |
|     vcsrw(data,data,0)
 | |
| #endif
 | |
|     //psram_test();
 | |
|     //maximum_test();
 | |
|     //uint64_t begin;
 | |
|     //uint64_t end;
 | |
|     //begin = cpu_get_mcycle();
 | |
|     //end = cpu_get_mcycle();
 | |
|     //iot_printf("cycle = %d\n", (uint32_t)(end - begin));
 | |
|     //begin = cpu_get_mcycle();
 | |
|     //end = cpu_get_mcycle();
 | |
|     //iot_printf("cycle = %d\n", (uint32_t)(end - begin));
 | |
|     //vector_add_test_float();
 | |
|     //vector_sub_test_float();
 | |
|     //vector_mul_test_float();
 | |
|     //vector_madd_msub_test_float();
 | |
|     //vector_max_test_float();
 | |
|     //vector_min_test_float();
 | |
|     //vector_equal_test_float();
 | |
|     //vector_not_equal_test_float();
 | |
|     //vector_less_than_test_float();
 | |
|     //vector_greater_or_equal_test_float();
 | |
|     //vector_inner_product_test_float();
 | |
|     //vector_inner_product_test_int32();
 | |
|     //vector_inner_product_test_uint32();
 | |
|     //float_to_int8_test();
 | |
|     //float_to_uint8_test();
 | |
|     //float_to_int16_test();
 | |
|     //float_to_uint16_test();
 | |
|     //int8_to_float_test();
 | |
|     //uint8_to_float_test();
 | |
|     //int16_to_float_test();
 | |
|     //uint16_to_float_test();
 | |
|     while(1) {
 | |
|         //temp_test();
 | |
|         //softmax_test_from_pc();
 | |
|         //vector_maxi_mini_test_from_pc();
 | |
|         //matrix_multi_test_from_pc();
 | |
|         //matrix_transpose_multi_test_from_pc();
 | |
|         //vector_test_from_pc();
 | |
|         //logsoftmax_test_from_pc();
 | |
|         //fc_test_from_pc();
 | |
|         dequantize_test_from_pc();
 | |
|         //vector_saturation_test_from_pc();
 | |
|         //fc_16bit_test();
 | |
|         //matrix_transpose_test_from_pc();
 | |
|         //depth_fc_test_from_pc();
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| #else
 | |
| 
 | |
/*
 * Task body used in the AI_OS_TASK build: set up the test UART and the DSP,
 * then run the PC-driven matrix-multiply test in an endless loop.
 */
void user_task_1(){
    test_uart_init();
    iot_dbg_uart_set_port(0, 115200 * 1, 0, 8, 1);
    iot_printf("main start!\n");
    dsp_init();

    /* The task never returns. */
    for (;;) {
        matrix_multi_test_from_pc();
    }
}
 | |
| 
 | |
| int32_t iot__task_init()
 | |
| {
 | |
|     os_task_h handle;
 | |
| 
 | |
|     handle = os_create_task(user_task_1, NULL, 9);
 | |
| 
 | |
|     //create the tasks;
 | |
|     if(handle != NULL) {
 | |
|         iot_printf("task 1 init successfully...\n");
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
/*
 * Module bring-up: platform initialisation, task creation, RTC
 * initialisation, then a start-up banner. Always returns 0.
 */
int32_t iot__module_init(void)
{
    /* Platform first, then the tasks that depend on it. */
    iot__platform_init();
    iot__task_init();

    iot_rtc_init();
    iot_printf("starting...\n");

    return 0;
}
 | |
| 
 | |
/*
 * Start the tasks by calling os_start_kernel. Returns 0 once that call
 * returns.
 */
int32_t iot__task_start()
{
    os_start_kernel();
    return 0;
}
 | |
| 
 | |
/*
 * Start the module by starting its tasks; the result of iot__task_start is
 * passed straight back to the caller.
 */
int32_t iot__module_start(void)
{
    return iot__task_start();
}
 | |
| 
 | |
| 
 | |
/*
 * Entry point of the AI_OS_TASK build: initialise the module, then start it.
 * Both return values are ignored and 0 is returned.
 */
int main(void)
{
    iot__module_init();   /* platform, tasks, RTC */
    iot__module_start();  /* start the tasks */

    return 0;
}
 | |
| 
 | |
| #endif |