142 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			142 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/****************************************************************************

Copyright(c) 2019 by Aerospace C.Power (Chongqing) Microelectronics. ALL RIGHTS RESERVED.

This Information is proprietary to Aerospace C.Power (Chongqing) Microelectronics and MAY NOT
be copied by any method or incorporated into another program without
the express written consent of Aerospace C.Power. This Information or any portion
thereof remains the property of Aerospace C.Power. The Information contained herein
is believed to be accurate and Aerospace C.Power assumes no responsibility or
liability for its use in any way and conveys no license or title under
any patent or copyright and makes no representation or warranty that this
Information is free from patent or copyright infringement.

****************************************************************************/
|  | /* os shim includes */ | ||
|  | #include "os_types.h"
 | ||
|  | #include "os_utils.h"
 | ||
|  | #include "os_mem.h"
 | ||
|  | #include "dbg_io.h"
 | ||
|  | #include "iot_io.h"
 | ||
|  | #include "clk.h"
 | ||
|  | 
 | ||
|  | #include "iot_simd_inst.h"
 | ||
|  | #include "iot_simd_api.h"
 | ||
|  | ///////////////////////////////////////////////////////////////////
 | ||
|  | 
 | ||
/*
 * Test-group selection. SIMD_TEST_CASE is a bit mask tested with
 * "#if SIMD_TEST_CASE & TEST_FP_CAL" in vector_test(); only the
 * floating-point group bit is defined here.
 */
#define TEST_FP_CAL     (1 << 0)
#define SIMD_TEST_CASE   TEST_FP_CAL

/*
 * Operand numbers passed to the raw vload1_4x1_uiw / vmadd / vstore1_4
 * wrappers in the disabled (#if 0) branch of vector_test().
 * NOTE(review): presumably vector register indices -- confirm against
 * iot_simd_inst.h.
 */
#define VD_N_LOAD_A     0
#define VD_N_LOAD_B     1
#define VD_N_LOAD_C     2
#define VD_N_CAL        3
#define VD_N_STORE_A    4
|  | 
 | ||
/* Unsigned 32-bit operands fed to the integer multiply-add test. */
uint32_t test_ui32_4x1_a[4] = { 1, 2, 3, 4 };
uint32_t test_ui32_4x1_b[4] = { 4, 4, 4, 4 };
uint32_t test_ui32_4x1_c[4] = { 6, 3, 1, 2 };

/* Destination of the integer multiply-add test; starts zeroed. */
uint32_t result_4x1[4] = { 0, 0, 0, 0 };

/*
 * Single-precision operands for the floating-point tests. The initialisers
 * are double constants converted to float at initialisation.
 */
float test_fp_4x1_a[4] = { 0.1235, -823.514, 213905, 234.231 };
float test_fp_4x1_b[4] = { -0.00031, -832, -0.123333, 4 };
float test_fp_4x1_c[4] = { 6, 3, 1, 2 };

/* One zero-initialised destination buffer per floating-point operation. */
float result_fp_4x1[4]  = { 0 };   /* vmull */
float result_fph_4x1[4] = { 0 };   /* vmulh */
float result_fp_add[4]  = { 0 };   /* vadd  */
float result_fp_sub[4]  = { 0 };   /* vsub  */
|  | 
 | ||
/*
 * Reference input for the block-copy check in main(): signed 32-bit samples
 * that main() copies with iot_simd_memcpy_512() and then compares word by
 * word against output_cmp. The array size is taken from the initialiser
 * list, which must supply exactly 512 values because main() reads indices
 * 0..511.
 */
int32_t input_real_512[] = {
0,0,0,1,3,6,6,4,11,8,5,3,4,12,-9,0,8,16,33,10,64,53,50,86,94,153,96,169,161,75,255,140,237,420,376,366,398,449,529,466,436,714,741,698,853,1003,832,1054,933,1264,1378,1126,1521,1531,1575,1303,1283,1232,798,651,926,1326,1081,738,675,835,501,626,378,214,520,-328,-590,-973,-910,-864,-1422,-1457,-2618,-2505,-2845,-2937,-2924,-3949,-3812,-3866,-3861,-4391,-5123,-4357,-5302,-5279,-5514,-6363,-6553,-6502,-6552,-8196,-7717,-8082,-6694,-7123,-9027,-8134,-8843,-9073,-8122,-8210,-8603,-8250,-9866,-9880,-10076,-11688,-12243,-12179,-12011,-11782,-11492,-11498,-11083,-10389,-11062,-11534,-11300,-11335,-10973,-12649,-12634,-11037,-10638,-10945,-10530,-9796,-9971,-10586,-11211,-9543,-10683,-11520,-9592,-9493,-12024,-13512,-9928,-10654,-11815,-10440,-11616,-12379,-12642,-12759,-11761,-9389,-9700,-10015,-9177,-9334,-10198,-8623,-7577,-8765,-7214,-8171,-7741,-6971,-5770,-5729,-5008,-4617,-4306,-1040,-1484,-703,-1505,-1158,1435,541,2363,91,920,3149,3913,2437,942,3129,2670,959,964,3005,5750,5486,5810,6235,7360,8696,10144,10796,10948,10590,7159,10067,10832,8598,9986,10444,9856,8420,10248,10392,7024,9399,10074,11076,9709,9091,13028,11542,11904,12706,11094,10904,15128,15831,13651,10680,9701,13411,10192,7632,4161,4732,4289,5541,8836,3517,5455,3300,3989,3081,4456,3203,3665,1605,-344,2297,-459,5751,3107,1151,7259,4956,6457,3114,4153,7499,4845,6807,6460,6113,10954,10950,10370,8983,8632,11732,8736,8039,6655,8139,7673,5720,5256,3994,7294,4780,3523,2383,-793,-565,225,450,-787,1122,560,-558,-780,-1001,3773,1439,3644,1982,878,1971,-1092,2831,3257,1298,1726,5376,4929,5660,7450,1908,6339,4736,2936,4595,3953,9013,6912,6985,9921,11099,10340,10998,9842,7696,9151,8314,7878,8622,9064,8241,7811,11028,10967,11948,11031,12561,11554,12133,13992,16833,19181,15084,16063,18008,15762,16887,17032,15699,17311,18127,17401,19378,18230,15602,16636,17241,19131,20105,21856,21834,21652,20925,19821,21560,21372,19529,19800,19324,20169,19691,18500,20170,18918,18245,17718,18904,17699,16580,16273,14400,14051,12937,13238,
11713,12634,12245,11196,10168,8747,9811,6692,5128,5402,4111,5046,5090,2992,3217,3174,2925,3138,2196,1870,2523,1434,754,789,365,-630,-1242,-829,-472,-1142,-1248,-573,-926,-1069,-1050,-1262,-2293,-2363,-2176,-1566,-2027,-1750,-1750,-3137,-2688,-2000,-2303,-2683,-1881,-2164,-1346,-1625,-1261,-1207,-1337,-1025,-1879,-758,-884,-909,-1046,-1176,-346,-421,-349,-1100,-1227,-871,-884,-859,-643,-219,-229,-190,-370,-164,-434,-672,-596,-314,-733,-610,-247,-681,-535,-410,-779,-592,-560,-527,-480,-837,-455,-547,-571,-470,-654,-603,-525,-531,-586,-564,-434,-381,-317,-166,-267,-236,-257,-218,-121,-144,-143,-175,-125,-110,-107,-123,-157,-146,-92,-66,-49,-66,-51,-43,-34,-24,-20,-12,-7,-6,-3,-1,0,
};

/* Destination of the block copy in main(); zero-initialised, 512 words. */
int32_t output_cmp[512] = {0};
|  | 
 | ||
|  | 
 | ||
|  | void vector_test() | ||
|  | { | ||
|  | 
 | ||
|  | #if 0
 | ||
|  |     // load data, load a, b, c
 | ||
|  |     vload1_4x1_uiw(VD_N_LOAD_A, test_ui32_4x1_a, 1); | ||
|  | 
 | ||
|  |     iot_printf("load a success\n"); | ||
|  | 
 | ||
|  |     vload1_4x1_uiw(VD_N_LOAD_B, test_ui32_4x1_b, 1); | ||
|  | 
 | ||
|  |     vload1_4x1_uiw(VD_N_LOAD_C, test_ui32_4x1_c, 1); | ||
|  | 
 | ||
|  |     // mul calculate: a*b+c
 | ||
|  |     vmadd(VD_N_CAL, VD_N_LOAD_A, VD_N_LOAD_B, VD_N_LOAD_C); | ||
|  | 
 | ||
|  |     iot_printf("cal success\n"); | ||
|  | 
 | ||
|  |     // store
 | ||
|  |     vstore1_4(result_4x1, 1, VD_N_CAL); | ||
|  | 
 | ||
|  |     iot_printf("result: %d %d %d %d\n", | ||
|  |     result_4x1[0], result_4x1[1], result_4x1[2], result_4x1[3]); | ||
|  | #else
 | ||
|  |     iot_simd_ui_vmadd_4(result_4x1, test_ui32_4x1_a, test_ui32_4x1_b, | ||
|  |         test_ui32_4x1_c, 1); | ||
|  | 
 | ||
|  |     iot_printf("result1: %d %d %d %d\n", | ||
|  |         result_4x1[0], result_4x1[1], result_4x1[2], result_4x1[3]); | ||
|  | 
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #if SIMD_TEST_CASE & TEST_FP_CAL
 | ||
|  | 
 | ||
|  |     iot_printf("test vmull\n"); | ||
|  |     iot_simd_fp_vmull_4(result_fp_4x1, test_fp_4x1_a, test_fp_4x1_b, 1); | ||
|  |     iot_printf("vmull: %f %f %f %f\n", | ||
|  |         result_fp_4x1[0], result_fp_4x1[1], result_fp_4x1[2], result_fp_4x1[3]); | ||
|  | 
 | ||
|  |     iot_printf("test vmulh\n"); | ||
|  |     iot_simd_fp_vmulh_4(result_fph_4x1, test_fp_4x1_a, test_fp_4x1_b, 1); | ||
|  |     iot_printf("vmulh: %f %f %f %f\n", | ||
|  |         result_fph_4x1[0], result_fph_4x1[1], result_fph_4x1[2], result_fph_4x1[3]); | ||
|  | 
 | ||
|  |     iot_printf("test vadd\n"); | ||
|  |     iot_simd_fp_vadd_4(result_fp_add, test_fp_4x1_a, test_fp_4x1_b, 1); | ||
|  |     iot_printf("add: %f %f %f %f\n", | ||
|  |         result_fp_add[0], result_fp_add[1], result_fp_add[2], result_fp_add[3]); | ||
|  | 
 | ||
|  |     iot_printf("test vmulh\n"); | ||
|  |     iot_simd_fp_vsub_4(result_fp_sub, test_fp_4x1_a, test_fp_4x1_b, 1); | ||
|  |     iot_printf("sub: %f %f %f %f\n", | ||
|  |         result_fp_sub[0], result_fp_sub[1], result_fp_sub[2], result_fp_sub[3]); | ||
|  | 
 | ||
|  | #endif
 | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | int main(void) | ||
|  | { | ||
|  |     dbg_uart_init(); | ||
|  | 
 | ||
|  |     iot_printf("\n-------SIMD TEST---------\n"); | ||
|  | 
 | ||
|  | #if 0
 | ||
|  |     uint32_t mstatus = 0x8001f888; | ||
|  |     asm volatile ("csrw mstatus, %0" : "=r"(mstatus)); | ||
|  | #else
 | ||
|  |     iot_simd_enable(); | ||
|  | #endif
 | ||
|  | 
 | ||
|  |     iot_simd_memcpy_512(output_cmp, input_real_512); | ||
|  | 
 | ||
|  |     for(volatile uint32_t i = 0; i < 512; i++) { | ||
|  |         if (output_cmp[i] != input_real_512[i]) { | ||
|  |             iot_printf("error: i = %d\n", i); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     while(1) { | ||
|  |         vector_test(); | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     return 0; | ||
|  | } |