142 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			142 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/****************************************************************************

Copyright(c) 2019 by Aerospace C.Power (Chongqing) Microelectronics. ALL RIGHTS RESERVED.

This Information is proprietary to Aerospace C.Power (Chongqing) Microelectronics and MAY NOT
be copied by any method or incorporated into another program without
the express written consent of Aerospace C.Power. This Information or any portion
thereof remains the property of Aerospace C.Power. The Information contained herein
is believed to be accurate and Aerospace C.Power assumes no responsibility or
liability for its use in any way and conveys no license or title under
any patent or copyright and makes no representation or warranty that this
Information is free from patent or copyright infringement.

****************************************************************************/
 | |
| /* os shim includes */
 | |
| #include "os_types.h"
 | |
| #include "os_utils.h"
 | |
| #include "os_mem.h"
 | |
| #include "dbg_io.h"
 | |
| #include "iot_io.h"
 | |
| #include "clk.h"
 | |
| 
 | |
| #include "iot_simd_inst.h"
 | |
| #include "iot_simd_api.h"
 | |
| ///////////////////////////////////////////////////////////////////
 | |
| 
 | |
/* Bit flags selecting which optional test groups are compiled in. */
#define TEST_FP_CAL     (1 << 0)
/* Active test groups; tested with `#if SIMD_TEST_CASE & <flag>`. */
#define SIMD_TEST_CASE   TEST_FP_CAL

/*
 * Vector register numbers used by the raw-instruction variant of the
 * integer multiply-add test (currently disabled in vector_test()).
 */
#define VD_N_LOAD_A     0
#define VD_N_LOAD_B     1
#define VD_N_LOAD_C     2
#define VD_N_CAL        3
#define VD_N_STORE_A    4
 | |
| 
 | |
/* Operands and result buffer for the 4-lane unsigned integer test. */
uint32_t test_ui32_4x1_a[4] = {1,2,3,4};
uint32_t test_ui32_4x1_b[4] = {4,4,4,4};
uint32_t test_ui32_4x1_c[4] = {6,3,1,2};
uint32_t result_4x1[4] = {0};

/* Operands for the 4-lane floating-point tests. */
float test_fp_4x1_a[4] = {0.1235,-823.514,213905,234.231};
float test_fp_4x1_b[4] = {-0.00031,-832,-0.123333,4};
float test_fp_4x1_c[4] = {6,3,1,2};

/* One result buffer per floating-point operation (vmull, vmulh, vadd, vsub). */
float result_fp_4x1[4] = {0};
float result_fph_4x1[4] = {0};
float result_fp_add[4] = {0};
float result_fp_sub[4] = {0};
 | |
| 
 | |
/*
 * Source buffer for the iot_simd_memcpy_512() check in main():
 * 512 signed samples, laid out 16 per row.
 */
int32_t input_real_512[] = {
    0,0,0,1,3,6,6,4,11,8,5,3,4,12,-9,0,
    8,16,33,10,64,53,50,86,94,153,96,169,161,75,255,140,
    237,420,376,366,398,449,529,466,436,714,741,698,853,1003,832,1054,
    933,1264,1378,1126,1521,1531,1575,1303,1283,1232,798,651,926,1326,1081,738,
    675,835,501,626,378,214,520,-328,-590,-973,-910,-864,-1422,-1457,-2618,-2505,
    -2845,-2937,-2924,-3949,-3812,-3866,-3861,-4391,-5123,-4357,-5302,-5279,-5514,-6363,-6553,-6502,
    -6552,-8196,-7717,-8082,-6694,-7123,-9027,-8134,-8843,-9073,-8122,-8210,-8603,-8250,-9866,-9880,
    -10076,-11688,-12243,-12179,-12011,-11782,-11492,-11498,-11083,-10389,-11062,-11534,-11300,-11335,-10973,-12649,
    -12634,-11037,-10638,-10945,-10530,-9796,-9971,-10586,-11211,-9543,-10683,-11520,-9592,-9493,-12024,-13512,
    -9928,-10654,-11815,-10440,-11616,-12379,-12642,-12759,-11761,-9389,-9700,-10015,-9177,-9334,-10198,-8623,
    -7577,-8765,-7214,-8171,-7741,-6971,-5770,-5729,-5008,-4617,-4306,-1040,-1484,-703,-1505,-1158,
    1435,541,2363,91,920,3149,3913,2437,942,3129,2670,959,964,3005,5750,5486,
    5810,6235,7360,8696,10144,10796,10948,10590,7159,10067,10832,8598,9986,10444,9856,8420,
    10248,10392,7024,9399,10074,11076,9709,9091,13028,11542,11904,12706,11094,10904,15128,15831,
    13651,10680,9701,13411,10192,7632,4161,4732,4289,5541,8836,3517,5455,3300,3989,3081,
    4456,3203,3665,1605,-344,2297,-459,5751,3107,1151,7259,4956,6457,3114,4153,7499,
    4845,6807,6460,6113,10954,10950,10370,8983,8632,11732,8736,8039,6655,8139,7673,5720,
    5256,3994,7294,4780,3523,2383,-793,-565,225,450,-787,1122,560,-558,-780,-1001,
    3773,1439,3644,1982,878,1971,-1092,2831,3257,1298,1726,5376,4929,5660,7450,1908,
    6339,4736,2936,4595,3953,9013,6912,6985,9921,11099,10340,10998,9842,7696,9151,8314,
    7878,8622,9064,8241,7811,11028,10967,11948,11031,12561,11554,12133,13992,16833,19181,15084,
    16063,18008,15762,16887,17032,15699,17311,18127,17401,19378,18230,15602,16636,17241,19131,20105,
    21856,21834,21652,20925,19821,21560,21372,19529,19800,19324,20169,19691,18500,20170,18918,18245,
    17718,18904,17699,16580,16273,14400,14051,12937,13238,11713,12634,12245,11196,10168,8747,9811,
    6692,5128,5402,4111,5046,5090,2992,3217,3174,2925,3138,2196,1870,2523,1434,754,
    789,365,-630,-1242,-829,-472,-1142,-1248,-573,-926,-1069,-1050,-1262,-2293,-2363,-2176,
    -1566,-2027,-1750,-1750,-3137,-2688,-2000,-2303,-2683,-1881,-2164,-1346,-1625,-1261,-1207,-1337,
    -1025,-1879,-758,-884,-909,-1046,-1176,-346,-421,-349,-1100,-1227,-871,-884,-859,-643,
    -219,-229,-190,-370,-164,-434,-672,-596,-314,-733,-610,-247,-681,-535,-410,-779,
    -592,-560,-527,-480,-837,-455,-547,-571,-470,-654,-603,-525,-531,-586,-564,-434,
    -381,-317,-166,-267,-236,-257,-218,-121,-144,-143,-175,-125,-110,-107,-123,-157,
    -146,-92,-66,-49,-66,-51,-43,-34,-24,-20,-12,-7,-6,-3,-1,0,
};
 | |
| 
 | |
/* Destination buffer for iot_simd_memcpy_512(); compared against input_real_512 in main(). */
int32_t output_cmp[512] = {0};
 | |
| 
 | |
| 
 | |
| void vector_test()
 | |
| {
 | |
| 
 | |
| #if 0
 | |
|     // load data, load a, b, c
 | |
|     vload1_4x1_uiw(VD_N_LOAD_A, test_ui32_4x1_a, 1);
 | |
| 
 | |
|     iot_printf("load a success\n");
 | |
| 
 | |
|     vload1_4x1_uiw(VD_N_LOAD_B, test_ui32_4x1_b, 1);
 | |
| 
 | |
|     vload1_4x1_uiw(VD_N_LOAD_C, test_ui32_4x1_c, 1);
 | |
| 
 | |
|     // mul calculate: a*b+c
 | |
|     vmadd(VD_N_CAL, VD_N_LOAD_A, VD_N_LOAD_B, VD_N_LOAD_C);
 | |
| 
 | |
|     iot_printf("cal success\n");
 | |
| 
 | |
|     // store
 | |
|     vstore1_4(result_4x1, 1, VD_N_CAL);
 | |
| 
 | |
|     iot_printf("result: %d %d %d %d\n",
 | |
|     result_4x1[0], result_4x1[1], result_4x1[2], result_4x1[3]);
 | |
| #else
 | |
|     iot_simd_ui_vmadd_4(result_4x1, test_ui32_4x1_a, test_ui32_4x1_b,
 | |
|         test_ui32_4x1_c, 1);
 | |
| 
 | |
|     iot_printf("result1: %d %d %d %d\n",
 | |
|         result_4x1[0], result_4x1[1], result_4x1[2], result_4x1[3]);
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #if SIMD_TEST_CASE & TEST_FP_CAL
 | |
| 
 | |
|     iot_printf("test vmull\n");
 | |
|     iot_simd_fp_vmull_4(result_fp_4x1, test_fp_4x1_a, test_fp_4x1_b, 1);
 | |
|     iot_printf("vmull: %f %f %f %f\n",
 | |
|         result_fp_4x1[0], result_fp_4x1[1], result_fp_4x1[2], result_fp_4x1[3]);
 | |
| 
 | |
|     iot_printf("test vmulh\n");
 | |
|     iot_simd_fp_vmulh_4(result_fph_4x1, test_fp_4x1_a, test_fp_4x1_b, 1);
 | |
|     iot_printf("vmulh: %f %f %f %f\n",
 | |
|         result_fph_4x1[0], result_fph_4x1[1], result_fph_4x1[2], result_fph_4x1[3]);
 | |
| 
 | |
|     iot_printf("test vadd\n");
 | |
|     iot_simd_fp_vadd_4(result_fp_add, test_fp_4x1_a, test_fp_4x1_b, 1);
 | |
|     iot_printf("add: %f %f %f %f\n",
 | |
|         result_fp_add[0], result_fp_add[1], result_fp_add[2], result_fp_add[3]);
 | |
| 
 | |
|     iot_printf("test vmulh\n");
 | |
|     iot_simd_fp_vsub_4(result_fp_sub, test_fp_4x1_a, test_fp_4x1_b, 1);
 | |
|     iot_printf("sub: %f %f %f %f\n",
 | |
|         result_fp_sub[0], result_fp_sub[1], result_fp_sub[2], result_fp_sub[3]);
 | |
| 
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| int main(void)
 | |
| {
 | |
|     dbg_uart_init();
 | |
| 
 | |
|     iot_printf("\n-------SIMD TEST---------\n");
 | |
| 
 | |
| #if 0
 | |
|     uint32_t mstatus = 0x8001f888;
 | |
|     asm volatile ("csrw mstatus, %0" : "=r"(mstatus));
 | |
| #else
 | |
|     iot_simd_enable();
 | |
| #endif
 | |
| 
 | |
|     iot_simd_memcpy_512(output_cmp, input_real_512);
 | |
| 
 | |
|     for(volatile uint32_t i = 0; i < 512; i++) {
 | |
|         if (output_cmp[i] != input_real_512[i]) {
 | |
|             iot_printf("error: i = %d\n", i);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     while(1) {
 | |
|         vector_test();
 | |
| 
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 |