/*
 * File: kunlun/driver/inc/iot_simd_inst.h
 * Snapshot: 2024-09-28 14:24:04 +08:00 (1668 lines, 83 KiB, C)
 */

/*
 * Two-level stringification helpers.
 * STR1(x) turns its argument into a string literal exactly as written.
 * STR(x) macro-expands x first and then stringifies the result; the wrappers
 * in this file use it to paste a fully expanded encoding expression after a
 * ".word " directive.
 */
#define STR1(x) #x
#define STR(x) STR1(x)
/*
 * Base 32-bit instruction words. Each value carries only the fixed bits of
 * an instruction; the encoder macros in this file OR the operand fields in.
 * The low seven bits are 0001011 for INST_VADD..INST_VNMSUB and 0101011 for
 * INST_VPUT..INST_VCSRW.
 *
 * NOTE(review): INST_VSEQ/VSNE/VSLT/VSGE have exactly the same values as
 * INST_VADD/VSUB/VMULL/VMULH, so the compare wrappers emit the same words as
 * the arithmetic ones - confirm against the ISA specification.
 */
#define INST_VADD 0b00000000000000000000000000001011
#define INST_VSUB 0b00000010000000000000000000001011
#define INST_VMULL 0b00000100000000000000000000001011
#define INST_VMULH 0b00000110000000000000000000001011
#define INST_VSEQ 0b00000000000000000000000000001011
#define INST_VSNE 0b00000010000000000000000000001011
#define INST_VSLT 0b00000100000000000000000000001011
#define INST_VSGE 0b00000110000000000000000000001011
/* Zip group: bit 12 set, variant selected by bits 31:25. */
#define INST_VZIPB1 0b00000010000000000001000000001011
#define INST_VZIPB2 0b00000100000000000001000000001011
#define INST_VZIPB3 0b00000110000000000001000000001011
#define INST_VZIPH1 0b00001000000000000001000000001011
#define INST_VZIPH2 0b00001010000000000001000000001011
#define INST_VZIPH3 0b00001100000000000001000000001011
#define INST_VZIPW1 0b00001110000000000001000000001011
#define INST_VZIPW2 0b00010000000000000001000000001011
#define INST_VZIPW3 0b00010010000000000001000000001011
#define INST_VZIPD1 0b00010100000000000001000000001011
#define INST_VZIPD2 0b00010110000000000001000000001011
#define INST_VZIPD3 0b00011000000000000001000000001011
#define INST_VZIPQ1 0b00011010000000000001000000001011
#define INST_VZIPQ2 0b00011100000000000001000000001011
#define INST_VZIPQ3 0b00011110000000000001000000001011
/* Disabled: these two values are identical to INST_VZIPH3 and INST_VZIPW1. */
//#define INST_VSHIFTL 0b00001100000000000001000000001011
//#define INST_VSHIFTR 0b00001110000000000001000000001011
/* Accumulator group: bit 13 set. */
#define INST_VMACC 0b00000000000000000010000000001011
#define INST_VMAP 0b01000000000000000010000000001011
#define INST_VACCL 0b10000000000000000010000000001011
#define INST_VACCH 0b10000000000000001010000000001011
#define INST_VACCT 0b10000000000000010010000000001011
#define INST_VACCC 0b11000000000000000010000000001011
/* Fused multiply group: bit 14 set, variant selected by bits 31:30. */
#define INST_VMADD 0b00000000000000000100000000001011
#define INST_VMSUB 0b01000000000000000100000000001011
#define INST_VNMADD 0b10000000000000000100000000001011
#define INST_VNMSUB 0b11000000000000000100000000001011
/* Scalar transfer / CSR group (low seven bits 0101011). */
#define INST_VPUT 0b00000000000000000110000000101011
#define INST_VGET 0b00000000000000000111000000101011
/*
 * NOTE(review): INST_VCSRR and INST_VCSRW are bit-for-bit identical, yet the
 * VCSRR and VCSRW encoders place the CSR address in different fields. One of
 * the two values is probably wrong - confirm against the ISA specification.
 */
#define INST_VCSRR 0b10000000000000000110000000101011
#define INST_VCSRW 0b10000000000000000110000000101011
/*
 * 3-bit field values consumed by the VLOADn / VSTOREn encoders.
 * FUNCT_VLOAD_TYPE_* goes into the encoders' `type` argument,
 * FUNCT_VLOAD_MODE_* into `lmode`, FUNCT_VSTORE_MODE_* into `smode`.
 */
/* Load element type selectors. */
#define FUNCT_VLOAD_TYPE_UIB 0b000
#define FUNCT_VLOAD_TYPE_SIB 0b001
#define FUNCT_VLOAD_TYPE_UIH 0b010
#define FUNCT_VLOAD_TYPE_SIH 0b011
#define FUNCT_VLOAD_TYPE_UIW 0b100
#define FUNCT_VLOAD_TYPE_SIW 0b101
#define FUNCT_VLOAD_TYPE_GF2 0b110
#define FUNCT_VLOAD_TYPE_FPW 0b111
/* Load mode selectors. NOTE(review): no name is defined for 0b110. */
#define FUNCT_VLOAD_MODE_1X1 0b000
#define FUNCT_VLOAD_MODE_2X1 0b001
#define FUNCT_VLOAD_MODE_2X2 0b010
#define FUNCT_VLOAD_MODE_2X4 0b011
#define FUNCT_VLOAD_MODE_4X1 0b100
#define FUNCT_VLOAD_MODE_4X2 0b101
#define FUNCT_VLOAD_MODE_8X1 0b111
/* Store mode selectors: MODE_k is encoded as k-1. */
#define FUNCT_VSTORE_MODE_1 0b000
#define FUNCT_VSTORE_MODE_2 0b001
#define FUNCT_VSTORE_MODE_3 0b010
#define FUNCT_VSTORE_MODE_4 0b011
#define FUNCT_VSTORE_MODE_5 0b100
#define FUNCT_VSTORE_MODE_6 0b101
#define FUNCT_VSTORE_MODE_7 0b110
#define FUNCT_VSTORE_MODE_8 0b111
/*
 * Encoders for the scalar-transfer and CSR instruction words.
 * Each macro ORs its operand fields into the matching INST_* base word:
 * the first operand at bit 7, the second at bit 15 (7+5+3) and, where
 * present, the third at bit 20 (7+5+3+5).
 *
 * Every argument and every expansion is parenthesised so that expression
 * arguments (e.g. `a | b`) and surrounding operators cannot regroup the
 * encoding. The result is still a plain integer expression, so it can be
 * stringified with STR() into a ".word" directive as before.
 */

/* CSR write: csr_addr at bit 7, source register number rs1 at bit 15. */
#define VCSRW(csr_addr, rs1) \
    (INST_VCSRW | \
     ((csr_addr) << (7)) | \
     ((rs1) << (7+5+3)))

/* CSR read: destination register number rd at bit 7, csr_addr at bit 15. */
#define VCSRR(rd, csr_addr) \
    (INST_VCSRR | \
     ((rd) << (7)) | \
     ((csr_addr) << (7+5+3)))

/* VPUT: vd at bit 7, rs1 at bit 15, put_mask8 at bit 20. */
#define VPUT(vd, rs1, put_mask8) \
    (INST_VPUT | \
     ((vd) << (7)) | \
     ((rs1) << (7+5+3)) | \
     ((put_mask8) << (7+5+3+5)))

/* VGET: rd at bit 7, vs1 at bit 15, get_index at bit 20. */
#define VGET(rd, vs1, get_index) \
    (INST_VGET | \
     ((rd) << (7)) | \
     ((vs1) << (7+5+3)) | \
     ((get_index) << (7+5+3+5)))

/* VMAPS: vd at bit 7, vs1 at bit 15, get_index at bit 20, on INST_VMAP. */
#define VMAPS(vd, vs1, get_index) \
    (INST_VMAP | \
     ((vd) << (7)) | \
     ((vs1) << (7+5+3)) | \
     ((get_index) << (7+5+3+5)))
/*
 * Generic encoders: OR register-number fields into a base instruction word.
 * Field positions: vd at bit 7, vs1 at bit 15 (7+5+3), vs2 at bit 20,
 * vs3 at bit 25.
 *
 * Arguments and expansions are parenthesised so that expression arguments
 * and neighbouring operators cannot regroup the encoding; the result remains
 * an integer expression suitable for STR() + ".word".
 */

/* Two source fields, no destination field. */
#define CUSTOM0_VS12(inst, vs1, vs2) \
    ((inst) | \
     ((vs1) << (7+5+3)) | \
     ((vs2) << (7+5+3+5)))

/* Destination field only. */
#define CUSTOM0_VD(inst, vd) \
    ((inst) | \
     ((vd) << (7)))

/* No operand fields: the base word itself. */
#define CUSTOM0(inst) \
    (inst)

/* Destination plus two source fields. */
#define CUSTOM0_VD_VS12(inst, vd, vs1, vs2) \
    ((inst) | \
     ((vd) << (7)) | \
     ((vs1) << (7+5+3)) | \
     ((vs2) << (7+5+3+5)))

/* Destination plus three source fields. */
#define CUSTOM0_VD_VS123(inst, vd, vs1, vs2, vs3) \
    ((inst) | \
     ((vd) << (7)) | \
     ((vs1) << (7+5+3)) | \
     ((vs2) << (7+5+3+5)) | \
     ((vs3) << (7+5+3+5+5)))
/*
 * Vector store encoders. All four build on the 7-bit base 0b0101011 and place
 *   smode at bit 7 (3 bits), a 2-bit selector at bit 10, 0b100 at bit 12,
 *   rs1 at bit 15, inc or rs2 at bit 20, vs3 at bit 25 and a 2-bit selector
 *   at bit 30.
 * The variants differ only in the two selectors:
 *   VSTORE1: bit10 = 0b00, bit30 = 0b00 (immediate `inc`)
 *   VSTORE2: bit10 = 0b00, bit30 = 0b10 (register `rs2`)
 *   VSTORE3: bit10 = 0b10, bit30 = 0b00 (immediate `inc`)
 *   VSTORE4: bit10 = 0b10, bit30 = 0b10 (register `rs2`)
 *
 * Arguments and expansions are parenthesised so expression arguments cannot
 * regroup the encoding. The expansion is meant to be stringified with STR()
 * into a ".word" directive, as the wrappers in this file do with the other
 * encoders.
 */
#define VSTORE1(smode, rs1, inc, vs3) \
    (0b0101011 | \
     ((smode) << (7)) | \
     (0b00 << (7+3)) | \
     (0b100 << (7+3+2)) | \
     ((rs1) << (7+3+2+3)) | \
     ((inc) << (7+3+2+3+5)) | \
     ((vs3) << (7+3+2+3+5+5)) | \
     (0b00 << (7+3+2+3+5+5+5)))

#define VSTORE2(smode, rs1, rs2, vs3) \
    (0b0101011 | \
     ((smode) << (7)) | \
     (0b00 << (7+3)) | \
     (0b100 << (7+3+2)) | \
     ((rs1) << (7+3+2+3)) | \
     ((rs2) << (7+3+2+3+5)) | \
     ((vs3) << (7+3+2+3+5+5)) | \
     (0b10 << (7+3+2+3+5+5+5)))

#define VSTORE3(smode, rs1, inc, vs3) \
    (0b0101011 | \
     ((smode) << (7)) | \
     (0b10 << (7+3)) | \
     (0b100 << (7+3+2)) | \
     ((rs1) << (7+3+2+3)) | \
     ((inc) << (7+3+2+3+5)) | \
     ((vs3) << (7+3+2+3+5+5)) | \
     (0b00 << (7+3+2+3+5+5+5)))

#define VSTORE4(smode, rs1, rs2, vs3) \
    (0b0101011 | \
     ((smode) << (7)) | \
     (0b10 << (7+3)) | \
     (0b100 << (7+3+2)) | \
     ((rs1) << (7+3+2+3)) | \
     ((rs2) << (7+3+2+3+5)) | \
     ((vs3) << (7+3+2+3+5+5)) | \
     (0b10 << (7+3+2+3+5+5+5)))
/*
 * Vector load encoders. All four build on the 7-bit base 0b0101011 and place
 *   vd at bit 7, a 3-bit selector at bit 12, rs1 (or vp) at bit 15,
 *   inc or rs2 at bit 20, type at bit 25, lmode at bit 28 and a 1-bit
 *   selector at bit 31.
 * The variants differ only in the two selectors:
 *   VLOAD1: bit12 = 0b000, bit31 = 0 (immediate `inc`)
 *   VLOAD2: bit12 = 0b000, bit31 = 1 (register `rs2`)
 *   VLOAD3: bit12 = 0b001, bit31 = 0 (immediate `inc`)
 *   VLOAD4: bit12 = 0b001, bit31 = 1 (register `rs2`)
 *
 * Arguments and expansions are parenthesised so expression arguments cannot
 * regroup the encoding. The expansion is meant to be stringified with STR()
 * into a ".word" directive (see the vloadN_* wrappers); evaluating VLOAD2 or
 * VLOAD4 as a C `int` expression would shift into the sign bit.
 */
#define VLOAD1(lmode, type, vd, rs1, inc) \
    (0b0101011 | \
     ((vd) << (7)) | \
     (0b000 << (7+5)) | \
     ((rs1) << (7+5+3)) | \
     ((inc) << (7+5+3+5)) | \
     ((type) << (7+5+3+5+5)) | \
     ((lmode) << (7+5+3+5+5+3)) | \
     (0b0 << (7+5+3+5+5+3+3)))

#define VLOAD2(lmode, type, vd, rs1, rs2) \
    (0b0101011 | \
     ((vd) << (7)) | \
     (0b000 << (7+5)) | \
     ((rs1) << (7+5+3)) | \
     ((rs2) << (7+5+3+5)) | \
     ((type) << (7+5+3+5+5)) | \
     ((lmode) << (7+5+3+5+5+3)) | \
     (0b1 << (7+5+3+5+5+3+3)))

#define VLOAD3(lmode, type, vd, vp, inc) \
    (0b0101011 | \
     ((vd) << (7)) | \
     (0b001 << (7+5)) | \
     ((vp) << (7+5+3)) | \
     ((inc) << (7+5+3+5)) | \
     ((type) << (7+5+3+5+5)) | \
     ((lmode) << (7+5+3+5+5+3)) | \
     (0b0 << (7+5+3+5+5+3+3)))

#define VLOAD4(lmode, type, vd, rs1, rs2) \
    (0b0101011 | \
     ((vd) << (7)) | \
     (0b001 << (7+5)) | \
     ((rs1) << (7+5+3)) | \
     ((rs2) << (7+5+3+5)) | \
     ((type) << (7+5+3+5+5)) | \
     ((lmode) << (7+5+3+5+5+3)) | \
     (0b1 << (7+5+3+5+5+3+3)))
/*
 * vcsrw(vcsr, rs1): emit the VCSRW instruction word for CSR address `vcsr`.
 *   vcsr - constant the assembler can evaluate (it is stringified into the
 *          .word directive)
 *   rs1  - run-time value; converted to uint32_t and pinned in x10, the
 *          register number hard-coded into the encoding
 * The argument is parenthesised before the cast so that an expression such
 * as `a + b` is converted as a whole. The expansion stays a plain `{ ... }`
 * block so call sites written with or without a trailing semicolon keep
 * compiling.
 */
#define vcsrw(vcsr, rs1) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    __asm__ __volatile__( \
        ".word " STR(VCSRW(vcsr, 10)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}

/*
 * vcsrr(rd, vcsr): emit the VCSRR instruction word for CSR address `vcsr`
 * and store the value left in x10 into the lvalue `rd`.
 *   rd   - lvalue receiving the uint32_t result
 *   vcsr - constant the assembler can evaluate
 */
#define vcsrr(rd, vcsr) \
{ \
    register uint32_t rd_ __asm__("x10"); \
    __asm__ __volatile__( \
        ".word " STR(VCSRR(10, vcsr)) "\n\t" \
        : "=r" (rd_) \
        :); \
    (rd) = rd_; \
}
/*
 * vput8(vd_n, rs1): emit the VPUT instruction word with put_mask8 = 255.
 *   vd_n - value for the vd field; must be a constant the assembler can
 *          evaluate (it is stringified into the .word directive)
 *   rs1  - run-time value; converted to uint32_t and pinned in x10, the
 *          register number hard-coded into the encoding
 * The argument is parenthesised before the cast so that an expression such
 * as `a + b` is converted as a whole. The expansion stays a plain `{ ... }`
 * block so call sites written with or without a trailing semicolon keep
 * compiling.
 */
#define vput8(vd_n, rs1) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    __asm__ __volatile__( \
        ".word " STR(VPUT(vd_n, 10, 255)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}

/*
 * vgets(rd, vs1, index): emit the VGET instruction word and store the value
 * left in x10 into the lvalue `rd`.
 *   rd         - lvalue receiving the uint32_t result
 *   vs1, index - constants the assembler can evaluate
 */
#define vgets(rd, vs1, index) \
{ \
    register uint32_t rd_ __asm__("x10"); \
    __asm__ __volatile__( \
        ".word " STR(VGET(10, vs1, index)) "\n\t" \
        : "=r" (rd_) \
        :); \
    (rd) = rd_; \
}
/*
 * vmaps(vd, vs1, index): emit the VMAPS instruction word (INST_VMAP with
 * vd at bit 7, vs1 at bit 15 and index at bit 20).
 * All three arguments must be constants the assembler can evaluate, because
 * they are stringified into the .word directive.
 *
 * The previous body was a copy of vgets: it emitted the VGET encoding and
 * assigned to `rd`, which is not a parameter of this macro, so it neither
 * produced a VMAP instruction nor compiled unless the caller happened to have
 * a variable named `rd`.
 * NOTE(review): assumes vd is a register number like the other *_n operands,
 * as the VMAPS encoder's parameter naming suggests - confirm against the ISA.
 */
#define vmaps(vd, vs1, index) \
{ \
    __asm__ __volatile__( \
        ".word " STR(VMAPS(vd, vs1, index)) "\n\t"); \
}
/*
 * Accumulator-group wrappers. Each emits one instruction word via ".word";
 * every argument is a register number that must be a constant the assembler
 * can evaluate. The expansions already end in ';'.
 *   vmacc(vs1_n, vs2_n) - INST_VMACC with two source fields
 *   vaccl/vacch/vacct(vd_n) - INST_VACCL/VACCH/VACCT with a destination field
 *   vaccc()             - INST_VACCC, no operand fields
 */
#define vmacc(vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VS12(INST_VMACC, vs1_n, vs2_n)));

#define vaccl(vd_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD(INST_VACCL, vd_n)));

#define vacch(vd_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD(INST_VACCH, vd_n)));

#define vacct(vd_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD(INST_VACCT, vd_n)));

#define vaccc() \
    __asm__ __volatile__(".word " STR(CUSTOM0(INST_VACCC)));
/*
 * Fused multiply-group wrappers. Each emits one instruction word built by
 * CUSTOM0_VD_VS123 from the matching INST_* base; all four arguments are
 * register numbers that must be constants the assembler can evaluate.
 * The expansions already end in ';'.
 * (vmadd used to be defined twice with identical text; the redundant copy
 * has been dropped.)
 */
#define vmadd(vd_n, vs1_n, vs2_n, vs3_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS123(INST_VMADD, vd_n, vs1_n, vs2_n, vs3_n)));

#define vmsub(vd_n, vs1_n, vs2_n, vs3_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS123(INST_VMSUB, vd_n, vs1_n, vs2_n, vs3_n)));

#define vnmadd(vd_n, vs1_n, vs2_n, vs3_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS123(INST_VNMADD, vd_n, vs1_n, vs2_n, vs3_n)));

#define vnmsub(vd_n, vs1_n, vs2_n, vs3_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS123(INST_VNMSUB, vd_n, vs1_n, vs2_n, vs3_n)));
/*
 * Compare and arithmetic wrappers. Each emits one instruction word built by
 * CUSTOM0_VD_VS12 from the matching INST_* base; vd_n, vs1_n and vs2_n are
 * register numbers that must be constants the assembler can evaluate.
 * The expansions already end in ';'.
 * NOTE(review): INST_VSEQ/VSNE/VSLT/VSGE are defined with the same values as
 * INST_VADD/VSUB/VMULL/VMULH, so the compare wrappers currently emit the same
 * words as the arithmetic ones - confirm the base encodings.
 */
#define vseq(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VSEQ, vd_n, vs1_n, vs2_n)));

#define vsne(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VSNE, vd_n, vs1_n, vs2_n)));

#define vslt(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VSLT, vd_n, vs1_n, vs2_n)));

#define vsge(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VSGE, vd_n, vs1_n, vs2_n)));

#define vadd(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VADD, vd_n, vs1_n, vs2_n)));

#define vsub(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VSUB, vd_n, vs1_n, vs2_n)));

#define vmull(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VMULL, vd_n, vs1_n, vs2_n)));

#define vmulh(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VMULH, vd_n, vs1_n, vs2_n)));
/*
 * Zip wrappers, one per INST_VZIP<B|H|W|D|Q><1|2|3> base word. Each emits a
 * single instruction word built by CUSTOM0_VD_VS12; vd_n, vs1_n and vs2_n are
 * register numbers that must be constants the assembler can evaluate.
 * The expansions already end in ';'.
 */
#define vzipb1(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPB1, vd_n, vs1_n, vs2_n)));

#define vzipb2(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPB2, vd_n, vs1_n, vs2_n)));

#define vzipb3(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPB3, vd_n, vs1_n, vs2_n)));

#define vziph1(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPH1, vd_n, vs1_n, vs2_n)));

#define vziph2(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPH2, vd_n, vs1_n, vs2_n)));

#define vziph3(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPH3, vd_n, vs1_n, vs2_n)));

#define vzipw1(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPW1, vd_n, vs1_n, vs2_n)));

#define vzipw2(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPW2, vd_n, vs1_n, vs2_n)));

#define vzipw3(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPW3, vd_n, vs1_n, vs2_n)));

#define vzipd1(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPD1, vd_n, vs1_n, vs2_n)));

#define vzipd2(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPD2, vd_n, vs1_n, vs2_n)));

#define vzipd3(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPD3, vd_n, vs1_n, vs2_n)));

#define vzipq1(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPQ1, vd_n, vs1_n, vs2_n)));

#define vzipq2(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPQ2, vd_n, vs1_n, vs2_n)));

#define vzipq3(vd_n, vs1_n, vs2_n) \
    __asm__ __volatile__(".word " STR(CUSTOM0_VD_VS12(INST_VZIPQ3, vd_n, vs1_n, vs2_n)));
/*
 * vload2_8x1_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_8X1 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * The public wrappers share one private helper so the register pinning and
 * asm statement are written once. Arguments are parenthesised before the
 * cast so that an expression such as `buf + 4` is converted as a whole.
 * The expansion stays a plain `{ ... }` block so call sites written with or
 * without a trailing semicolon keep compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_8X1(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_8X1, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_8x1_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_8x1_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_8x1_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_8x1_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_8x1_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_8x1_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
#define vload2_8x1_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_8X1(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
/*
 * vload2_4x1_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_4X1 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * The public wrappers share one private helper. Arguments are parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_4X1(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_4X1, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_4x1_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_4x1_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_4x1_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_4x1_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_4x1_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_4x1_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
#define vload2_4x1_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X1(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
/*
 * vload2_4x2_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_4X2 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * The public wrappers share one private helper. Arguments are parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_4X2(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_4X2, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_4x2_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_4x2_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_4x2_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_4x2_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_4x2_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_4x2_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
#define vload2_4x2_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_4X2(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
/*
 * vload2_2x4_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_2X4 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * vload2_2x4_fpw previously spelled the mode FUNCT_VLOAD_MODE_2x4 (lower-case
 * 'x'), which is not a defined macro and reached the assembler as an
 * unresolved symbol. Routing every wrapper through one helper fixes that and
 * keeps the mode name in a single place.
 * Arguments are parenthesised before the cast so that an expression such as
 * `buf + 4` is converted as a whole. The expansion stays a plain `{ ... }`
 * block so call sites written with or without a trailing semicolon keep
 * compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_2X4(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_2X4, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_2x4_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_2x4_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_2x4_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_2x4_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_2x4_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_2x4_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
#define vload2_2x4_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X4(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
/*
 * vload2_2x2_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_2X2 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * The public wrappers share one private helper. Arguments are parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_2X2(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_2X2, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_2x2_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_2x2_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_2x2_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_2x2_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_2x2_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_2x2_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
#define vload2_2x2_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X2(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
/*
 * vload2_2x1_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_2X1 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * The public wrappers share one private helper. Arguments are parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_2X1(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_2X1, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_2x1_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_2x1_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_2x1_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_2x1_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_2x1_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_2x1_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
#define vload2_2x1_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_2X1(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
/*
 * vload2_1x1_<type>(vd_n, rs1, rs2): emit a VLOAD2 instruction word with load
 * mode FUNCT_VLOAD_MODE_1X1 and the element type named by the suffix.
 *   vd_n     - value for the vd field; must be a constant the assembler can
 *              evaluate (it is stringified into the .word directive)
 *   rs1, rs2 - run-time values, converted to uint32_t and pinned in x10 and
 *              x11, the register numbers hard-coded into the encoding
 *
 * The public wrappers share one private helper. Arguments are parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): no "memory" clobber is declared; if the instruction reads
 * memory through rs1/rs2 one is probably needed - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD2_1X1(type, vd_n, rs1, rs2) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    register uint32_t rs2_ __asm__("x11") = (uint32_t)(rs2); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD2(FUNCT_VLOAD_MODE_1X1, type, vd_n, 10, 11)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define vload2_1x1_fpw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, rs2)
#define vload2_1x1_siw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, rs2)
#define vload2_1x1_uiw(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, rs2)
#define vload2_1x1_sih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, rs2)
#define vload2_1x1_uih(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, rs2)
#define vload2_1x1_sib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, rs2)
#define vload2_1x1_uib(vd_n, rs1, rs2) IOT_SIMD_VLOAD2_1X1(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, rs2)
/*
 * vload1_8x1_<type>(vd_n, rs1, inc): emit a VLOAD1 instruction word with load
 * mode FUNCT_VLOAD_MODE_8X1 and the element type named by the suffix.
 *   vd_n, inc - values for the vd and inc fields; must be constants the
 *               assembler can evaluate (they are stringified into .word)
 *   rs1       - run-time value, converted to uint32_t and pinned in x10, the
 *               register number hard-coded into the encoding
 *
 * The public wrappers share one private helper. The argument is parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): x10 is declared input-only and no "memory" clobber is given;
 * if the instruction updates rs1 by `inc` or reads memory, the constraints
 * need revisiting - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD1_8X1(type, vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_8X1, type, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
#define vload1_8x1_fpw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, inc)
#define vload1_8x1_siw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, inc)
#define vload1_8x1_uiw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, inc)
#define vload1_8x1_sih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, inc)
#define vload1_8x1_uih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, inc)
#define vload1_8x1_sib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, inc)
#define vload1_8x1_uib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_8X1(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, inc)
/*
 * vload1_4x2_<type>(vd_n, rs1, inc): emit a VLOAD1 instruction word with load
 * mode FUNCT_VLOAD_MODE_4X2 and the element type named by the suffix.
 *   vd_n, inc - values for the vd and inc fields; must be constants the
 *               assembler can evaluate (they are stringified into .word)
 *   rs1       - run-time value, converted to uint32_t and pinned in x10, the
 *               register number hard-coded into the encoding
 *
 * The public wrappers share one private helper. The argument is parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): x10 is declared input-only and no "memory" clobber is given;
 * if the instruction updates rs1 by `inc` or reads memory, the constraints
 * need revisiting - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD1_4X2(type, vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_4X2, type, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
#define vload1_4x2_fpw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, inc)
#define vload1_4x2_siw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, inc)
#define vload1_4x2_uiw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, inc)
#define vload1_4x2_sih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, inc)
#define vload1_4x2_uih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, inc)
#define vload1_4x2_sib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, inc)
#define vload1_4x2_uib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X2(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, inc)
/*
 * vload1_4x1_<type>(vd_n, rs1, inc): emit a VLOAD1 instruction word with load
 * mode FUNCT_VLOAD_MODE_4X1 and the element type named by the suffix.
 *   vd_n, inc - values for the vd and inc fields; must be constants the
 *               assembler can evaluate (they are stringified into .word)
 *   rs1       - run-time value, converted to uint32_t and pinned in x10, the
 *               register number hard-coded into the encoding
 *
 * The public wrappers share one private helper. The argument is parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): x10 is declared input-only and no "memory" clobber is given;
 * if the instruction updates rs1 by `inc` or reads memory, the constraints
 * need revisiting - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD1_4X1(type, vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_4X1, type, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
#define vload1_4x1_fpw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, inc)
#define vload1_4x1_siw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, inc)
#define vload1_4x1_uiw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, inc)
#define vload1_4x1_sih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, inc)
#define vload1_4x1_uih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, inc)
#define vload1_4x1_sib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, inc)
#define vload1_4x1_uib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_4X1(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, inc)
/*
 * vload1_2x4_<type>(vd_n, rs1, inc): emit a VLOAD1 instruction word with load
 * mode FUNCT_VLOAD_MODE_2X4 and the element type named by the suffix.
 *   vd_n, inc - values for the vd and inc fields; must be constants the
 *               assembler can evaluate (they are stringified into .word)
 *   rs1       - run-time value, converted to uint32_t and pinned in x10, the
 *               register number hard-coded into the encoding
 *
 * The public wrappers share one private helper. The argument is parenthesised
 * before the cast so that an expression such as `buf + 4` is converted as a
 * whole. The expansion stays a plain `{ ... }` block so call sites written
 * with or without a trailing semicolon keep compiling.
 * NOTE(review): x10 is declared input-only and no "memory" clobber is given;
 * if the instruction updates rs1 by `inc` or reads memory, the constraints
 * need revisiting - confirm against the ISA.
 */
#define IOT_SIMD_VLOAD1_2X4(type, vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__("x10") = (uint32_t)(rs1); \
    __asm__ __volatile__( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X4, type, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
#define vload1_2x4_fpw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_FPW, vd_n, rs1, inc)
#define vload1_2x4_siw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_SIW, vd_n, rs1, inc)
#define vload1_2x4_uiw(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_UIW, vd_n, rs1, inc)
#define vload1_2x4_sih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_SIH, vd_n, rs1, inc)
#define vload1_2x4_uih(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_UIH, vd_n, rs1, inc)
#define vload1_2x4_sib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_SIB, vd_n, rs1, inc)
#define vload1_2x4_uib(vd_n, rs1, inc) IOT_SIMD_VLOAD1_2X4(FUNCT_VLOAD_TYPE_UIB, vd_n, rs1, inc)
#define vload1_2x2_fpw(vd_n, rs1, inc) \
{ \
register uint32_t rs1_ asm ("x10") = (uint32_t) rs1; \
asm volatile ( \
".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_FPW, vd_n, 10, inc)) "\n\t" \
:: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X2, type SIW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x2_siw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_SIW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X2, type UIW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x2_uiw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_UIW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X2, type SIH). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x2_sih(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_SIH, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X2, type UIH). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x2_uih(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_UIH, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X2, type SIB). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x2_sib(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_SIB, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X2, type UIB). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x2_uib(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_UIB, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type FPW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_fpw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_FPW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type SIW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_siw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_SIW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type UIW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_uiw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_UIW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type SIH). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_sih(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_SIH, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type UIH). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_uih(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_UIH, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type SIB). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_sib(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_SIB, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 2X1, type UIB). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_2x1_uib(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_UIB, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type FPW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_fpw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_FPW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type SIW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_siw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_SIW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type UIW). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_uiw(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_UIW, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type SIH). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_sih(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_SIH, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type UIH). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_uih(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_UIH, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type SIB). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_sib(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_SIB, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD1 word (mode 1X1, type UIB). vd_n and inc are
 * stringified into the .word operand. rs1 is evaluated once, cast as a
 * whole (hence the parentheses) to uint32_t and pinned to x10, the
 * register number hard-coded in the encoding.
 */
#define vload1_1x1_uib(vd_n, rs1, inc) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VLOAD1(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_UIB, vd_n, 10, inc)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE3 word (mode 1). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_1(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_1, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 2). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_2(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_2, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 3). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_3(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_3, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 4). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_4(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_4, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 5). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_5(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_5, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 6). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_6(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_6, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 7). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_7(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_7, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE3 word (mode 8). vp_n, inc and vs3_n are only
 * stringified into the .word operand; no C operands are bound.
 */
#define vstore3_8(vp_n, inc, vs3_n) \
{ \
    __asm__ __volatile__ (".word " STR(VSTORE3(FUNCT_VSTORE_MODE_8, vp_n, inc, vs3_n))); \
}
/*
 * Emits one raw VSTORE2 word (mode 1) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_1(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_1, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 2) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_2(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_2, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 3) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_3(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_3, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 4) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_4(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_4, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 5) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_5(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_5, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 6) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_6(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_6, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 7) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_7(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_7, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE2 word (mode 8) with rs1 in x10 and rs2 in x11, the
 * register numbers hard-coded in the encoding. vs3_n is stringified into
 * the .word operand. Both arguments are parenthesized and evaluated into
 * plain temporaries before either register is pinned, so a call made
 * while evaluating rs2 cannot clobber the value already placed in x10.
 */
#define vstore2_8(rs1, rs2, vs3_n) \
{ \
    const uint32_t rs1_val_ = (uint32_t) (rs1); \
    const uint32_t rs2_val_ = (uint32_t) (rs2); \
    register uint32_t rs1_ __asm__ ("x10") = rs1_val_; \
    register uint32_t rs2_ __asm__ ("x11") = rs2_val_; \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE2(FUNCT_VSTORE_MODE_8, 10, 11, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 1). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_1(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_1, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 2). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_2(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_2, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 3). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_3(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_3, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 4). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_4(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_4, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 5). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_5(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_5, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 6). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_6(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_6, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 7). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_7(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_7, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VSTORE1 word (mode 8). inc and vs3_n are stringified
 * into the .word operand. rs1 is evaluated once, cast as a whole (hence
 * the parentheses) to uint32_t and pinned to x10, the register number
 * hard-coded in the encoding.
 */
#define vstore1_8(rs1, inc, vs3_n) \
{ \
    register uint32_t rs1_ __asm__ ("x10") = (uint32_t) (rs1); \
    __asm__ __volatile__ ( \
        ".word " STR(VSTORE1(FUNCT_VSTORE_MODE_8, 10, inc, vs3_n)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 8X1, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_8x1_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_8X1, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X2, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x2_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X2, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 4X1, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_4x1_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_4X1, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X4, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x4_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X4, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X2, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x2_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X2, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 2X1, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_2x1_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_2X1, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type FPW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_fpw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_FPW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type SIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_siw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_SIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type UIW). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_uiw(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_UIW, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type SIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_sih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_SIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type UIH). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_uih(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_UIH, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type SIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_sib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_SIB, vd_n, vp_n, inc))); \
}
/*
 * Emits one raw VLOAD3 word (mode 1X1, type UIB). vd_n, vp_n and inc are
 * only stringified into the .word operand; no C operands are bound.
 */
#define vload3_1x1_uib(vd_n, vp_n, inc) \
{ \
    __asm__ __volatile__ (".word " STR(VLOAD3(FUNCT_VLOAD_MODE_1X1, FUNCT_VLOAD_TYPE_UIB, vd_n, vp_n, inc))); \
}