/*
 * Build-time guard: stop compilation with an #error unless RWBITS is
 * defined and equal to one of 64, 128, 256 or 512.
 * (The matching #endif is not part of the visible excerpt.)
 */
1 #if !defined(RWBITS) || (RWBITS != 64 && RWBITS != 128 && RWBITS != 256 && RWBITS != 512)
2 #error The register width variable RWBITS must be declared as 64,128,256 or 512 bits.
/*
 * SIZEOF_VEC(T): RW divided by sizeof(CTYPE_<T>), where T is a type suffix
 * that is pasted onto CTYPE_ to select the element type.
 * VW(T) is a shorter alias for SIZEOF_VEC(T).
 * NOTE(review): RW is not defined in the visible lines; presumably it is
 * the register width in bytes derived from RWBITS, which would make the
 * result an element count -- confirm.
 */
6 #define SIZEOF_VEC(T) (RW/sizeof(CTYPE_##T))
7 #define VW(T) SIZEOF_VEC(T)
/*
 * Element type table: CTYPE_<suffix> names the C type that the generator
 * macros select by pasting the suffix (CTYPE_##T).
 *   PD -> double, PS -> float, DI -> int64_t,
 *   D  -> int32_t, W -> int16_t, B -> int8_t
 */
13 #define CTYPE_PD double
14 #define CTYPE_PS float
15 #define CTYPE_DI int64_t
16 #define CTYPE_D int32_t
17 #define CTYPE_W int16_t
18 #define CTYPE_B int8_t
/*
 * Variadic fetch type table: CTYPEP_<suffix> is the type handed to va_arg
 * by the *_GENERIC_* loaders, whose result is then cast to CTYPE_<suffix>.
 * PS, W and B use a wider type than their CTYPE_ counterpart (double,
 * int32_t, int32_t), in line with C default argument promotions for
 * variadic arguments; PD, DI and D are unchanged.
 * NOTE(review): using int32_t for W and B assumes int is 32 bits wide on
 * the target -- confirm.
 */
21 #define CTYPEP_PD double
22 #define CTYPEP_PS double
23 #define CTYPEP_DI int64_t
24 #define CTYPEP_D int32_t
25 #define CTYPEP_W int32_t
26 #define CTYPEP_B int32_t
/*
 * OP_F_TYPE(P, T): emits the per-element helper
 *     CTYPE_<T> F_<P><T>(int i, va_list ap)
 * The visible body takes two CTYPE_<T> pointers from the va_list and
 * combines element i of each with the operator token OP_<P>.
 * NOTE(review): OP_<P> is defined elsewhere, and the remaining lines of
 * this macro (declarations, braces, return) are not shown in this excerpt.
 */
72 #define OP_F_TYPE(P, T)\
73 CTYPE_##T F_##P##T(int i, va_list ap)\
76 v1 = va_arg(ap, CTYPE_##T *);\
77 v2 = va_arg(ap, CTYPE_##T *);\
78 r = v1[i] OP_##P v2[i];\
/*
 * OP_MULADD_TYPE(P, T): emits the per-element helper
 *     CTYPE_<T> F_MULADD<T>(int i, va_list ap)
 * It takes three CTYPE_<T> pointers from the va_list, in order v1, v2, v3,
 * and returns v1[i] + v2[i]*v3[i].
 * The parameter P is not used in the visible lines; the function name is
 * fixed to MULADD.
 */
84 #define OP_MULADD_TYPE(P,T)\
85 CTYPE_##T F_MULADD##T(int i, va_list ap)\
87 CTYPE_##T *v1,*v2,*v3;\
88 v1 = va_arg(ap, CTYPE_##T *);\
89 v2 = va_arg(ap, CTYPE_##T *);\
90 v3 = va_arg(ap, CTYPE_##T *);\
91 return v1[i] + v2[i]*v3[i];\
/*
 * OP_UMIN_TYPE(P, T): emits the per-element helper
 *     CTYPE_<T> F_UMIN<T>(int i, va_list ap)
 * Only the fetch of the first CTYPE_<T> pointer (v1) from the va_list is
 * visible here; the computation and return value are on lines not shown.
 * The parameter P is not used in the visible lines.
 */
96 #define OP_UMIN_TYPE(P,T)\
97 CTYPE_##T F_UMIN##T(int i, va_list ap)\
100 v1 = va_arg(ap, CTYPE_##T *);\
/*
 * SIMD_OP_TYPE(P, T): emits the variadic entry point
 *     void SIMD_<P><T>(CTYPE_<T> *dst, ...)
 * It loops i over 0 .. VW(T)-1 and stores the result of the matching
 * per-element helper F_<P><T>(i, ap_f) into dst[i].
 * NOTE(review): how ap_f is initialised for each iteration is on lines not
 * shown in this excerpt.
 */
105 #define SIMD_OP_TYPE(P,T)\
106 void SIMD_##P##T(CTYPE_##T *dst, ...)\
111 for (i = 0; i < (VW(T)); i++)\
114 dst[i] = F_##P##T(i,ap_f);\
/*
 * _DEF_FOR_TYPES(F, P): helper taking a generator macro F and an argument
 * P; its replacement list is not visible in this excerpt (presumably it
 * invokes F once per type suffix -- confirm).
 * SIMD_OP(P) runs it with the SIMD_OP_TYPE generator, and OP_F(P) runs it
 * with the OP_F_TYPE generator.
 */
119 #define _DEF_FOR_TYPES(F,P)\
127 #define SIMD_OP(P) _DEF_FOR_TYPES(SIMD_OP_TYPE,P)
128 #define OP_F(P) _DEF_FOR_TYPES(OP_F_TYPE,P)
/*
 * SIMD_LOAD_TYPE(A, T): generator for the load functions of type suffix T.
 *
 * The chain of underscore-prefixed helpers exists to get macro arguments
 * expanded before they are token-pasted:
 *   - the first hop passes RWBITS as an ordinary argument so it is replaced
 *     by its value before being pasted into VW_<RWB>_<T>;
 *   - the next hop lets LSTYPE_<T> and VW_<RWB>_<T> expand before they are
 *     pasted into the function names.
 * LSTYPE_* and VW_*_* are defined outside the visible excerpt.
 *
 * The innermost macro emits three functions, each looping over VW lanes:
 *   SIMD_LOAD_<A><VW><LST>(vec, base)            - base is an array
 *   SIMD_LOAD_GENERIC_<A><VW><LST>(vec, ...)     - per lane, one variadic
 *       argument is read as CTYPEP_<T> and cast to CTYPE_<T>
 *   SIMD_LOAD_BROADCAST_<A><VW><LST>(vec, base)  - base is a single value
 * NOTE(review): the statements that write to vec are on lines not shown.
 */
135 #define SIMD_LOAD_TYPE(A,T) _SIMD_LOAD_TYPE(T,RWBITS,A)
136 #define _SIMD_LOAD_TYPE(T,RWB,A) __SIMD_LOAD_TYPE(T,RWB,A)
137 #define __SIMD_LOAD_TYPE(T,RWB,A) ___SIMD_LOAD_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
138 #define ___SIMD_LOAD_TYPE(T,LST,VW,A) ____SIMD_LOAD_TYPE(T,LST,VW,A)
139 #define ____SIMD_LOAD_TYPE(T,LST,VW,A)\
140 void SIMD_LOAD_##A##VW##LST(CTYPE_##T vec[VW], CTYPE_##T base[VW])\
143 for (i = 0; i < (VW); i++)\
147 void SIMD_LOAD_GENERIC_##A##VW##LST(CTYPE_##T vec[VW], ...)\
153 for (i = 0; i < (VW); i++)\
155 n = (CTYPE_##T) va_arg(ap, CTYPEP_##T);\
161 void SIMD_LOAD_BROADCAST_##A##VW##LST(CTYPE_##T vec[VW], CTYPE_##T base)\
164 for (i = 0; i < (VW); i++)\
/*
 * SIMD_STORE_TYPE(A, T): generator for the store functions of type suffix T.
 * The underscore-prefixed helpers forward the arguments through extra
 * expansion steps so that RWBITS, LSTYPE_<T> and VW_<RWB>_<T> are replaced
 * by their values before being pasted into identifiers.
 *
 * The innermost macro emits two functions, each looping over VW lanes:
 *   SIMD_STORE_<A><VW><LST>(vec, base)          - base is an array
 *   SIMD_STORE_GENERIC_<A><VW><LST>(vec, ...)   - per lane, one variadic
 *       argument is read as a CTYPE_<T> pointer (pn)
 * NOTE(review): the statements that perform the copy are on lines not
 * shown in this excerpt.
 */
169 #define SIMD_STORE_TYPE(A,T) _SIMD_STORE_TYPE(T,RWBITS,A)
170 #define _SIMD_STORE_TYPE(T,RWB,A) __SIMD_STORE_TYPE(T,RWB,A)
171 #define __SIMD_STORE_TYPE(T,RWB,A) ___SIMD_STORE_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
172 #define ___SIMD_STORE_TYPE(T,LST,VW,A) ____SIMD_STORE_TYPE(T,LST,VW,A)
173 #define ____SIMD_STORE_TYPE(T,LST,VW,A)\
174 void SIMD_STORE_##A##VW##LST(CTYPE_##T vec[VW], CTYPE_##T base[VW])\
177 for (i = 0; i < (VW); i++)\
181 void SIMD_STORE_GENERIC_##A##VW##LST(CTYPE_##T vec[VW], ...)\
187 for (i = 0; i < (VW); i++)\
189 pn = va_arg(ap, CTYPE_##T *);\
/*
 * SIMD_ZERO_TYPE(A, T): generator for the function
 *     void SIMD_ZERO_<A><VW><LST>(CTYPE_<T> vec[VW])
 * which loops over the VW lanes of vec.
 * The underscore-prefixed helpers only add expansion steps so that RWBITS,
 * LSTYPE_<T> and VW_<RWB>_<T> are expanded before token pasting.
 * NOTE(review): the loop body is not shown; going by the name it presumably
 * assigns zero to each lane -- confirm.
 */
196 #define SIMD_ZERO_TYPE(A,T) _SIMD_ZERO_TYPE(T,RWBITS,A)
197 #define _SIMD_ZERO_TYPE(T,RWB,A) __SIMD_ZERO_TYPE(T,RWB,A)
198 #define __SIMD_ZERO_TYPE(T,RWB,A) ___SIMD_ZERO_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
199 #define ___SIMD_ZERO_TYPE(T,LST,VW,A) ____SIMD_ZERO_TYPE(T,LST,VW,A)
200 #define ____SIMD_ZERO_TYPE(T,LST,VW,A)\
201 void SIMD_ZERO_##A##VW##LST(CTYPE_##T vec[VW])\
204 for (i = 0; i < (VW); i++)\
/*
 * SIMD_SHUFFLE_TYPE(A, T): generator for the variadic function
 *     void SIMD_SHUFFLE_V<VW><LST>(CTYPE_<T> res[VW], CTYPE_<T> vec[VW], ...)
 * For each of the VW lanes one int (p) is read from the variadic
 * arguments.
 * The underscore-prefixed helpers only add expansion steps so that RWBITS,
 * LSTYPE_<T> and VW_<RWB>_<T> are expanded before token pasting.
 * NOTE(review): unlike the load/store/zero generators, the function name
 * uses a literal "V" and the A argument is not pasted into it in the
 * visible lines -- confirm this is intended. How p is used to fill res is
 * on lines not shown.
 */
209 #define SIMD_SHUFFLE_TYPE(A,T) _SIMD_SHUFFLE_TYPE(T,RWBITS,A)
210 #define _SIMD_SHUFFLE_TYPE(T,RWB,A) __SIMD_SHUFFLE_TYPE(T,RWB,A)
211 #define __SIMD_SHUFFLE_TYPE(T,RWB,A) ___SIMD_SHUFFLE_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
212 #define ___SIMD_SHUFFLE_TYPE(T,LST,VW,A) ____SIMD_SHUFFLE_TYPE(T,LST,VW,A)
213 #define ____SIMD_SHUFFLE_TYPE(T,LST,VW,A) \
214 void SIMD_SHUFFLE_V##VW##LST(CTYPE_##T res[VW], CTYPE_##T vec[VW], ...)\
220 for (i = 0; i < (VW); i++)\
222 p = va_arg(ap, int);\
/*
 * SIMD_INVERT_TYPE(A, T): generator for the function
 *     void SIMD_INVERT_V<VW><LST>(CTYPE_<T> res[VW], CTYPE_<T> vec[VW])
 * which copies vec into res in reverse lane order: lane i of vec is
 * written to lane VW-i-1 of res.
 * The underscore-prefixed helpers only add expansion steps so that RWBITS,
 * LSTYPE_<T> and VW_<RWB>_<T> are expanded before token pasting.
 * NOTE(review): the function name uses a literal "V" and the A argument is
 * not pasted into it in the visible lines -- confirm this is intended.
 */
228 #define SIMD_INVERT_TYPE(A,T) _SIMD_INVERT_TYPE(T,RWBITS,A)
229 #define _SIMD_INVERT_TYPE(T,RWB,A) __SIMD_INVERT_TYPE(T,RWB,A)
230 #define __SIMD_INVERT_TYPE(T,RWB,A) ___SIMD_INVERT_TYPE(T,LSTYPE_##T,VW_##RWB##_##T,A)
231 #define ___SIMD_INVERT_TYPE(T,LST,VW,A) ____SIMD_INVERT_TYPE(T,LST,VW,A)
232 #define ____SIMD_INVERT_TYPE(T,LST,VW,A) \
233 void SIMD_INVERT_V##VW##LST(CTYPE_##T res[VW], CTYPE_##T vec[VW])\
236 for (i = 0; i < (VW); i++)\
237 res[VW-i-1] = vec[i];\
/*
 * SIMD_LOAD_CONV(A, TO, TD): generator for converting loads from element
 * type suffix TO to element type suffix TD.
 * The lane count VWD is taken from VW_<RWB>_<TD>, i.e. from the TD type.
 * The underscore-prefixed helpers add expansion steps so that RWBITS,
 * VW_<RWB>_<TD>, LSTYPE_<TO> and LSTYPE_<TD> are expanded before pasting.
 *
 * The innermost macro emits two functions, each looping over VWD lanes:
 *   SIMD_LOAD_<A><VWD><TOLST>_TO_<A><VWD><TDLST>(dst, src)
 *       dst holds CTYPE_<TD> elements, src holds CTYPE_<TO> elements
 *   SIMD_LOAD_GENERIC_<A><VWD><TOLST>_TO_<A><VWD><TDLST>(vec, ...)
 *       per lane, one variadic argument is read as CTYPEP_<TO> and cast to
 *       CTYPE_<TO>
 * NOTE(review): the statements that write the destination are on lines not
 * shown in this excerpt.
 */
246 #define SIMD_LOAD_CONV(A,TO,TD) _SIMD_LOAD_CONV(A,TO,TD,RWBITS)
247 #define _SIMD_LOAD_CONV(A,TO,TD,RWB) __SIMD_LOAD_CONV(A, TO, TD, RWB)
248 #define __SIMD_LOAD_CONV(A,TO,TD,RWB) ___SIMD_LOAD_CONV(A,TO,TD,VW_##RWB##_##TD,LSTYPE_##TO,LSTYPE_##TD)
249 #define ___SIMD_LOAD_CONV(A,TO,TD,VWD,TOLST,TDLST) ____SIMD_LOAD_CONV(A,TO,TD,VWD,TOLST,TDLST)
250 #define ____SIMD_LOAD_CONV(A,TO,TD,VWD,TOLST,TDLST)\
251 void SIMD_LOAD_##A##VWD##TOLST##_TO_##A##VWD##TDLST(CTYPE_##TD dst[VWD], CTYPE_##TO src[VWD])\
254 for (i = 0; i < VWD; i++)\
258 void SIMD_LOAD_GENERIC_##A##VWD##TOLST##_TO_##A##VWD##TDLST(CTYPE_##TD vec[VWD], ...)\
264 for (i = 0; i < (VWD); i++)\
266 n = (CTYPE_##TO) va_arg(ap, CTYPEP_##TO);\
/*
 * SIMD_STORE_CONV(A, TO, TD): generator for converting stores between the
 * element type suffixes TO and TD.
 * The lane count VWD is taken from VW_<RWB>_<TD> of the caller's TD.
 * The underscore-prefixed helpers add expansion steps so that RWBITS,
 * VW_<RWB>_<TD>, LSTYPE_<TO> and LSTYPE_<TD> are expanded before pasting.
 *
 * The innermost macro declares its parameters as (A,TD,TO,VWD,TDLST,TOLST)
 * while the caller one level up passes (A,TO,TD,VWD,TOLST,TDLST), so inside
 * the innermost body the names TD/TDLST are bound to the caller's
 * TO/TOLST and vice versa.
 * NOTE(review): confirm this swap is intentional.
 *
 * Two functions are emitted, one per direction, each looping over VWD
 * lanes:
 *   SIMD_STORE_<A><VWD><TOLST>_TO_<A><VWD><TDLST>(src, dst)
 *   SIMD_STORE_<A><VWD><TDLST>_TO_<A><VWD><TOLST>(src, dst)
 * NOTE(review): the statements that perform the copy are on lines not
 * shown in this excerpt.
 */
272 #define SIMD_STORE_CONV(A,TO,TD) _SIMD_STORE_CONV(A,TO,TD,RWBITS)
273 #define _SIMD_STORE_CONV(A,TO,TD,RWB) __SIMD_STORE_CONV(A, TO, TD, RWB)
274 #define __SIMD_STORE_CONV(A,TO,TD,RWB) ___SIMD_STORE_CONV(A,TO,TD,VW_##RWB##_##TD,LSTYPE_##TO,LSTYPE_##TD)
275 #define ___SIMD_STORE_CONV(A,TO,TD,VWD,TOLST,TDLST) ____SIMD_STORE_CONV(A,TO,TD,VWD,TOLST,TDLST)
276 #define ____SIMD_STORE_CONV(A,TD,TO,VWD,TDLST,TOLST)\
277 void SIMD_STORE_##A##VWD##TOLST##_TO_##A##VWD##TDLST(CTYPE_##TO src[VWD], CTYPE_##TD dst[VWD])\
280 for (i = 0; i < VWD; i++)\
283 void SIMD_STORE_##A##VWD##TDLST##_TO_##A##VWD##TOLST(CTYPE_##TD src[VWD], CTYPE_##TO dst[VWD])\
286 for (i = 0; i < VWD; i++)\
/*
 * Convenience wrappers: each hands one per-type generator (load, store,
 * zero, shuffle, invert) together with the argument A to _DEF_FOR_TYPES.
 * NOTE(review): the replacement list of _DEF_FOR_TYPES is not visible
 * here; presumably it instantiates the generator once per type suffix --
 * confirm.
 */
290 #define SIMD_LOADS(A) _DEF_FOR_TYPES(SIMD_LOAD_TYPE,A)
291 #define SIMD_STORES(A) _DEF_FOR_TYPES(SIMD_STORE_TYPE,A)
292 #define SIMD_ZEROS(A) _DEF_FOR_TYPES(SIMD_ZERO_TYPE,A)
293 #define SIMD_SHUFFLES(A) _DEF_FOR_TYPES(SIMD_SHUFFLE_TYPE,A)
294 #define SIMD_INVERTS(A) _DEF_FOR_TYPES(SIMD_INVERT_TYPE,A)
/*
 * Second copy of the element type table (suffix -> C type). The six
 * definitions are token-for-token identical to the CTYPE_* definitions
 * that appear earlier in the file, so they do not change any mapping.
 * NOTE(review): looks redundant; confirm whether an #undef or conditional
 * between the two copies (not visible here) makes this copy necessary.
 */
296 #define CTYPE_PD double
297 #define CTYPE_PS float
298 #define CTYPE_DI int64_t
299 #define CTYPE_D int32_t
300 #define CTYPE_W int16_t
301 #define CTYPE_B int8_t
/*
 * _DEF_ALL_CONV(F, A): helper taking a conversion generator F and an
 * argument A; its replacement list is not visible in this excerpt
 * (presumably it invokes F for each supported type pair -- confirm).
 * SIMD_LOAD_CONVS(A) runs it with SIMD_LOAD_CONV, and SIMD_STORE_CONVS(A)
 * runs it with SIMD_STORE_CONV.
 */
303 #define _DEF_ALL_CONV(F,A) \
320 #define SIMD_LOAD_CONVS(A) _DEF_ALL_CONV(SIMD_LOAD_CONV,A)
321 #define SIMD_STORE_CONVS(A) _DEF_ALL_CONV(SIMD_STORE_CONV,A)
/*
 * Empty definitions: each of these function-like macros expands to
 * nothing, so invoking it generates no code.
 * NOTE(review): all five names are also defined with non-empty bodies
 * earlier in the file. Redefining a macro with a different replacement
 * list is only valid after an #undef or on a separate conditional branch;
 * neither is visible in this excerpt -- confirm how these lines are
 * guarded.
 */
#define SIMD_LOAD_CONVS(A)
#define SIMD_STORE_CONVS(A)
#define _DEF_FOR_TYPES(F, P)
#define OP_MULADD_TYPE(P, T)
#define OP_UMIN_TYPE(P, T)