PIPS
|
#include <xmmintrin.h>
#include <emmintrin.h>
Go to the source code of this file.
Macros | |
#define | SIMD_LOAD_V4SI(vec, arr) vec=_mm_loadu_si128((__m128i*)arr) |
int More... | |
#define | SIMD_LOADA_V4SI(vec, arr) vec=_mm_load_si128((__m128i*)arr) |
#define | SIMD_LOAD_BROADCAST_V4SI(vec, val) vec=_mm_set1_epi32(val) |
#define | SIMD_MULD(vec1, vec2, vec3) vec1=_mm_mul_epi32(vec2,vec3) |
(NOTE: review — `_mm_set1_si128` does not exist; the 32-bit integer broadcast intrinsic is `_mm_set1_epi32`. Also, `_mm_mul_epi32` is SSE4.1 — not provided by the included xmmintrin.h/emmintrin.h — and it multiplies only the even 32-bit lanes, producing two 64-bit products; an element-wise 4×int32 multiply would be `_mm_mullo_epi32`.)
#define | SIMD_ADDD(vec1, vec2, vec3) vec1=_mm_add_epi32(vec2,vec3) |
#define | SIMD_SUBD(vec1, vec2, vec3) vec1 = _mm_sub_epi32(vec2, vec3) |
#define | SIMD_STORE_V4SI(vec, arr) _mm_storeu_si128((__m128i*)arr,vec) |
#define | SIMD_STOREA_V4SI(vec, arr) _mm_store_si128((__m128i*)arr,vec) |
#define | SIMD_LOAD_V4SF(vec, arr) vec=_mm_loadu_ps(arr) |
float More... | |
#define | SIMD_LOADA_V4SF(vec, arr) vec=_mm_load_ps(arr) |
#define | SIMD_LOAD_BROADCAST_V4SF(vec, val) vec=_mm_set1_ps(val) |
#define | SIMD_MULPS(vec1, vec2, vec3) vec1=_mm_mul_ps(vec2,vec3) |
#define | SIMD_DIVPS(vec1, vec2, vec3) vec1=_mm_div_ps(vec2,vec3) |
#define | SIMD_ADDPS(vec1, vec2, vec3) vec1=_mm_add_ps(vec2,vec3) |
#define | SIMD_SUBPS(vec1, vec2, vec3) vec1 = _mm_sub_ps(vec2, vec3) |
#define | SIMD_MULADDPS(vec1, vec2, vec3, vec4) |
#define | SIMD_SHUFFLE_V4SF(dist, src, i0, i1, i2, i3) dist=_mm_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0)) |
#define | SIMD_UMINPS(vec1, vec2) |
umin as in unary minus More... | |
#define | SIMD_STORE_V4SF(vec, arr) _mm_storeu_ps(arr,vec) |
#define | SIMD_STOREA_V4SF(vec, arr) _mm_store_ps(arr,vec) |
#define | SIMD_STORE_GENERIC_V4SF(vec, v0, v1, v2, v3) |
#define | SIMD_ZERO_V4SF(vec) vec = _mm_setzero_ps() |
#define | SIMD_INVERT_V4SF(vec) vec = _mm_shuffle_ps(vec,vec,_MM_SHUFFLE(0,1,2,3)) |
#define | SIMD_LOAD_GENERIC_V4SF(vec, v0, v1, v2, v3) |
#define | SIMD_STORE_MASKED_V4SF(vec, arr) |
handle padded value, this is a very bad implementation ... More... | |
#define | SIMD_LOAD_V4SI_TO_V4SF(v, f) |
#define | SIMD_LOAD_V2DF(vec, arr) vec=_mm_loadu_pd(arr) |
double More... | |
#define | SIMD_MULPD(vec1, vec2, vec3) vec1=_mm_mul_pd(vec2,vec3) |
#define | SIMD_ADDPD(vec1, vec2, vec3) vec1=_mm_add_pd(vec2,vec3) |
#define | SIMD_MULADDPD(vec1, vec2, vec3, vec4) |
#define | SIMD_UMINPD(vec1, vec2) |
#define | SIMD_COSPD(vec1, vec2) |
#define | SIMD_SINPD(vec1, vec2) |
#define | SIMD_STORE_V2DF(vec, arr) _mm_storeu_pd(arr,vec) |
#define | SIMD_STORE_GENERIC_V2DF(vec, v0, v1) |
#define | SIMD_LOAD_GENERIC_V2DF(vec, v0, v1) |
#define | SIMD_STORE_V2DF_TO_V2SF(vec, f) |
conversions More... | |
#define | SIMD_LOAD_V2SF_TO_V2DF(vec, f) SIMD_LOAD_GENERIC_V2DF(vec,(f)[0],(f)[1]) |
#define | SIMD_LOAD_V8HI(vec, arr) vec = *(__m128i*)(arr) |
char More... | |
#define | SIMD_STORE_V8HI(vec, arr) *(__m128i *)(&(arr)[0]) = vec |
#define | SIMD_STORE_V8HI_TO_V8SI(vec, arr) SIMD_STORE_V8HI(vec,arr) |
#define | SIMD_LOAD_V8SI_TO_V8HI(vec, arr) SIMD_LOAD_V8HI(vec,arr) |
Typedefs | |
typedef float a2sf[2] | __attribute__((aligned(16))) |
typedef __m128 | v4sf |
typedef __m128d | v2df |
typedef __m128i | v4si |
typedef __m128i | v8si |
typedef __m128i | v8hi |
#define SIMD_ADDD | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_add_epi32(vec2,vec3) |
#define SIMD_ADDPD | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_add_pd(vec2,vec3) |
#define SIMD_ADDPS | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_add_ps(vec2,vec3) |
#define SIMD_COSPD | ( | vec1, | |
vec2 | |||
) |
#define SIMD_DIVPS | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_div_ps(vec2,vec3) |
#define SIMD_INVERT_V4SF | ( | vec | ) | vec = _mm_shuffle_ps(vec,vec,_MM_SHUFFLE(0,1,2,3)) |
(NOTE: review — `_MM_SHUFFLE` selectors must be in the range 0–3; the original `_MM_SHUFFLE(4,3,2,1)` overflows the 8-bit immediate. For a lane reversal, `_MM_SHUFFLE(0,1,2,3)` is the intended constant — confirm against the original source.)
#define SIMD_LOAD_BROADCAST_V4SF | ( | vec, | |
val | |||
) | vec=_mm_set1_ps(val) |
#define SIMD_LOAD_BROADCAST_V4SI | ( | vec, | |
val | | | |
) | vec=_mm_set1_epi32(val) |
#define SIMD_LOAD_GENERIC_V2DF | ( | vec, | |
v0, | |||
v1 | |||
) |
#define SIMD_LOAD_GENERIC_V4SF | ( | vec, | |
v0, | |||
v1, | |||
v2, | |||
v3 | |||
) |
#define SIMD_LOAD_V2DF | ( | vec, | |
arr | |||
) | vec=_mm_loadu_pd(arr) |
#define SIMD_LOAD_V2SF_TO_V2DF | ( | vec, | |
f | |||
) | SIMD_LOAD_GENERIC_V2DF(vec,(f)[0],(f)[1]) |
#define SIMD_LOAD_V4SF | ( | vec, | |
arr | |||
) | vec=_mm_loadu_ps(arr) |
#define SIMD_LOAD_V4SI | ( | vec, | |
arr | |||
) | vec=_mm_loadu_si128((__m128i*)arr) |
#define SIMD_LOAD_V4SI_TO_V4SF | ( | v, | |
f | |||
) |
#define SIMD_LOAD_V8HI | ( | vec, | |
arr | | | |
) | vec = *(__m128i*)(arr) |
(NOTE: review — the original expansion `vec = (__m128i*)(arr)` assigns a pointer value to a vector; the matching SIMD_STORE_V8HI dereferences through `__m128i *`, so the load should dereference as well. This aliasing-cast load also assumes 16-byte alignment — `_mm_loadu_si128` would be the safe unaligned form.)
#define SIMD_LOAD_V8SI_TO_V8HI | ( | vec, | |
arr | |||
) | SIMD_LOAD_V8HI(vec,arr) |
#define SIMD_LOADA_V4SI | ( | vec, | |
arr | |||
) | vec=_mm_load_si128((__m128i*)arr) |
#define SIMD_MULADDPD | ( | vec1, | |
vec2, | |||
vec3, | |||
vec4 | |||
) |
#define SIMD_MULADDPS | ( | vec1, | |
vec2, | |||
vec3, | |||
vec4 | |||
) |
#define SIMD_MULD | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_mul_epi32(vec2,vec3) |
#define SIMD_MULPD | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_mul_pd(vec2,vec3) |
#define SIMD_MULPS | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1=_mm_mul_ps(vec2,vec3) |
#define SIMD_SINPD | ( | vec1, | |
vec2 | |||
) |
#define SIMD_STORE_GENERIC_V2DF | ( | vec, | |
v0, | |||
v1 | |||
) |
#define SIMD_STORE_GENERIC_V4SF | ( | vec, | |
v0, | |||
v1, | |||
v2, | |||
v3 | |||
) |
#define SIMD_STORE_MASKED_V4SF | ( | vec, | |
arr | |||
) |
handle padded value, this is a very bad implementation ...
#define SIMD_STORE_V2DF_TO_V2SF | ( | vec, | |
f | |||
) |
#define SIMD_STORE_V4SI | ( | vec, | |
arr | |||
) | _mm_storeu_si128((__m128i*)arr,vec) |
#define SIMD_STORE_V8HI | ( | vec, | |
arr | |||
) | *(__m128i *)(&(arr)[0]) = vec |
#define SIMD_STORE_V8HI_TO_V8SI | ( | vec, | |
arr | |||
) | SIMD_STORE_V8HI(vec,arr) |
#define SIMD_STOREA_V4SI | ( | vec, | |
arr | |||
) | _mm_store_si128((__m128i*)arr,vec) |
#define SIMD_SUBD | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1 = _mm_sub_epi32(vec2, vec3) |
#define SIMD_SUBPS | ( | vec1, | |
vec2, | |||
vec3 | |||
) | vec1 = _mm_sub_ps(vec2, vec3) |
#define SIMD_UMINPD | ( | vec1, | |
vec2 | |||
) |
#define SIMD_UMINPS | ( | vec1, | |
vec2 | |||
) |