#include <xmmintrin.h>
#include <emmintrin.h>

Include dependency graph for sse.h:

Macros
#define	SIMD_LOAD_V4SI(vec, arr) vec=_mm_loadu_si128((__m128i*)arr)
	int More...

#define	SIMD_LOADA_V4SI(vec, arr) vec=_mm_load_si128((__m128i*)arr)

#define	SIMD_LOAD_BROADCAST_V4SI(vec, val) vec=_mm_set1_si128(val)

#define	SIMD_MULD(vec1, vec2, vec3) vec1=_mm_mul_epi32(vec2,vec3)

#define	SIMD_ADDD(vec1, vec2, vec3) vec1=_mm_add_epi32(vec2,vec3)

#define	SIMD_SUBD(vec1, vec2, vec3) vec1 = _mm_sub_epi32(vec2, vec3)

#define	SIMD_STORE_V4SI(vec, arr) _mm_storeu_si128((__m128i*)arr,vec)

#define	SIMD_STOREA_V4SI(vec, arr) _mm_store_si128((__m128i*)arr,vec)

#define	SIMD_LOAD_V4SF(vec, arr) vec=_mm_loadu_ps(arr)
	float More...

#define	SIMD_LOADA_V4SF(vec, arr) vec=_mm_load_ps(arr)

#define	SIMD_LOAD_BROADCAST_V4SF(vec, val) vec=_mm_set1_ps(val)

#define	SIMD_MULPS(vec1, vec2, vec3) vec1=_mm_mul_ps(vec2,vec3)

#define	SIMD_DIVPS(vec1, vec2, vec3) vec1=_mm_div_ps(vec2,vec3)

#define	SIMD_ADDPS(vec1, vec2, vec3) vec1=_mm_add_ps(vec2,vec3)

#define	SIMD_SUBPS(vec1, vec2, vec3) vec1 = _mm_sub_ps(vec2, vec3)

#define	SIMD_MULADDPS(vec1, vec2, vec3, vec4)

#define	SIMD_SHUFFLE_V4SF(dist, src, i0, i1, i2, i3) dist=_mm_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0)

#define	SIMD_UMINPS(vec1, vec2)
	umin as in unary minus More...

#define	SIMD_STORE_V4SF(vec, arr) _mm_storeu_ps(arr,vec)

#define	SIMD_STOREA_V4SF(vec, arr) _mm_store_ps(arr,vec)

#define	SIMD_STORE_GENERIC_V4SF(vec, v0, v1, v2, v3)

#define	SIMD_ZERO_V4SF(vec) vec = _mm_setzero_ps()

#define	SIMD_INVERT_V4SF(vec) vec = _mm_shuffle_ps(vec,vec,_MM_SHUFFLE(4,3,2,1))

#define	SIMD_LOAD_GENERIC_V4SF(vec, v0, v1, v2, v3)

#define	SIMD_STORE_MASKED_V4SF(vec, arr)
	handle padded value, this is a very bad implementation ... More...

#define	SIMD_LOAD_V4SI_TO_V4SF(v, f)

#define	SIMD_LOAD_V2DF(vec, arr) vec=_mm_loadu_pd(arr)
	double More...

#define	SIMD_MULPD(vec1, vec2, vec3) vec1=_mm_mul_pd(vec2,vec3)

#define	SIMD_ADDPD(vec1, vec2, vec3) vec1=_mm_add_pd(vec2,vec3)

#define	SIMD_MULADDPD(vec1, vec2, vec3, vec4)

#define	SIMD_UMINPD(vec1, vec2)

#define	SIMD_COSPD(vec1, vec2)

#define	SIMD_SINPD(vec1, vec2)

#define	SIMD_STORE_V2DF(vec, arr) _mm_storeu_pd(arr,vec)

#define	SIMD_STORE_GENERIC_V2DF(vec, v0, v1)

#define	SIMD_LOAD_GENERIC_V2DF(vec, v0, v1)

#define	SIMD_STORE_V2DF_TO_V2SF(vec, f)
	conversions More...

#define	SIMD_LOAD_V2SF_TO_V2DF(vec, f) SIMD_LOAD_GENERIC_V2DF(vec,(f)[0],(f)[1])

#define	SIMD_LOAD_V8HI(vec, arr) vec = (__m128i*)(arr)
	char More...

#define	SIMD_STORE_V8HI(vec, arr) (__m128i )(&(arr)[0]) = vec

#define	SIMD_STORE_V8HI_TO_V8SI(vec, arr) SIMD_STORE_V8HI(vec,arr)

#define	SIMD_LOAD_V8SI_TO_V8HI(vec, arr) SIMD_LOAD_V8HI(vec,arr)

Typedefs
typedef float a2sf[2]	__attribute__((aligned(16)))

typedef __m128	v4sf

typedef __m128d	v2df

typedef __m128i	v4si

typedef __m128i	v8si

typedef __m128i	v8hi

Macro Definition Documentation

◆ SIMD_ADDD

#define SIMD_ADDD	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_add_epi32(vec2,vec3)

Definition at line 20 of file sse.h.

◆ SIMD_ADDPD

#define SIMD_ADDPD	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_add_pd(vec2,vec3)

Definition at line 99 of file sse.h.

◆ SIMD_ADDPS

#define SIMD_ADDPS	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_add_ps(vec2,vec3)

Definition at line 32 of file sse.h.

◆ SIMD_COSPD

#define SIMD_COSPD	(	vec1,
		vec2
	)

Value:

                do {                                                            \
                double __pips_tmp[2] __attribute__ ((aligned (16)));    \
                SIMD_STORE_V2DF(vec2, __pips_tmp);                      \
                __pips_tmp[0] = cos(__pips_tmp[0]);                     \
                __pips_tmp[1] = cos(__pips_tmp[1]);                     \
                SIMD_LOAD_V2DF(vec1, __pips_tmp);                       \
                } while(0)

Definition at line 113 of file sse.h.

◆ SIMD_DIVPS

#define SIMD_DIVPS	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_div_ps(vec2,vec3)

Definition at line 31 of file sse.h.

◆ SIMD_INVERT_V4SF

#define SIMD_INVERT_V4SF ( vec ) vec = _mm_shuffle_ps(vec,vec,_MM_SHUFFLE(4,3,2,1))

Definition at line 64 of file sse.h.

◆ SIMD_LOAD_BROADCAST_V4SF

#define SIMD_LOAD_BROADCAST_V4SF	(	vec,
		val
	)	vec=_mm_set1_ps(val)

Definition at line 29 of file sse.h.

◆ SIMD_LOAD_BROADCAST_V4SI

#define SIMD_LOAD_BROADCAST_V4SI	(	vec,
		val
	)	vec=_mm_set1_si128(val)

Definition at line 18 of file sse.h.

◆ SIMD_LOAD_GENERIC_V2DF

#define SIMD_LOAD_GENERIC_V2DF	(	vec,
		v0,
		v1
	)

Value:

                do {                                    \
                double v[2] = { v0,v1};         \
                SIMD_LOAD_V2DF(vec,&v[0]);      \
                } while(0)

Definition at line 139 of file sse.h.

◆ SIMD_LOAD_GENERIC_V4SF

#define SIMD_LOAD_GENERIC_V4SF	(	vec,
		v0,
		v1,
		v2,
		v3
	)

Value:

                do {                                                            \
                float __pips_v[4] __attribute ((aligned (16)));\
                __pips_v[0]=v0;\
                __pips_v[1]=v1;\
                __pips_v[2]=v2;\
                __pips_v[3]=v3;\
                SIMD_LOADA_V4SF(vec,&__pips_v[0]);                      \
                } while(0)

Definition at line 66 of file sse.h.

◆ SIMD_LOAD_V2DF

#define SIMD_LOAD_V2DF	(	vec,
		arr
	)	vec=_mm_loadu_pd(arr)

double

Definition at line 97 of file sse.h.

◆ SIMD_LOAD_V2SF_TO_V2DF

#define SIMD_LOAD_V2SF_TO_V2DF	(	vec,
		f
	)	SIMD_LOAD_GENERIC_V2DF(vec,(f)[0],(f)[1])

Definition at line 154 of file sse.h.

◆ SIMD_LOAD_V4SF

#define SIMD_LOAD_V4SF	(	vec,
		arr
	)	vec=_mm_loadu_ps(arr)

float

Definition at line 27 of file sse.h.

◆ SIMD_LOAD_V4SI

#define SIMD_LOAD_V4SI	(	vec,
		arr
	)	vec=_mm_loadu_si128((__m128i*)arr)

int

Definition at line 16 of file sse.h.

◆ SIMD_LOAD_V4SI_TO_V4SF

#define SIMD_LOAD_V4SI_TO_V4SF	(	v,
		f
	)

Value:

                do {                                    \
                float __pips_tmp[4];            \
                __pips_tmp[0] = (f)[0];         \
                __pips_tmp[1] = (f)[1];         \
                __pips_tmp[2] = (f)[2];         \
                __pips_tmp[3] = (f)[3];         \
                SIMD_LOAD_V4SF(v, __pips_tmp);  \
                } while(0)

Definition at line 86 of file sse.h.

◆ SIMD_LOAD_V8HI

#define SIMD_LOAD_V8HI	(	vec,
		arr
	)	vec = (__m128i*)(arr)

char

Definition at line 158 of file sse.h.

◆ SIMD_LOAD_V8SI_TO_V8HI

#define SIMD_LOAD_V8SI_TO_V8HI	(	vec,
		arr
	)	SIMD_LOAD_V8HI(vec,arr)

Definition at line 166 of file sse.h.

◆ SIMD_LOADA_V4SF

#define SIMD_LOADA_V4SF	(	vec,
		arr
	)	vec=_mm_load_ps(arr)

Definition at line 28 of file sse.h.

◆ SIMD_LOADA_V4SI

#define SIMD_LOADA_V4SI	(	vec,
		arr
	)	vec=_mm_load_si128((__m128i*)arr)

Definition at line 17 of file sse.h.

◆ SIMD_MULADDPD

#define SIMD_MULADDPD	(	vec1,
		vec2,
		vec3,
		vec4
	)

Value:

                do { \
                __m128 __pips_tmp;\
                SIMD_MULPD(__pips_tmp, vec3, vec4);\
                SIMD_ADDPD(vec1, __pips_tmp, vec2); \
                } while(0)

Definition at line 100 of file sse.h.

◆ SIMD_MULADDPS

#define SIMD_MULADDPS	(	vec1,
		vec2,
		vec3,
		vec4
	)

Value:

                do { \
                __m128 __pips_tmp;\
                SIMD_MULPS(__pips_tmp, vec3, vec4);\
                SIMD_ADDPS(vec1, __pips_tmp, vec2); \
                } while(0)

Definition at line 34 of file sse.h.

◆ SIMD_MULD

#define SIMD_MULD	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_mul_epi32(vec2,vec3)

Definition at line 19 of file sse.h.

◆ SIMD_MULPD

#define SIMD_MULPD	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_mul_pd(vec2,vec3)

Definition at line 98 of file sse.h.

◆ SIMD_MULPS

#define SIMD_MULPS	(	vec1,
		vec2,
		vec3
	)	vec1=_mm_mul_ps(vec2,vec3)

Definition at line 30 of file sse.h.

◆ SIMD_SHUFFLE_V4SF

#define SIMD_SHUFFLE_V4SF	(	dist,
		src,
		i0,
		i1,
		i2,
		i3
	)	dist=_mm_shuffle_ps(src,src,_MM_SHUFFLE(i3,i2,i1,i0)

Definition at line 41 of file sse.h.

◆ SIMD_SINPD

#define SIMD_SINPD	(	vec1,
		vec2
	)

Value:

                do {                                                            \
                double __pips_tmp[2] __attribute__ ((aligned (16)));    \
                SIMD_STORE_V2DF(vec2, __pips_tmp);                      \
                __pips_tmp[0] = sin(__pips_tmp[0]);                     \
                __pips_tmp[1] = sin(__pips_tmp[1]);                     \
                SIMD_LOAD_V2DF(vec1, __pips_tmp);                       \
                } while(0)

Definition at line 122 of file sse.h.

◆ SIMD_STORE_GENERIC_V2DF

#define SIMD_STORE_GENERIC_V2DF	(	vec,
		v0,
		v1
	)

Value:

                do {                                    \
                double __pips_tmp[2];                   \
                SIMD_STORE_V2DF(vec,&__pips_tmp[0]);    \
                *(v0)=__pips_tmp[0];                    \
                *(v1)=__pips_tmp[1];                    \
                } while (0)

Definition at line 132 of file sse.h.

◆ SIMD_STORE_GENERIC_V4SF

#define SIMD_STORE_GENERIC_V4SF	(	vec,
		v0,
		v1,
		v2,
		v3
	)

Value:

                do {                                                            \
                float __pips_tmp[4] __attribute__ ((aligned (16)));     \
                SIMD_STOREA_V4SF(vec,&__pips_tmp[0]);                   \
                *(v0)=__pips_tmp[0];                                    \
                *(v1)=__pips_tmp[1];                                    \
                *(v2)=__pips_tmp[2];                                    \
                *(v3)=__pips_tmp[3];                                    \
                } while (0)

Definition at line 53 of file sse.h.

◆ SIMD_STORE_MASKED_V4SF

#define SIMD_STORE_MASKED_V4SF	(	vec,
		arr
	)

Value:

                do {                                                            \
                float __pips_tmp[4] __attribute__ ((aligned (16)));                                     \
                SIMD_STOREA_V4SF(vec,&__pips_tmp[0]);                   \
                (arr)[0] = __pips_tmp[0];                               \
                (arr)[1] = __pips_tmp[1];                               \
                (arr)[2] = __pips_tmp[2];                               \
                } while(0)

handle padded value, this is a very bad implementation ...

Definition at line 77 of file sse.h.

◆ SIMD_STORE_V2DF

#define SIMD_STORE_V2DF	(	vec,
		arr
	)	_mm_storeu_pd(arr,vec)

Definition at line 131 of file sse.h.

◆ SIMD_STORE_V2DF_TO_V2SF

#define SIMD_STORE_V2DF_TO_V2SF	(	vec,
		f
	)

Value:

                do {                                            \
                double __pips_tmp[2];                   \
                SIMD_STORE_V2DF(vec, __pips_tmp);       \
                (f)[0] = __pips_tmp[0];                 \
                (f)[1] = __pips_tmp[1];                 \
                } while(0)

conversions

Definition at line 146 of file sse.h.

◆ SIMD_STORE_V4SF

#define SIMD_STORE_V4SF	(	vec,
		arr
	)	_mm_storeu_ps(arr,vec)

Definition at line 51 of file sse.h.

◆ SIMD_STORE_V4SI

#define SIMD_STORE_V4SI	(	vec,
		arr
	)	_mm_storeu_si128((__m128i*)arr,vec)

Definition at line 23 of file sse.h.

◆ SIMD_STORE_V8HI

#define SIMD_STORE_V8HI	(	vec,
		arr
	)	(__m128i )(&(arr)[0]) = vec

Definition at line 161 of file sse.h.

◆ SIMD_STORE_V8HI_TO_V8SI

#define SIMD_STORE_V8HI_TO_V8SI	(	vec,
		arr
	)	SIMD_STORE_V8HI(vec,arr)

Definition at line 164 of file sse.h.

◆ SIMD_STOREA_V4SF

#define SIMD_STOREA_V4SF	(	vec,
		arr
	)	_mm_store_ps(arr,vec)

Definition at line 52 of file sse.h.

◆ SIMD_STOREA_V4SI

#define SIMD_STOREA_V4SI	(	vec,
		arr
	)	_mm_store_si128((__m128i*)arr,vec)

Definition at line 24 of file sse.h.

◆ SIMD_SUBD

#define SIMD_SUBD	(	vec1,
		vec2,
		vec3
	)	vec1 = _mm_sub_epi32(vec2, vec3)

Definition at line 21 of file sse.h.

◆ SIMD_SUBPS

#define SIMD_SUBPS	(	vec1,
		vec2,
		vec3
	)	vec1 = _mm_sub_ps(vec2, vec3)

Definition at line 33 of file sse.h.

◆ SIMD_UMINPD

#define SIMD_UMINPD	(	vec1,
		vec2
	)

Value:

                do {                                            \
                __m128d __pips_tmp;                     \
                __pips_tmp = _mm_setzero_pd();          \
                vec1 = _mm_sub_pd(__pips_tmp, vec2);    \
                } while(0)

Definition at line 106 of file sse.h.

◆ SIMD_UMINPS

#define SIMD_UMINPS	(	vec1,
		vec2
	)

Value:

                do {                                            \
                __m128 __pips_tmp;                      \
                __pips_tmp = _mm_setzero_ps();          \
                vec1 = _mm_sub_ps(__pips_tmp, vec2);    \
                } while(0)

umin as in unary minus

Definition at line 44 of file sse.h.

◆ SIMD_ZERO_V4SF

#define SIMD_ZERO_V4SF ( vec ) vec = _mm_setzero_ps()

Definition at line 63 of file sse.h.

Typedef Documentation

◆ attribute

typedef int a8si [4] __attribute__((aligned(16)))

Definition at line 4 of file sse.h.

◆ v2df

typedef __m128d v2df

Definition at line 11 of file sse.h.

◆ v4sf

typedef __m128 v4sf

Definition at line 10 of file sse.h.

◆ v4si

typedef __m128i v4si

Definition at line 12 of file sse.h.

◆ v8hi

typedef __m128i v8hi

Definition at line 14 of file sse.h.

◆ v8si

typedef __m128i v8si

Definition at line 13 of file sse.h.

Macros

Typedefs

Macro Definition Documentation

◆ SIMD_ADDD

◆ SIMD_ADDPD

◆ SIMD_ADDPS

◆ SIMD_COSPD

◆ SIMD_DIVPS

◆ SIMD_INVERT_V4SF

◆ SIMD_LOAD_BROADCAST_V4SF

◆ SIMD_LOAD_BROADCAST_V4SI

◆ SIMD_LOAD_GENERIC_V2DF

◆ SIMD_LOAD_GENERIC_V4SF

◆ SIMD_LOAD_V2DF

◆ SIMD_LOAD_V2SF_TO_V2DF

◆ SIMD_LOAD_V4SF

◆ SIMD_LOAD_V4SI

◆ SIMD_LOAD_V4SI_TO_V4SF

◆ SIMD_LOAD_V8HI

◆ SIMD_LOAD_V8SI_TO_V8HI

◆ SIMD_LOADA_V4SF

◆ SIMD_LOADA_V4SI

◆ SIMD_MULADDPD

◆ SIMD_MULADDPS

◆ SIMD_MULD

◆ SIMD_MULPD

◆ SIMD_MULPS

◆ SIMD_SHUFFLE_V4SF

◆ SIMD_SINPD

◆ SIMD_STORE_GENERIC_V2DF

◆ SIMD_STORE_GENERIC_V4SF

◆ SIMD_STORE_MASKED_V4SF

◆ SIMD_STORE_V2DF

◆ SIMD_STORE_V2DF_TO_V2SF

◆ SIMD_STORE_V4SF

◆ SIMD_STORE_V4SI

◆ SIMD_STORE_V8HI

◆ SIMD_STORE_V8HI_TO_V8SI

◆ SIMD_STOREA_V4SF

◆ SIMD_STOREA_V4SI

◆ SIMD_SUBD

◆ SIMD_SUBPS

◆ SIMD_UMINPD

◆ SIMD_UMINPS

◆ SIMD_ZERO_V4SF

Typedef Documentation

◆ __attribute__

◆ v2df

◆ v4sf

◆ v4si

◆ v8hi

◆ v8si

◆ attribute