unroll_prefetch_simd_def.h File Reference

Macros for composing unrolled prefetching loops over arrays using SIMD intrinsics. More...

Go to the source code of this file.

Defines

#define UNROLL4_PREF_KERNEL5_SIMD(MDOP, ADV, T, SUF, UNA1, UNA2)
 TODO: Should be merged with unroll_prefetch_def.h.
#define UNROLL4_KERNEL5_SIMD(MDOP, ADV, SUF, UNA1, UNA2)
 Four times unrolled kernel for 5 args without prefetching.
#define VKERN_TEMPL_3V_NP_SIMD(MDOP, ADV, STP, SUF, UNA1, UNA2)
#define VKERN_TEMPL_3V_PLAIN_SIMD(MDOP, ADV, STP, SUF, UNA1, UNA2)
#define VKERN_TEMPL_3V_SISD(SDOP, COND, STP, SUF)
#define UNROLL4_PREF_KERNEL4_SIMD(MDOP, ADV, T, SUF, UNA)
 Four times unrolled kernel for 4 args with prefetching.
#define UNROLL4_KERNEL4_SIMD(MDOP, ADV, SUF, UNA)
 Four times unrolled kernel for 4 args without prefetching.
#define VKERN_TEMPL_2V_NP_SIMD(MDOP, ADV, STP, SUF, UNA)
#define VKERN_TEMPL_2V_PLAIN_SIMD(MDOP, ADV, STP, SUF, UNA)
#define VKERN_TEMPL_2V_SISD(SDOP, COND, STP, SUF)
#define UNROLL4_PREF_KERNEL3_SIMD(MDOP, ADV, T, SUF)
 Four times unrolled kernel for 3 args with prefetching. TODO: Prefetching.
#define UNROLL4_KERNEL3_SIMD(MDOP, ADV, SUF)
 Four times unrolled kernel for 3 args without prefetching.
#define VKERN_TEMPL_1V_NP_SIMD(MDOP, ADV, STP, SUF)
#define VKERN_TEMPL_1V_PLAIN_SIMD(MDOP, ADV, STP, SUF)
#define VKERN_TEMPL_1V_SISD(SDOP, COND, STP, SUF)
#define MISALIGNMENT_CHECK(x)   ((unsigned long)x & 0x0f)
#define WARN_UNALIGN(v)   do {} while (0)
 WARN_UNALIGNED macro: If defined, the TBCI library will print a warning to stderr for unaligned SIMD accesses, which are slower.
#define VKERN_TEMPL_3V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
 TODO: Check whether enabling the non-unrolled fixup (loop tail) is beneficial.
#define VKERN_TEMPL_3V_SIMD_UA(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
 Without the unaligned check.
#define VKERN_TEMPL_3V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_3V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_2V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_2V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_2V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_2V_T_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_1V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_1V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_1V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)
#define VKERN_TEMPL_1V_T_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP)


Detailed Description

Macros for composing unrolled prefetching loops over arrays using SIMD intrinsics.

(c) Kurt Garloff, <kurt@garloff.de>, 4/2005, GNU LGPL v2

Id
unroll_prefetch_simd_def.h,v 1.1.2.16 2006/03/03 10:10:24 garloff Exp

Definition in file unroll_prefetch_simd_def.h.


Define Documentation

#define MISALIGNMENT_CHECK ( x )     ((unsigned long)x & 0x0f)

Definition at line 234 of file unroll_prefetch_simd_def.h.

#define UNROLL4_KERNEL3_SIMD ( MDOP,
ADV,
SUF   ) 

Value:

MDOP(res, f1, f2, SUF);                 \
        MDOP(res+ADV, f1, f2, SUF);             \
        i -= 4*ADV;                             \
        MDOP(res+2*ADV, f1, f2, SUF);           \
        MDOP(res+3*ADV, f1, f2, SUF);           \
        res += 4*ADV
Four times unrolled kernel for 3 args without prefetching.

Definition at line 187 of file unroll_prefetch_simd_def.h.

#define UNROLL4_KERNEL4_SIMD ( MDOP,
ADV,
SUF,
UNA   ) 

Value:

MDOP(res, v1, f1, f2, SUF, UNA);                \
        MDOP(res+ADV, v1+ADV, f1, f2, SUF, UNA);        \
        i -= 4*ADV;                                     \
        MDOP(res+2*ADV, v1+2*ADV, f1, f2, SUF, UNA);    \
        MDOP(res+3*ADV, v1+3*ADV, f1, f2, SUF, UNA);    \
        v1 += 4*ADV; res += 4*ADV
Four times unrolled kernel for 4 args without prefetching.

Definition at line 120 of file unroll_prefetch_simd_def.h.

#define UNROLL4_KERNEL5_SIMD ( MDOP,
ADV,
SUF,
UNA1,
UNA2   ) 

Value:

MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2);            \
        i -= 4*ADV;                                     \
        MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2);\
        v1 += 4*ADV;                                    \
        MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2);  \
        v2 += 4*ADV;                                    \
        MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2);      \
        res += 4*ADV
Four times unrolled kernel for 5 args without prefetching.

Definition at line 52 of file unroll_prefetch_simd_def.h.

#define UNROLL4_PREF_KERNEL3_SIMD ( MDOP,
ADV,
T,
SUF   ) 

Value:

if (EL_PER_CL(T) <= 1) {                        \
                MDOP(res, f1, f2, SUF);                 \
                MDOP(res+ADV, f1, f2, SUF);             \
                i -= 4*ADV;                             \
                MDOP(res+2*ADV, f1, f2, SUF);           \
                MDOP(res+3*ADV, f1, f2, SUF);           \
                res += 4*ADV;                           \
        } else if (EL_PER_CL(T) <= 2) {                 \
                MDOP(res, f1, f2, SUF);                 \
                MDOP(res+ADV, f1, f2, SUF);             \
                i -= 4*ADV;                             \
                MDOP(res+2*ADV, f1, f2, SUF);           \
                MDOP(res+3*ADV, f1, f2, SUF);           \
                res += 4*ADV;                           \
        } else {                                        \
                MDOP(res, f1, f2, SUF);                 \
                MDOP(res+ADV, f1, f2, SUF);             \
                i -= 4*ADV;                             \
                MDOP(res+2*ADV, f1, f2, SUF);           \
                MDOP(res+3*ADV, f1, f2, SUF);           \
                res += 4*ADV;                           \
        }
Four times unrolled kernel for 3 args with prefetching. TODO: Prefetching.

(FIXME: Is it needed? SSE2 capable CPUs do hardware prefetching, no???)

Definition at line 161 of file unroll_prefetch_simd_def.h.

#define UNROLL4_PREF_KERNEL4_SIMD ( MDOP,
ADV,
T,
SUF,
UNA   ) 

Value:

if (EL_PER_CL(T) <= 1) {                        \
                MDOP(res,v1,f1,f2,SUF,UNA);             \
                i -= 4*ADV;                             \
                MDOP(res+ADV,v1+ADV,f1,f2,SUF,UNA);     \
                MDOP(res+2*ADV,v1+2*ADV,f1,f2,SUF,UNA); \
                v1 += 4*ADV;                            \
                MDOP(res+3*ADV,v1-ADV,f1,f2,SUF,UNA);   \
                res += 4*ADV;                           \
        } else if (EL_PER_CL(T) <= 2) {                 \
                MDOP(res,v1,f1,f2, SUF,UNA);            \
                i -= 4*ADV;                             \
                MDOP(res+ADV,v1+ADV,f1,f2,SUF,UNA);     \
                MDOP(res+2*ADV,v1+2*ADV,f1,f2,SUF,UNA); \
                v1 += 4*ADV;                            \
                MDOP(res+3*ADV,v1-ADV,f1,f2,SUF,UNA);   \
                res += 4*ADV;                           \
        } else {                                        \
                MDOP(res,v1,f1,f2,SUF,UNA);             \
                i -= 4*ADV;                             \
                MDOP(res+ADV,v1+ADV,f1,f2,SUF,UNA);     \
                MDOP(res+2*ADV,v1+2*ADV,f1,f2,SUF,UNA); \
                v1 += 4*ADV;                            \
                MDOP(res+3*ADV,v1-ADV,f1,f2,SUF,UNA);   \
                res += 4*ADV;                           \
        }
Four times unrolled kernel for 4 args with prefetching.

Definition at line 91 of file unroll_prefetch_simd_def.h.

#define UNROLL4_PREF_KERNEL5_SIMD ( MDOP,
ADV,
T,
SUF,
UNA1,
UNA2   ) 

Value:

if (EL_PER_CL(T) <= 1) {                        \
                /* v1 and v2 are bumped by 4*ADV part-way through, so the */ \
                /* later MDOP steps reach elements 2 and 3 of the group   */ \
                /* through negative offsets. res is bumped last.          */ \
                MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2);    \
                i -= 4*ADV;                             \
                MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2);        \
                v1 += 4*ADV;                            \
                MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2);  \
                v2 += 4*ADV;                            \
                MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2);      \
                res += 4*ADV;                           \
        } else if (EL_PER_CL(T) <= 2) {                 \
                /* Same four steps as the branch above. */ \
                MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2);    \
                i -= 4*ADV;                             \
                MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2);        \
                v1 += 4*ADV;                            \
                MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2);  \
                v2 += 4*ADV;                            \
                MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2);      \
                res += 4*ADV;                           \
        } else {                                        \
                MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2);    \
                i -= 4*ADV;                             \
                MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2);        \
                v1 += 4*ADV;                            \
                MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2);  \
                v2 += 4*ADV;                            \
                /* Fourth step belongs at res+3*ADV, as in the branches  */ \
                /* above: res+4*ADV left slot 3 unwritten and stored one */ \
                /* SIMD step beyond this unrolled group.                 */ \
                MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2);      \
                res += 4*ADV;                           \
        }
TODO: Should be merged with unroll_prefetch_def.h.

Note that we dropped all PREFETCH insns: hardware that supports SSE2 generally does prefetching as well, so we settle for smaller kernels instead. Four times unrolled kernel for 5 args with prefetching.

Definition at line 20 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_C_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                       \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \
                        TYPE* RESTRICT const _res,      \
                        LCTYPED(TYPE) _f2))             \
{                                                       \
        register TYPE *res= _res;                       \
        PREP(_f2);                                      \
        register long i = sz;                           \
        /* Make sure we have proper alignment */        \
        VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF);        \
        /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \
        SFIN;                                           \
        VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF);          \
        FIN(_f2);                                       \
}

Definition at line 535 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_CC_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                       \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \
                        TYPE* RESTRICT const _res,      \
                        LCTYPED(TYPE) _f1,              \
                        LCTYPED(TYPE) _f2))             \
{                                                       \
        register TYPE *res= _res;                       \
        PREP(_f1, _f2);                                 \
        register long i = sz;                           \
        /* Make sure we have proper alignment */        \
        VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF);        \
        /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \
        SFIN;                                           \
        VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF);          \
        FIN(_f1, _f2);                                  \
}

Definition at line 553 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_NP_SIMD ( MDOP,
ADV,
STP,
SUF   ) 

Value:

if (LIKELY(i >= 4*ADV)) {                       \
                STP TMP  UNUSED;                        \
                do {                                    \
                        UNROLL4_KERNEL3_SIMD(MDOP,ADV,SUF);     \
                } while (i >= 4*ADV);                   \
        }

Definition at line 196 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_PLAIN_SIMD ( MDOP,
ADV,
STP,
SUF   ) 

Value:

while (i >= ADV) {                              \
                STP TMP  UNUSED;                        \
                MDOP(res,f1,f2,SUF);                    \
                i -= ADV; res += ADV;                   \
        }

Definition at line 205 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                       \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \
                        TYPE* RESTRICT const _res))     \
{                                                       \
        register TYPE *res= _res;                       \
        PREP;                                           \
        register long i = sz;                           \
        /* Make sure we have proper alignment */        \
        VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF);        \
        /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \
        SFIN;                                           \
        VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF);          \
        FIN;                                            \
}

Definition at line 518 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_SISD ( SDOP,
COND,
STP,
SUF   ) 

Value:

while (COND && i) {                             \
                STP TMP  UNUSED;                        \
                SDOP(res,f1,f2,SUF);                    \
                --i; ++res;                             \
        }

Definition at line 213 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_1V_T_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                       \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \
                        const TYPE* const _res,         \
                        TYPE &_f2))                     \
{                                                       \
        register const TYPE *res= _res;                 \
        PREP(_f2);                                      \
        register long i = sz;                           \
        /* Make sure we have proper alignment */        \
        VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF);        \
        /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \
        SFIN;                                           \
        VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF);          \
        FIN(_f2);                                       \
}

Definition at line 572 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_C_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                               \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz,         \
                        TYPE* RESTRICT const _res,              \
                        const TYPE* RESTRICT const _v1,         \
                        LCTYPED(TYPE) _f2))                     \
{                                                               \
        register const TYPE *v1 = _v1;                          \
        register TYPE *res= _res;                               \
        PREP(_f2);                                              \
        register long i = sz;                                   \
        /* Make sure we have proper alignment */                \
        VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        if (MISALIGNMENT_CHECK(v1)) {                           \
                WARN_UNALIGN(v1);                               \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u)       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/       \
        } else {                                                \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,);       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/        \
        }                                                       \
        SFIN;                                                   \
        VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF);                  \
        FIN(_f2);                                               \
}

Definition at line 440 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_CC_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                               \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz,         \
                        TYPE* RESTRICT const _res,              \
                        const TYPE* RESTRICT const _v1,         \
                        LCTYPED(TYPE) _f1,                      \
                        LCTYPED(TYPE) _f2))                     \
{                                                               \
        register const TYPE *v1 = _v1;                          \
        register TYPE *res= _res;                               \
        PREP(_f1, _f2);                                         \
        register long i = sz;                                   \
        /* Make sure we have proper alignment */                \
        VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        if (MISALIGNMENT_CHECK(v1)) {                           \
                WARN_UNALIGN(v1);                               \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u)       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/       \
        } else {                                                \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,);       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/        \
        }                                                       \
        SFIN;                                                   \
        VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF);                  \
        FIN(_f1, _f2);                                          \
}

Definition at line 466 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_NP_SIMD ( MDOP,
ADV,
STP,
SUF,
UNA   ) 

Value:

if (LIKELY(i >= 4*ADV)) {                       \
                STP TMP, LD  UNUSED;                    \
                do {                                    \
                        UNROLL4_KERNEL4_SIMD(MDOP,ADV,SUF,UNA); \
                } while (i >= 4*ADV);                   \
        }

Definition at line 130 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_PLAIN_SIMD ( MDOP,
ADV,
STP,
SUF,
UNA   ) 

Value:

while (i >= ADV) {                              \
                STP TMP, LD  UNUSED;                    \
                MDOP(res, v1, f1, f2, SUF, UNA);        \
                i -= ADV; v1 += ADV; res += ADV;        \
        }

Definition at line 139 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                               \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz,         \
                        TYPE* RESTRICT const _res,              \
                        const TYPE* RESTRICT const _v1))        \
{                                                               \
        register TYPE *res = _res;                              \
        register const TYPE *v1 = _v1;                          \
        PREP;                                                   \
        register long i = sz;                                   \
        /* Make sure we have proper alignment */                \
        VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        if (MISALIGNMENT_CHECK(v1)) {                           \
                WARN_UNALIGN(v1);                               \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u)       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/       \
        } else {                                                \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,);       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/        \
        }                                                       \
        SFIN;                                                   \
        VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF);                  \
        FIN;                                                    \
}

Definition at line 415 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_SISD ( SDOP,
COND,
STP,
SUF   ) 

Value:

while (COND && i) {                             \
                STP TMP, LD  UNUSED;                    \
                SDOP(res,v1,f1,f2,SUF,);                \
                --i; ++v1; ++res;                       \
        }

Definition at line 147 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_2V_T_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Value:

TWEAK(template <>                                               \
VEC_INLINE void FNAME < TYPE > (const unsigned long sz,         \
                        const TYPE* RESTRICT const _res,        \
                        const TYPE* RESTRICT const _v1,         \
                        TYPE &_f2))                             \
{                                                               \
        register const TYPE *res= _res, *v1 = _v1;              \
        PREP(_f2);                                              \
        register long i = sz;                                   \
        /* Make sure we have proper alignment */                \
        VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF);       \
        if (MISALIGNMENT_CHECK(v1)) {                           \
                WARN_UNALIGN(v1);                               \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u)       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/       \
        } else {                                                \
                VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,);       \
                /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/        \
        }                                                       \
        SFIN;                                                   \
        VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF);                  \
        FIN(_f2);                                               \
}

Definition at line 493 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_C_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Definition at line 343 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_CC_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Definition at line 378 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_NP_SIMD ( MDOP,
ADV,
STP,
SUF,
UNA1,
UNA2   ) 

Value:

if (LIKELY(i >= 4*ADV)) {                       \
                STP TMP, LD  UNUSED;                    \
                do {                                    \
                        UNROLL4_KERNEL5_SIMD(MDOP,ADV,SUF,UNA1,UNA2);   \
                } while (i >= 4*ADV);                   \
        }

Definition at line 64 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_PLAIN_SIMD ( MDOP,
ADV,
STP,
SUF,
UNA1,
UNA2   ) 

Value:

while (i >= ADV) {                              \
                STP TMP, LD  UNUSED;                    \
                MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2);    \
                i -= ADV; res += ADV; v1 += ADV; v2 +=ADV; \
        }

Definition at line 73 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_SIMD ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

TODO: Check whether enabling the non-unrolled fixup (loop tail) is beneficial.

Macro abuse ...
FNAME: Function name.
OP: Operation for each loop (macro), SSE2 intrinsics.
SSUF: Argument passed to the OP macro (suffix for the single data operation).
MSUF: Ditto (suffix used for the multiple data (SIMD) operation).
PREP: Preparation macro before the loop, called with _f1, _f2 as args (as available).
SFIN: Cleanup macro after we're done with the SIMD part.
FIN: Cleanup macro before leaving, called with _f1, _f2 (as available).
ADV: How many elements the SIMD instructions handle per insn OP (2/4).
TYPE: Standard C data type (float/double).
STP: SIMD data type (__m128/__m128d).

Definition at line 274 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_SIMD_UA ( FNAME,
OP,
SSUF,
MSUF,
PREP,
SFIN,
FIN,
ADV,
TYPE,
STP   ) 

Without the unaligned check.

Definition at line 309 of file unroll_prefetch_simd_def.h.

#define VKERN_TEMPL_3V_SISD ( SDOP,
COND,
STP,
SUF   ) 

Value:

while (COND && i) {                             \
                STP TMP, LD  UNUSED;                    \
                SDOP(res,v1,v2,f1,f2,SUF,,);            \
                --i; ++res; ++v1; ++v2;                 \
        }

Definition at line 81 of file unroll_prefetch_simd_def.h.

#define WARN_UNALIGN ( v )     do {} while (0)

WARN_UNALIGNED macro: If defined, the TBCI library will print a warning to stderr for unaligned SIMD accesses, which are slower.

..

Definition at line 251 of file unroll_prefetch_simd_def.h.


Generated on Wed Nov 20 09:28:49 2013 for TBCI Numerical high perf. C++ Library by  doxygen 1.5.6