Go to the source code of this file.
Defines | |
| #define | UNROLL4_PREF_KERNEL5_SIMD(MDOP, ADV, T, SUF, UNA1, UNA2) |
| TODO: Should be merged with unroll_prefetch_def.h. | |
| #define | UNROLL4_KERNEL5_SIMD(MDOP, ADV, SUF, UNA1, UNA2) |
| Four times unrolled kernel for 5 args without prefetching. | |
| #define | VKERN_TEMPL_3V_NP_SIMD(MDOP, ADV, STP, SUF, UNA1, UNA2) |
| #define | VKERN_TEMPL_3V_PLAIN_SIMD(MDOP, ADV, STP, SUF, UNA1, UNA2) |
| #define | VKERN_TEMPL_3V_SISD(SDOP, COND, STP, SUF) |
| #define | UNROLL4_PREF_KERNEL4_SIMD(MDOP, ADV, T, SUF, UNA) |
| Four times unrolled kernel for 4 args with prefetching. | |
| #define | UNROLL4_KERNEL4_SIMD(MDOP, ADV, SUF, UNA) |
| Four times unrolled kernel for 4 args without prefetching. | |
| #define | VKERN_TEMPL_2V_NP_SIMD(MDOP, ADV, STP, SUF, UNA) |
| #define | VKERN_TEMPL_2V_PLAIN_SIMD(MDOP, ADV, STP, SUF, UNA) |
| #define | VKERN_TEMPL_2V_SISD(SDOP, COND, STP, SUF) |
| #define | UNROLL4_PREF_KERNEL3_SIMD(MDOP, ADV, T, SUF) |
| Four times unrolled kernel for 3 args with prefetching. TODO: Prefetching. | |
| #define | UNROLL4_KERNEL3_SIMD(MDOP, ADV, SUF) |
| Four times unrolled kernel for 3 args without prefetching. | |
| #define | VKERN_TEMPL_1V_NP_SIMD(MDOP, ADV, STP, SUF) |
| #define | VKERN_TEMPL_1V_PLAIN_SIMD(MDOP, ADV, STP, SUF) |
| #define | VKERN_TEMPL_1V_SISD(SDOP, COND, STP, SUF) |
| #define | MISALIGNMENT_CHECK(x) ((unsigned long)x & 0x0f) |
| #define | WARN_UNALIGN(v) do {} while (0) |
| WARN_UNALIGNED macro: if defined, the TBCI library prints a warning to stderr for unaligned SIMD accesses, which are slower. | |
| #define | VKERN_TEMPL_3V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| TODO: Check whether enabling the non-unrolled fixup (loop tail) is beneficial. | |
| #define | VKERN_TEMPL_3V_SIMD_UA(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| Without the unaligned check. | |
| #define | VKERN_TEMPL_3V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_3V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_T_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_T_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
(c) Kurt Garloff, <kurt@garloff.de>, 4/2005, GNU LGPL v2
Definition in file unroll_prefetch_simd_def.h.
| #define MISALIGNMENT_CHECK | ( | x | ) | ((unsigned long)x & 0x0f) |
Definition at line 234 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_KERNEL3_SIMD | ( | MDOP, | |||
| ADV, | |||||
| SUF | ) |
Value:
MDOP(res, f1, f2, SUF); \
MDOP(res+ADV, f1, f2, SUF); \
i -= 4*ADV; \
MDOP(res+2*ADV, f1, f2, SUF); \
MDOP(res+3*ADV, f1, f2, SUF); \
res += 4*ADV
Definition at line 187 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_KERNEL4_SIMD | ( | MDOP, | |||
| ADV, | |||||
| SUF, | |||||
| UNA | ) |
Value:
MDOP(res, v1, f1, f2, SUF, UNA); \
MDOP(res+ADV, v1+ADV, f1, f2, SUF, UNA); \
i -= 4*ADV; \
MDOP(res+2*ADV, v1+2*ADV, f1, f2, SUF, UNA); \
MDOP(res+3*ADV, v1+3*ADV, f1, f2, SUF, UNA); \
v1 += 4*ADV; res += 4*ADV
Definition at line 120 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_KERNEL5_SIMD | ( | MDOP, | |||
| ADV, | |||||
| SUF, | |||||
| UNA1, | |||||
| UNA2 | ) |
Value:
MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2); \
i -= 4*ADV; \
MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2);\
v1 += 4*ADV; \
MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2); \
v2 += 4*ADV; \
MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2); \
res += 4*ADV
Definition at line 52 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_PREF_KERNEL3_SIMD | ( | MDOP, | |||
| ADV, | |||||
| T, | |||||
| SUF | ) |
Value:
if (EL_PER_CL(T) <= 1) { \ MDOP(res, f1, f2, SUF); \ MDOP(res+ADV, f1, f2, SUF); \ i -= 4*ADV; \ MDOP(res+2*ADV, f1, f2, SUF); \ MDOP(res+3*ADV, f1, f2, SUF); \ res += 4*ADV; \ } else if (EL_PER_CL(T) <= 2) { \ MDOP(res, f1, f2, SUF); \ MDOP(res+ADV, f1, f2, SUF); \ i -= 4*ADV; \ MDOP(res+2*ADV, f1, f2, SUF); \ MDOP(res+3*ADV, f1, f2, SUF); \ res += 4*ADV; \ } else { \ MDOP(res, f1, f2, SUF); \ MDOP(res+ADV, f1, f2, SUF); \ i -= 4*ADV; \ MDOP(res+2*ADV, f1, f2, SUF); \ MDOP(res+3*ADV, f1, f2, SUF); \ res += 4*ADV; \ }
(FIXME: Is it needed? SSE2-capable CPUs do hardware prefetching, don't they?)
Definition at line 161 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_PREF_KERNEL4_SIMD | ( | MDOP, | |||
| ADV, | |||||
| T, | |||||
| SUF, | |||||
| UNA | ) |
Value:
if (EL_PER_CL(T) <= 1) { \ MDOP(res,v1,f1,f2,SUF,UNA); \ i -= 4*ADV; \ MDOP(res+ADV,v1+ADV,f1,f2,SUF,UNA); \ MDOP(res+2*ADV,v1+2*ADV,f1,f2,SUF,UNA); \ v1 += 4*ADV; \ MDOP(res+3*ADV,v1-ADV,f1,f2,SUF,UNA); \ res += 4*ADV; \ } else if (EL_PER_CL(T) <= 2) { \ MDOP(res,v1,f1,f2, SUF,UNA); \ i -= 4*ADV; \ MDOP(res+ADV,v1+ADV,f1,f2,SUF,UNA); \ MDOP(res+2*ADV,v1+2*ADV,f1,f2,SUF,UNA); \ v1 += 4*ADV; \ MDOP(res+3*ADV,v1-ADV,f1,f2,SUF,UNA); \ res += 4*ADV; \ } else { \ MDOP(res,v1,f1,f2,SUF,UNA); \ i -= 4*ADV; \ MDOP(res+ADV,v1+ADV,f1,f2,SUF,UNA); \ MDOP(res+2*ADV,v1+2*ADV,f1,f2,SUF,UNA); \ v1 += 4*ADV; \ MDOP(res+3*ADV,v1-ADV,f1,f2,SUF,UNA); \ res += 4*ADV; \ }
Definition at line 91 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_PREF_KERNEL5_SIMD | ( | MDOP, | |||
| ADV, | |||||
| T, | |||||
| SUF, | |||||
| UNA1, | |||||
| UNA2 | ) |
Value:
if (EL_PER_CL(T) <= 1) { \ MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2); \ i -= 4*ADV; \ MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2); \ v1 += 4*ADV; \ MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2); \ v2 += 4*ADV; \ MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2); \ res += 4*ADV; \ } else if (EL_PER_CL(T) <= 2) { \ MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2); \ i -= 4*ADV; \ MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2); \ v1 += 4*ADV; \ MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2); \ v2 += 4*ADV; \ MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2); \ res += 4*ADV; \ } else { \ MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2); \ i -= 4*ADV; \ MDOP(res+ADV,v1+ADV,v2+ADV,f1,f2,SUF,UNA1,UNA2); \ v1 += 4*ADV; \ MDOP(res+2*ADV,v1-2*ADV,v2+2*ADV,f1,f2,SUF,UNA1,UNA2); \ v2 += 4*ADV; \ MDOP(res+3*ADV,v1-ADV,v2-ADV,f1,f2,SUF,UNA1,UNA2); \ res += 4*ADV; \ }
Note that we dropped all PREFETCH insns: hardware that does SSE2 generally does prefetching as well, so we rather settle for smaller kernels. Four times unrolled kernel for 5 args with prefetching.
Definition at line 20 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_C_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ TYPE* RESTRICT const _res, \ LCTYPED(TYPE) _f2)) \ { \ register TYPE *res= _res; \ PREP(_f2); \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF); \ /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \ SFIN; \ VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF); \ FIN(_f2); \ }
Definition at line 535 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_CC_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ TYPE* RESTRICT const _res, \ LCTYPED(TYPE) _f1, \ LCTYPED(TYPE) _f2)) \ { \ register TYPE *res= _res; \ PREP(_f1, _f2); \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF); \ /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \ SFIN; \ VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF); \ FIN(_f1, _f2); \ }
Definition at line 553 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_NP_SIMD | ( | MDOP, | |||
| ADV, | |||||
| STP, | |||||
| SUF | ) |
Value:
if (LIKELY(i >= 4*ADV)) { \ STP TMP UNUSED; \ do { \ UNROLL4_KERNEL3_SIMD(MDOP,ADV,SUF); \ } while (i >= 4*ADV); \ }
Definition at line 196 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_PLAIN_SIMD | ( | MDOP, | |||
| ADV, | |||||
| STP, | |||||
| SUF | ) |
Value:
while (i >= ADV) { \ STP TMP UNUSED; \ MDOP(res,f1,f2,SUF); \ i -= ADV; res += ADV; \ }
Definition at line 205 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ TYPE* RESTRICT const _res)) \ { \ register TYPE *res= _res; \ PREP; \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF); \ /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \ SFIN; \ VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF); \ FIN; \ }
Definition at line 518 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_SISD | ( | SDOP, | |||
| COND, | |||||
| STP, | |||||
| SUF | ) |
Value:
while (COND && i) { \ STP TMP UNUSED; \ SDOP(res,f1,f2,SUF); \ --i; ++res; \ }
Definition at line 213 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_T_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ const TYPE* const _res, \ TYPE &_f2)) \ { \ register const TYPE *res= _res; \ PREP(_f2); \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_1V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ VKERN_TEMPL_1V_NP_SIMD(OP,ADV,STP,MSUF); \ /*VKERN_TEMPL_1V_PLAIN_SIMD(OP,ADV,STP,MSUF);*/ \ SFIN; \ VKERN_TEMPL_1V_SISD(OP,true,STP,SSUF); \ FIN(_f2); \ }
Definition at line 572 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_C_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ TYPE* RESTRICT const _res, \ const TYPE* RESTRICT const _v1, \ LCTYPED(TYPE) _f2)) \ { \ register const TYPE *v1 = _v1; \ register TYPE *res= _res; \ PREP(_f2); \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ if (MISALIGNMENT_CHECK(v1)) { \ WARN_UNALIGN(v1); \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u) \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/ \ } else { \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,); \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/ \ } \ SFIN; \ VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF); \ FIN(_f2); \ }
Definition at line 440 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_CC_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ TYPE* RESTRICT const _res, \ const TYPE* RESTRICT const _v1, \ LCTYPED(TYPE) _f1, \ LCTYPED(TYPE) _f2)) \ { \ register const TYPE *v1 = _v1; \ register TYPE *res= _res; \ PREP(_f1, _f2); \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ if (MISALIGNMENT_CHECK(v1)) { \ WARN_UNALIGN(v1); \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u) \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/ \ } else { \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,); \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/ \ } \ SFIN; \ VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF); \ FIN(_f1, _f2); \ }
Definition at line 466 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_NP_SIMD | ( | MDOP, | |||
| ADV, | |||||
| STP, | |||||
| SUF, | |||||
| UNA | ) |
Value:
if (LIKELY(i >= 4*ADV)) { \ STP TMP, LD UNUSED; \ do { \ UNROLL4_KERNEL4_SIMD(MDOP,ADV,SUF,UNA); \ } while (i >= 4*ADV); \ }
Definition at line 130 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_PLAIN_SIMD | ( | MDOP, | |||
| ADV, | |||||
| STP, | |||||
| SUF, | |||||
| UNA | ) |
Value:
while (i >= ADV) { \ STP TMP, LD UNUSED; \ MDOP(res, v1, f1, f2, SUF, UNA); \ i -= ADV; v1 += ADV; res += ADV; \ }
Definition at line 139 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ TYPE* RESTRICT const _res, \ const TYPE* RESTRICT const _v1)) \ { \ register TYPE *res = _res; \ register const TYPE *v1 = _v1; \ PREP; \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ if (MISALIGNMENT_CHECK(v1)) { \ WARN_UNALIGN(v1); \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u) \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/ \ } else { \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,); \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/ \ } \ SFIN; \ VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF); \ FIN; \ }
Definition at line 415 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_SISD | ( | SDOP, | |||
| COND, | |||||
| STP, | |||||
| SUF | ) |
Value:
while (COND && i) { \ STP TMP, LD UNUSED; \ SDOP(res,v1,f1,f2,SUF,); \ --i; ++v1; ++res; \ }
Definition at line 147 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_T_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Value:
TWEAK(template <> \ VEC_INLINE void FNAME < TYPE > (const unsigned long sz, \ const TYPE* RESTRICT const _res, \ const TYPE* RESTRICT const _v1, \ TYPE &_f2)) \ { \ register const TYPE *res= _res, *v1 = _v1; \ PREP(_f2); \ register long i = sz; \ /* Make sure we have proper alignment */ \ VKERN_TEMPL_2V_SISD(OP,MISALIGNMENT_CHECK(res),STP,SSUF); \ if (MISALIGNMENT_CHECK(v1)) { \ WARN_UNALIGN(v1); \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,u) \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,u);*/ \ } else { \ VKERN_TEMPL_2V_NP_SIMD(OP,ADV,STP,MSUF,); \ /*VKERN_TEMPL_2V_PLAIN_SIMD(OP,ADV,STP,MSUF,);*/ \ } \ SFIN; \ VKERN_TEMPL_2V_SISD(OP,true,STP,SSUF); \ FIN(_f2); \ }
Definition at line 493 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_C_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Definition at line 343 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_CC_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
Definition at line 378 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_NP_SIMD | ( | MDOP, | |||
| ADV, | |||||
| STP, | |||||
| SUF, | |||||
| UNA1, | |||||
| UNA2 | ) |
Value:
if (LIKELY(i >= 4*ADV)) { \ STP TMP, LD UNUSED; \ do { \ UNROLL4_KERNEL5_SIMD(MDOP,ADV,SUF,UNA1,UNA2); \ } while (i >= 4*ADV); \ }
Definition at line 64 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_PLAIN_SIMD | ( | MDOP, | |||
| ADV, | |||||
| STP, | |||||
| SUF, | |||||
| UNA1, | |||||
| UNA2 | ) |
Value:
while (i >= ADV) { \ STP TMP, LD UNUSED; \ MDOP(res,v1,v2,f1,f2,SUF,UNA1,UNA2); \ i -= ADV; res += ADV; v1 += ADV; v2 +=ADV; \ }
Definition at line 73 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_SIMD | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
TODO: Check whether enabling the non-unrolled fixup (loop tail) is beneficial.
Macro abuse ... FNAME: Function name; OP: operation for each loop (macro), SSE2 intrinsics; SSUF: argument passed to OP macro (suffix for single data operation); MSUF: ditto (suffix used for multiple data operation (SIMD)); PREP: Preparation macro before loop, called with _f1, _f2 as args (as available); SFIN: Cleanup macro after we're done with SIMD part; FIN: Cleanup macro before leaving, called with _f1, _f2 (as available); ADV: How many elements the SIMD instructions handle per insn OP (2/4); TYPE: Standard C data type (float/double); STP: SIMD data type (__m128/__m128d).
Definition at line 274 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_SIMD_UA | ( | FNAME, | |||
| OP, | |||||
| SSUF, | |||||
| MSUF, | |||||
| PREP, | |||||
| SFIN, | |||||
| FIN, | |||||
| ADV, | |||||
| TYPE, | |||||
| STP | ) |
| #define VKERN_TEMPL_3V_SISD | ( | SDOP, | |||
| COND, | |||||
| STP, | |||||
| SUF | ) |
Value:
while (COND && i) { \ STP TMP, LD UNUSED; \ SDOP(res,v1,v2,f1,f2,SUF,,); \ --i; ++res; ++v1; ++v2; \ }
Definition at line 81 of file unroll_prefetch_simd_def.h.
| #define WARN_UNALIGN | ( | v | ) | do {} while (0) |
WARN_UNALIGNED macro: if defined, the TBCI library prints a warning to stderr for unaligned SIMD accesses, which are slower.
..
Definition at line 251 of file unroll_prefetch_simd_def.h.
1.5.6