/**
 * @file
 * Compile-time performance tuning defaults.
 *
 * This header contains preprocessor logic only.  Depending on the target
 * architecture, the compiler and optional OPT_* switches it selects
 *
 *  - one vector kernel flavour: USE_PLAIN_VEC_KERNELS, USE_UNR_VEC_KERNELS
 *    or USE_UNR_VEC_KERNELS2 (a flavour predefined by the user always wins;
 *    USE_UNR_VEC_KERNELS2 is never chosen automatically),
 *  - the defaults DEF_UNROLL_DEPTH, DEF_PREFETCH_AHEAD and DEF_CACHELINE_SZ,
 *  - optionally NO_PREFETCH, SSE_PREFETCH or AMD_PREFETCH.
 *
 * The public tunables UNROLL_DEPTH, PREFETCH_AHEAD, CACHELINE_SZ,
 * CACHE_LOC_READ and CACHE_LOC_WRITE take their value from the matching
 * DEF_* macro unless they were defined before this header is included.
 */
#ifndef TBCI_PERF_OPT_H
#define TBCI_PERF_OPT_H

/*
 * Compilers reporting __GNUC__ >= 4 get the plain kernels unless a flavour
 * was requested explicitly.  Because this check runs first, all the
 * "define USE_UNR_VEC_KERNELS" defaults further down are skipped for such
 * compilers; only the numeric DEF_* values of those sections still apply.
 */
#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(USE_PLAIN_VEC_KERNELS) \
 && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
# define USE_PLAIN_VEC_KERNELS
#endif

/*
 * Alpha: 32 byte cache line, unrolled kernels by default.
 * Every GCC version uses depth 8 / prefetch 8 (the former "__GNUC__ <= 2"
 * and "newer GCC" branches were identical and are merged here); any other
 * compiler uses depth 1 / prefetch 4.
 */
#ifdef __alpha__
# define DEF_CACHELINE_SZ 32
# if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#  define USE_UNR_VEC_KERNELS
# endif
# ifdef __GNUC__
#  define DEF_UNROLL_DEPTH 8
#  define DEF_PREFETCH_AHEAD 8
# else
#  define DEF_UNROLL_DEPTH 1
#  define DEF_PREFETCH_AHEAD 4
# endif
#endif

/*
 * x86 (32 and 64 bit).
 * Cache line: 64 bytes for OPT_PENTIUM4, OPT_CORE2 and all x86-64 builds,
 * 32 bytes otherwise.
 */
#if defined(__i386__) || defined(__x86_64__)
# if defined(OPT_PENTIUM4) || defined(OPT_CORE2) || defined(__x86_64__)
#  define DEF_CACHELINE_SZ 64
# else
#  define DEF_CACHELINE_SZ 32
# endif
# ifdef __GNUC__
#  ifdef OPT_PENTIUM4
#   if __GNUC__ <= 2
     /* Pentium 4 with GCC <= 2: unrolled kernels, depth 1. */
#    if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#     define USE_UNR_VEC_KERNELS
#    endif
#    define DEF_UNROLL_DEPTH 1
#    define DEF_PREFETCH_AHEAD 4
#   else
     /* Pentium 4 with GCC >= 3: plain kernels; prefetching is switched
      * off unless FORCE_PREFETCH is given. */
#    if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#     define USE_PLAIN_VEC_KERNELS
#    endif
#    define DEF_UNROLL_DEPTH 4
#    define DEF_PREFETCH_AHEAD 16
#    if !defined(FORCE_PREFETCH) && !defined(NO_PREFETCH)
#     define NO_PREFETCH
#    endif
#   endif
#  elif defined(OPT_PENTIUM3) || defined(OPT_CORE2) || defined(SSE_PREFETCH)
    /* Prefetching is off unless FORCE_PREFETCH is given.  SSE_PREFETCH is
     * therefore only auto-enabled when prefetching is forced (and
     * NO_PREFETCH was not predefined).
     * NOTE(review): the test below uses OPT_ARCH_PENTIUM3 while this branch
     * is entered via OPT_PENTIUM3 -- confirm both spellings are intended. */
#   if !defined(FORCE_PREFETCH) && !defined(NO_PREFETCH)
#    define NO_PREFETCH
#   endif
#   if !defined(SSE_PREFETCH) && defined(OPT_ARCH_PENTIUM3) && !defined(NO_PREFETCH)
#    define SSE_PREFETCH
#   endif
#   if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#    define USE_UNR_VEC_KERNELS
#   endif
#   define DEF_UNROLL_DEPTH 4
#   define DEF_PREFETCH_AHEAD 8
#  elif defined(OPT_ATHLON) || defined(AMD_PREFETCH)
    /* Prefetching is switched off by default on x86-64 only.
     * NOTE(review): the test below uses OPT_ARCH_ATHLON while this branch
     * is entered via OPT_ATHLON -- confirm both spellings are intended. */
#   if !defined(FORCE_PREFETCH) && !defined(NO_PREFETCH) && defined(__x86_64__)
#    define NO_PREFETCH
#   endif
#   if !defined(AMD_PREFETCH) && defined(OPT_ARCH_ATHLON) && !defined(NO_PREFETCH)
#    define AMD_PREFETCH
#   endif
#   if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#    define USE_UNR_VEC_KERNELS
#   endif
#   define DEF_UNROLL_DEPTH 1
#   define DEF_PREFETCH_AHEAD 8
#  else
    /* GCC on x86 without a specific OPT_* selection. */
#   if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#    define USE_UNR_VEC_KERNELS
#   endif
#   define DEF_UNROLL_DEPTH 8
#   define DEF_PREFETCH_AHEAD 4
#  endif
# else
   /* x86 compilers that do not define __GNUC__. */
#  if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#   define USE_UNR_VEC_KERNELS
#  endif
#  define DEF_UNROLL_DEPTH 8
#  define DEF_PREFETCH_AHEAD 8
# endif
#endif

/* Generic fallbacks for everything the sections above left undecided. */
#if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
# define USE_PLAIN_VEC_KERNELS
#endif
#ifndef DEF_UNROLL_DEPTH
# define DEF_UNROLL_DEPTH 4
#endif
#ifndef DEF_PREFETCH_AHEAD
# define DEF_PREFETCH_AHEAD 4
#endif
#ifndef DEF_CACHELINE_SZ
# define DEF_CACHELINE_SZ 32
#endif

/*
 * Defaults for CACHE_LOC_READ / CACHE_LOC_WRITE.
 * NOTE(review): presumably cache locality hints handed to the prefetch
 * primitive; the consumers are outside this header -- confirm there.
 */
#define DEF_CACHE_LOC_READ 2
#define DEF_CACHE_LOC_WRITE 3

/**
 * Prefetch distance in cache lines: PREF_OFFS(T) is PREFETCH_AHEAD cache
 * lines' worth of elements of type T.
 */
#ifndef PREFETCH_AHEAD
# define PREFETCH_AHEAD DEF_PREFETCH_AHEAD
#endif
/**
 * Unroll depth; presumably the loop unrolling factor of the unrolled vector
 * kernels (not used inside this header -- confirm against the kernels).
 */
#ifndef UNROLL_DEPTH
# define UNROLL_DEPTH DEF_UNROLL_DEPTH
#endif
/** Cache line size in bytes, as used by EL_PER_CL(). */
#ifndef CACHELINE_SZ
# define CACHELINE_SZ DEF_CACHELINE_SZ
#endif

/** Value used for reads; defaults to DEF_CACHE_LOC_READ. */
#ifndef CACHE_LOC_READ
# define CACHE_LOC_READ DEF_CACHE_LOC_READ
#endif
/** Value used for writes; defaults to DEF_CACHE_LOC_WRITE. */
#ifndef CACHE_LOC_WRITE
# define CACHE_LOC_WRITE DEF_CACHE_LOC_WRITE
#endif

/**
 * Number of elements of type T that fit into one cache line, as a signed
 * int and never less than 1 (types larger than a cache line yield 1).
 * CACHELINE_SZ is parenthesised so that expression-valued overrides such as
 * -DCACHELINE_SZ=16+16 divide as a whole, and the complete expansion is
 * wrapped so it can be used as an operand anywhere.
 */
#define EL_PER_CL(T) \
	((signed)(((CACHELINE_SZ) / sizeof( T )) ? ((CACHELINE_SZ) / sizeof( T )) : 1))
/**
 * Prefetch offset in elements of type T: PREFETCH_AHEAD cache lines ahead.
 * PREFETCH_AHEAD is parenthesised for the same reason as CACHELINE_SZ above.
 */
#define PREF_OFFS(T) (EL_PER_CL(T) * (PREFETCH_AHEAD))

#endif /* TBCI_PERF_OPT_H */