diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 08712439e1..0448425e44 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -209,6 +209,25 @@ else() # -mavx # -msse4.2) + # Tentatively re-enabling vector instructions + set(USE_AVX512F OFF CACHE BOOL "Enable AVX 512F instructions") + if (USE_AVX512F) + add_compile_options(-mavx512f) + endif() + set(USE_AVX ON CACHE BOOL "Enable AVX instructions") + if (USE_AVX) + add_compile_options(-mavx) + endif() + + # Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at compile time. + # If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this + # writing; see flow.cpp). + # + # The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large + # copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy + # for more information. + #add_compile_options(-fno-builtin-memcpy) + if (USE_VALGRIND) add_compile_options(-DVALGRIND -DUSE_VALGRIND) endif() @@ -243,12 +262,6 @@ else() -Wno-tautological-pointer-compare -Wno-format -Woverloaded-virtual) - set(USE_AVX ON CACHE BOOL "Enable AVX instructions") - if (USE_AVX) - add_compile_options(-mavx) - else() - add_compile_options(-msse4) - endif() if (USE_CCACHE) add_compile_options( -Wno-register @@ -260,20 +273,6 @@ else() endif() if (GCC) add_compile_options(-Wno-pragmas) - set(USE_AVX ON CACHE BOOL "Enable AVX instructions") - if (USE_AVX) - add_compile_options(-mavx) - else() - add_compile_options(-msse4) - endif() - # Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at runtime. - # If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this - # writing; see flow.cpp). 
- # - # The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large - # copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy - # for more information. - #add_compile_options(-fno-builtin-memcpy) # Otherwise `state [[maybe_unused]] int x;` will issue a warning. # https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns-incorrectly-that-attribute-is add_compile_options(-Wno-attributes) diff --git a/flow/flow.cpp b/flow/flow.cpp index 7916accb1f..f9863b21fa 100644 --- a/flow/flow.cpp +++ b/flow/flow.cpp @@ -26,6 +26,7 @@ #include #include +#if defined (__linux__) || defined (__FreeBSD__) // For benchmarking; need a version of rte_memcpy that doesn't live in the same compilation unit as the test. void * rte_memcpy_noinline(void *__restrict __dest, const void *__restrict __src, size_t __n) { return rte_memcpy(__dest, __src, __n); @@ -36,6 +37,7 @@ __attribute__((visibility ("default"))) void *memcpy (void *__restrict __dest, c // folly_memcpy is faster for small copies, but rte seems to win out in most other circumstances return rte_memcpy(__dest, __src, __n); } +#endif // defined (__linux__) || defined (__FreeBSD__) INetwork *g_network = 0; diff --git a/flow/rte_memcpy.h b/flow/rte_memcpy.h index bf8989616c..f9c28ce112 100644 --- a/flow/rte_memcpy.h +++ b/flow/rte_memcpy.h @@ -28,6 +28,8 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include +#if defined (__linux__) || defined (__FreeBSD__) + #ifdef __cplusplus extern "C" { #endif @@ -50,8 +52,9 @@ extern "C" { static force_inline void * rte_memcpy(void *dst, const void *src, size_t n); -#ifdef __AVX__ -//#define RTE_MACHINE_CPUFLAG_AVX512F -- our g++ is too old for this +#ifdef __AVX512F__ +#define RTE_MACHINE_CPUFLAG_AVX512F +#elif defined(__AVX__) #define RTE_MACHINE_CPUFLAG_AVX2 #endif @@ -905,4 +908,6 @@ rte_rdtsc(void) } #endif +#endif /* 
defined (__linux__) || defined (__FreeBSD__) */ + #endif /* _RTE_MEMCPY_X86_64_H_ */ diff --git a/flow/test_memcpy_perf.cpp b/flow/test_memcpy_perf.cpp index 7138d04599..e8d9323b09 100644 --- a/flow/test_memcpy_perf.cpp +++ b/flow/test_memcpy_perf.cpp @@ -6,13 +6,13 @@ #include #include #include -#include #include "flow/rte_memcpy.h" #include "flow/IRandom.h" #include "flow/UnitTest.h" #include "flow/flow.h" +#if defined (__linux__) || defined (__FreeBSD__) extern "C" { void* folly_memcpy(void* dst, const void* src, uint32_t length); } @@ -352,4 +352,6 @@ TEST_CASE("performance/memcpy/rte") { return Void(); } -void forceLinkMemcpyPerfTests() {} \ No newline at end of file +#endif // defined (__linux__) || defined (__FreeBSD__) + +void forceLinkMemcpyPerfTests() {}