Add cmake option to enable avx512

This commit is contained in:
Russell Sears 2020-05-13 11:59:59 -07:00
parent c5e2df99a2
commit a99ceb9c42
4 changed files with 32 additions and 24 deletions

View File

@ -209,6 +209,25 @@ else()
# -mavx
# -msse4.2)
# Tentatively re-enabling vector instructions
# Opt-in switch for AVX-512F code generation. Defaults to OFF; override it in the cache (e.g. -DUSE_AVX512F=ON).
set(USE_AVX512F OFF CACHE BOOL "Enable AVX 512F instructions")
if (USE_AVX512F)
# add_compile_options is directory-scoped: the flag applies to targets created after this call.
add_compile_options(-mavx512f)
endif()
# AVX code generation is enabled by default; configure with -DUSE_AVX=OFF to omit the -mavx flag.
# Note that no alternative vector flag is added here when USE_AVX is OFF.
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
if (USE_AVX)
add_compile_options(-mavx)
endif()
# Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at compile time.
# If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this
# writing; see flow.cpp).
#
# The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large
# copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy
# for more information.
#add_compile_options(-fno-builtin-memcpy)
if (USE_VALGRIND)
add_compile_options(-DVALGRIND -DUSE_VALGRIND)
endif()
@ -243,12 +262,6 @@ else()
-Wno-tautological-pointer-compare
-Wno-format
-Woverloaded-virtual)
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
if (USE_AVX)
add_compile_options(-mavx)
else()
add_compile_options(-msse4)
endif()
if (USE_CCACHE)
add_compile_options(
-Wno-register
@ -260,20 +273,6 @@ else()
endif()
if (GCC)
add_compile_options(-Wno-pragmas)
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
if (USE_AVX)
add_compile_options(-mavx)
else()
add_compile_options(-msse4)
endif()
# Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at compile time.
# If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this
# writing; see flow.cpp).
#
# The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large
# copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy
# for more information.
#add_compile_options(-fno-builtin-memcpy)
# Otherwise `state [[maybe_unused]] int x;` will issue a warning.
# https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns-incorrectly-that-attribute-is
add_compile_options(-Wno-attributes)

View File

@ -26,6 +26,7 @@
#include <stdarg.h>
#include <cinttypes>
#if defined (__linux__) || defined (__FreeBSD__)
// For benchmarking; need a version of rte_memcpy that doesn't live in the same compilation unit as the test.
void * rte_memcpy_noinline(void *__restrict __dest, const void *__restrict __src, size_t __n) {
return rte_memcpy(__dest, __src, __n);
@ -36,6 +37,7 @@ __attribute__((visibility ("default"))) void *memcpy (void *__restrict __dest, c
// folly_memcpy is faster for small copies, but rte seems to win out in most other circumstances
return rte_memcpy(__dest, __src, __n);
}
#endif // defined (__linux__) || defined (__FreeBSD__)
INetwork *g_network = 0;

View File

@ -28,6 +28,8 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#include <flow/Platform.h>
#if defined (__linux__) || defined (__FreeBSD__)
#ifdef __cplusplus
extern "C" {
#endif
@ -50,8 +52,9 @@ extern "C" {
static force_inline void *
rte_memcpy(void *dst, const void *src, size_t n);
#ifdef __AVX__
//#define RTE_MACHINE_CPUFLAG_AVX512F -- our g++ is too old for this
#ifdef __AVX512F__
#define RTE_MACHINE_CPUFLAG_AVX512F
#elif defined(__AVX__)
#define RTE_MACHINE_CPUFLAG_AVX2
#endif
@ -905,4 +908,6 @@ rte_rdtsc(void)
}
#endif
#endif /* defined (__linux__) || defined (__FreeBSD__) */
#endif /* _RTE_MEMCPY_X86_64_H_ */

View File

@ -6,13 +6,13 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include "flow/rte_memcpy.h"
#include "flow/IRandom.h"
#include "flow/UnitTest.h"
#include "flow/flow.h"
#if defined (__linux__) || defined (__FreeBSD__)
extern "C" {
void* folly_memcpy(void* dst, const void* src, uint32_t length);
}
@ -352,4 +352,6 @@ TEST_CASE("performance/memcpy/rte") {
return Void();
}
void forceLinkMemcpyPerfTests() {}
#endif // defined (__linux__) || defined (__FreeBSD__)
void forceLinkMemcpyPerfTests() {}