mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-15 02:18:39 +08:00
Add cmake option to enable avx512
This commit is contained in:
parent
c5e2df99a2
commit
a99ceb9c42
@ -209,6 +209,25 @@ else()
|
||||
# -mavx
|
||||
# -msse4.2)
|
||||
|
||||
# Tentatively re-enabling vector instructions
|
||||
set(USE_AVX512F OFF CACHE BOOL "Enable AVX 512F instructions")
|
||||
if (USE_AVX512F)
|
||||
add_compile_options(-mavx512f)
|
||||
endif()
|
||||
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
|
||||
if (USE_AVX)
|
||||
add_compile_options(-mavx)
|
||||
endif()
|
||||
|
||||
# Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at compile time.
|
||||
# If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this
|
||||
# writing; see flow.cpp).
|
||||
#
|
||||
# The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large
|
||||
# copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy
|
||||
# for more information.
|
||||
#add_compile_options(-fno-builtin-memcpy)
|
||||
|
||||
if (USE_VALGRIND)
|
||||
add_compile_options(-DVALGRIND -DUSE_VALGRIND)
|
||||
endif()
|
||||
@ -243,12 +262,6 @@ else()
|
||||
-Wno-tautological-pointer-compare
|
||||
-Wno-format
|
||||
-Woverloaded-virtual)
|
||||
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
|
||||
if (USE_AVX)
|
||||
add_compile_options(-mavx)
|
||||
else()
|
||||
add_compile_options(-msse4)
|
||||
endif()
|
||||
if (USE_CCACHE)
|
||||
add_compile_options(
|
||||
-Wno-register
|
||||
@ -260,20 +273,6 @@ else()
|
||||
endif()
|
||||
if (GCC)
|
||||
add_compile_options(-Wno-pragmas)
|
||||
set(USE_AVX ON CACHE BOOL "Enable AVX instructions")
|
||||
if (USE_AVX)
|
||||
add_compile_options(-mavx)
|
||||
else()
|
||||
add_compile_options(-msse4)
|
||||
endif()
|
||||
# Intentionally using builtin memcpy. G++ does a good job on small memcpy's when the size is known at compile time.
|
||||
# If the size is not known, then it falls back on the memcpy that's available at runtime (rte_memcpy, as of this
|
||||
# writing; see flow.cpp).
|
||||
#
|
||||
# The downside of the builtin memcpy is that it's slower at large copies, so if we spend a lot of time on large
|
||||
# copies of sizes that are known at compile time, this might not be a win. See the output of performance/memcpy
|
||||
# for more information.
|
||||
#add_compile_options(-fno-builtin-memcpy)
|
||||
# Otherwise `state [[maybe_unused]] int x;` will issue a warning.
|
||||
# https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns-incorrectly-that-attribute-is
|
||||
add_compile_options(-Wno-attributes)
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <stdarg.h>
|
||||
#include <cinttypes>
|
||||
|
||||
#if defined (__linux__) || defined (__FreeBSD__)
|
||||
// For benchmarking; need a version of rte_memcpy that doesn't live in the same compilation unit as the test.
|
||||
void * rte_memcpy_noinline(void *__restrict __dest, const void *__restrict __src, size_t __n) {
|
||||
return rte_memcpy(__dest, __src, __n);
|
||||
@ -36,6 +37,7 @@ __attribute__((visibility ("default"))) void *memcpy (void *__restrict __dest, c
|
||||
// folly_memcpy is faster for small copies, but rte seems to win out in most other circumstances
|
||||
return rte_memcpy(__dest, __src, __n);
|
||||
}
|
||||
#endif // defined (__linux__) || defined (__FreeBSD__)
|
||||
|
||||
INetwork *g_network = 0;
|
||||
|
||||
|
@ -28,6 +28,8 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
|
||||
#include <flow/Platform.h>
|
||||
|
||||
#if defined (__linux__) || defined (__FreeBSD__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -50,8 +52,9 @@ extern "C" {
|
||||
static force_inline void *
|
||||
rte_memcpy(void *dst, const void *src, size_t n);
|
||||
|
||||
#ifdef __AVX__
|
||||
//#define RTE_MACHINE_CPUFLAG_AVX512F -- our g++ is too old for this
|
||||
#ifdef __AVX512F__
|
||||
#define RTE_MACHINE_CPUFLAG_AVX512F
|
||||
#elif defined(__AVX__)
|
||||
#define RTE_MACHINE_CPUFLAG_AVX2
|
||||
#endif
|
||||
|
||||
@ -905,4 +908,6 @@ rte_rdtsc(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* defined (__linux__) || defined (__FreeBSD__) */
|
||||
|
||||
#endif /* _RTE_MEMCPY_X86_64_H_ */
|
||||
|
@ -6,13 +6,13 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "flow/rte_memcpy.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "flow/flow.h"
|
||||
|
||||
#if defined (__linux__) || defined (__FreeBSD__)
|
||||
extern "C" {
|
||||
void* folly_memcpy(void* dst, const void* src, uint32_t length);
|
||||
}
|
||||
@ -352,4 +352,6 @@ TEST_CASE("performance/memcpy/rte") {
|
||||
return Void();
|
||||
}
|
||||
|
||||
void forceLinkMemcpyPerfTests() {}
|
||||
#endif // defined (__linux__) || defined (__FreeBSD__)
|
||||
|
||||
void forceLinkMemcpyPerfTests() {}
|
||||
|
Loading…
x
Reference in New Issue
Block a user