if host_cpu == 'x86_64'
- if cc.compiles('''
+ if cc.get_id() == 'msvc'
+ cdata.set('HAVE_X86_64_POPCNTQ', 1)
+ elif cc.compiles('''
void main(void)
{
long long x = 1; long long r;
((underlying_type) (expr))
#endif
+/*
+ * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
+ * that compilers targeting this architecture understand SSE2 intrinsics.
+ */
+#if (defined(__x86_64__) || defined(_M_AMD64))
+#define USE_SSE2
+
+/*
+ * We use the Neon instructions if the compiler provides access to them (as
+ * indicated by __ARM_NEON) and we are on aarch64. While Neon support is
+ * technically optional for aarch64, it appears that all available 64-bit
+ * hardware does have it. Neon exists in some 32-bit hardware too, but we
+ * could not realistically use it there without a run-time check, which seems
+ * not worth the trouble for now.
+ */
+#elif defined(__aarch64__) && defined(__ARM_NEON)
+#define USE_NEON
+#endif
+
/* ----------------------------------------------------------------
* Section 9: system-specific hacks
*
return pg_leftmost_one_pos64(num - 1) + 1;
}
-/*
- * With MSVC on x86_64 builds, try using native popcnt instructions via the
- * __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
- * __builtin_popcount* intrinsic functions as they always emit popcnt
- * instructions.
- */
-#if defined(_MSC_VER) && defined(_M_AMD64)
-#define HAVE_X86_64_POPCNTQ
-#endif
-
-/*
- * On x86_64, we can use the hardware popcount instruction, but only if
- * we can verify that the CPU supports it via the cpuid instruction.
- *
- * Otherwise, we fall back to a hand-rolled implementation.
- */
-#ifdef HAVE_X86_64_POPCNTQ
-#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
-#define TRY_POPCNT_X86_64 1
-#endif
-#endif
-
-/*
- * On AArch64, we can use Neon instructions if the compiler provides access to
- * them (as indicated by __ARM_NEON). As in simd.h, we assume that all
- * available 64-bit hardware has Neon support.
- */
-#if defined(__aarch64__) && defined(__ARM_NEON)
-#define POPCNT_AARCH64 1
-#endif
-
extern int pg_popcount32_portable(uint32 word);
extern int pg_popcount64_portable(uint64 word);
extern uint64 pg_popcount_portable(const char *buf, int bytes);
extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask);
-#ifdef TRY_POPCNT_X86_64
+#ifdef HAVE_X86_64_POPCNTQ
/*
* Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check
* first.
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
-#elif POPCNT_AARCH64
+#elif defined(USE_NEON)
/* Use the Neon version of pg_popcount{32,64} without function pointer. */
extern int pg_popcount32(uint32 word);
extern int pg_popcount64(uint64 word);
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
-#endif /* TRY_POPCNT_X86_64 */
+#endif
/*
* Returns the number of 1-bits in buf.
#ifndef SIMD_H
#define SIMD_H
-#if (defined(__x86_64__) || defined(_M_AMD64))
+#if defined(USE_SSE2)
/*
- * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
- * that compilers targeting this architecture understand SSE2 intrinsics.
- *
* We use emmintrin.h rather than the comprehensive header immintrin.h in
* order to exclude extensions beyond SSE2. This is because MSVC, at least,
* will allow the use of intrinsics that haven't been enabled at compile
* time.
*/
#include <emmintrin.h>
-#define USE_SSE2
typedef __m128i Vector8;
typedef __m128i Vector32;
-#elif defined(__aarch64__) && defined(__ARM_NEON)
-/*
- * We use the Neon instructions if the compiler provides access to them (as
- * indicated by __ARM_NEON) and we are on aarch64. While Neon support is
- * technically optional for aarch64, it appears that all available 64-bit
- * hardware does have it. Neon exists in some 32-bit hardware too, but we
- * could not realistically use it there without a run-time check, which seems
- * not worth the trouble for now.
- */
+#elif defined(USE_NEON)
#include <arm_neon.h>
-#define USE_NEON
typedef uint8x16_t Vector8;
typedef uint32x4_t Vector32;
return popcnt;
}
-#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
+#if !defined(HAVE_X86_64_POPCNTQ) && !defined(USE_NEON)
/*
* When special CPU instructions are not available, there's no point in using
return pg_popcount_masked_portable(buf, bytes, mask);
}
-#endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */
+#endif /* ! HAVE_X86_64_POPCNTQ && ! USE_NEON */
*/
#include "c.h"
-#include "port/pg_bitutils.h"
-
-#ifdef POPCNT_AARCH64
+#ifdef USE_NEON
#include <arm_neon.h>
#endif
#endif
+#include "port/pg_bitutils.h"
+
/*
* The Neon versions are built regardless of whether we are building the SVE
* versions.
return popcnt;
}
-#endif /* POPCNT_AARCH64 */
+#endif /* USE_NEON */
*/
#include "c.h"
-#include "port/pg_bitutils.h"
-
-#ifdef TRY_POPCNT_X86_64
+#ifdef HAVE_X86_64_POPCNTQ
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
#include <cpuid.h>
#include <intrin.h>
#endif
+#include "port/pg_bitutils.h"
+
/*
* The SSE4.2 versions are built regardless of whether we are building the
* AVX-512 versions.
return popcnt;
}
-#endif /* TRY_POPCNT_X86_64 */
+#endif /* HAVE_X86_64_POPCNTQ */