From e8f7aa00ee94935714bf545e1ea9b49fc8a2de15 Mon Sep 17 00:00:00 2001 From: pcherenkov <pcherenkov@gmail.com> Date: Wed, 4 Apr 2012 16:23:39 +0400 Subject: [PATCH] ported CRC32/cpu_feature to gcc-intrinsic-enabled code --- core/CMakeLists.txt | 3 + core/cpu_feature.m | 207 ++++++------------------------------------ core/crc32.c | 6 +- include/cpu_feature.h | 12 +-- include/crc32.h | 4 +- 5 files changed, 37 insertions(+), 195 deletions(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 8072287ed7..fb753cf491 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -48,6 +48,9 @@ execute_process(COMMAND ${CMAKE_COMMAND} -E touch_nocreate # set_property(DIRECTORY PROPERTY CLEAN_NO_CUSTOM 1) +set_source_files_properties(cpu_feature.m + PROPERTIES COMPILE_FLAGS "-msse3 -msse4") + # # Used by modules. # diff --git a/core/cpu_feature.m b/core/cpu_feature.m index 5ac4ad40ed..e9d2f22f47 100644 --- a/core/cpu_feature.m +++ b/core/cpu_feature.m @@ -27,202 +27,53 @@ #include <errno.h> #include <stdlib.h> -#include "cpu_feature.h" - -#if defined (__i386__) || defined (__x86_64__) - -enum { eAX=0, eBX, eCX, eDX }; - -static const struct cpuid_feature { - unsigned int ri; - u_int32_t bitmask; -} cpu_mask[] = { - {eDX, (1 << 28)}, /* HT */ - {eCX, (1 << 19)}, /* SSE 4.1 */ - {eCX, (1 << 20)}, /* SSE 4.2 */ - {eCX, (1 << 31)} /* HYPERV */ -}; -static const size_t LEN_cpu_mask = sizeof(cpu_mask) / sizeof (cpu_mask[0]); - -#define SCALE_F sizeof(unsigned long) - -#if defined (__x86_64__) - #define REX_PRE "0x48, " -#elif defined (__i386__) - #define REX_PRE +#if !defined (__x86_64__) && !defined (__i386__) + #error "Only x86 and x86_64 architectures supported" #endif +#ifndef __GNUC__ + #error This module uses GCC intrinsic header(s) and should be compiled using gcc. +#endif -/* Hw-calculate CRC32 per byte (for the unaligned portion of data buffer). */ -/* NOTE: the function below was adopted from Linux 2.6 kernel source tree, - licensed under GPL. 
*/ -static u_int32_t -crc32c_hw_byte(u_int32_t crc, unsigned char const *data, size_t length) -{ - while (length--) { - __asm__ __volatile__( - ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" - :"=S"(crc) - :"0"(crc), "c"(*data) - ); - data++; - } +/* GCC intrinsic headers */ +#include <cpuid.h> +#include <smmintrin.h> - return crc; -} +#include "cpu_feature.h" -/* Hw-calculate CRC32 for the given data buffer. */ -/* NOTE: the function below was adopted from Linux 2.6 kernel source tree, - licensed under GPL. */ -static u_int32_t -crc32c_hw_intel(u_int32_t crc, unsigned char const *buf, size_t len) +u_int32_t +crc32c_hw(u_int32_t crc, const unsigned char *buf, unsigned int len) { - unsigned int iquotient = len / SCALE_F; - unsigned int iremainder = len % SCALE_F; - unsigned long *ptmp = (unsigned long *)buf; - - while (iquotient--) { - __asm__ __volatile__( - ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" - :"=S"(crc) - :"0"(crc), "c"(*ptmp) - ); - ptmp++; - } - - if (iremainder) { - crc = crc32c_hw_byte(crc, (unsigned char *)ptmp, - iremainder); - } - - return crc; -} +#define SCALE_F sizeof(unsigned long) + size_t nwords = len / SCALE_F, nbytes = len % SCALE_F; + unsigned long *pword; + unsigned char *pbyte; - -/* Toggle x86 flag-register bits, as per mask. 
*/ -static void -toggle_x86_flags(long mask, long* orig, long* toggled) -{ - long forig = 0, fres = 0; - -#if defined (__i386__) - asm ( - "pushfl; popl %%eax; movl %%eax, %0; xorl %2, %%eax; " - "pushl %%eax; popfl; pushfl; popl %%eax; pushl %0; popfl " - : "=r" (forig), "=a" (fres) - : "m" (mask) - ); -#elif __x86_64__ - asm ( - "pushfq; popq %%rax; movq %%rax, %0; xorq %2, %%rax; " - "pushq %%rax; popfq; pushfq; popq %%rax; pushq %0; popfq " - : "=r" (forig), "=a" (fres) - : "m" (mask) - ); + for (pword = (unsigned long *)buf; nwords--; ++pword) +#if defined (__x86_64__) + crc = (u_int32_t)_mm_crc32_u64((u_int64_t)crc, *pword); +#elif defined (__i386__) + crc = _mm_crc32_u32(crc, *pword); #endif - if (orig) - *orig = forig; - if (toggled) - *toggled = fres; - return; -} - + if (nbytes) + for (pbyte = (unsigned char*)pword; nbytes--; ++pbyte) + crc = _mm_crc32_u8(crc, *pbyte); -/* Is CPUID instruction available ? */ -static int -can_cpuid() -{ - long of = -1, tf = -1; - - /* x86 flag register masks */ - enum { - cpuf_AC = (1 << 18), /* bit 18 */ - cpuf_ID = (1 << 21) /* bit 21 */ - }; - - - /* Check if AC (alignment) flag could be toggled: - if not - it's i386, thus no CPUID. - */ - toggle_x86_flags(cpuf_AC, &of, &tf); - if ((of & cpuf_AC) == (tf & cpuf_AC)) { - return 0; - } - - /* Next try toggling CPUID (ID) flag. */ - toggle_x86_flags(cpuf_ID, &of, &tf); - if ((of & cpuf_ID) == (tf & cpuf_ID)) { - return 0; - } - - return 1; -} - - -/* Retrieve CPUID data using info as the EAX key. 
*/ -static void -get_cpuid(long info, long* eax, long* ebx, long* ecx, long *edx) -{ - *eax = info; - -#if defined (__i386__) - asm __volatile__ ( - "movl %%ebx, %%edi; " /* must save ebx for 32-bit PIC code */ - "cpuid; " - "movl %%ebx, %%esi; " - "movl %%edi, %%ebx; " - : "+a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) - : - : "%edi" - ); -#elif defined (__x86_64__) - asm __volatile__ ( - "cpuid; " - : "+a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) - ); -#endif + return crc; } -/* Check whether CPU has a certain feature. */ bool -cpu_has(unsigned int feature) -{ - long info = 1, reg[4] = {0,0,0,0}; - - if (!can_cpuid() || feature > LEN_cpu_mask) - return false; - - get_cpuid(info, &reg[eAX], &reg[eBX], &reg[eCX], &reg[eDX]); - - return (reg[cpu_mask[feature].ri] & cpu_mask[feature].bitmask) ? 1 : 0; -} - - -u_int32_t -crc32c_hw(u_int32_t crc, const unsigned char *buf, unsigned int len) +sse42_enabled_cpu() { - return crc32c_hw_intel (crc, (unsigned char const*)buf, len); -} - -#else /* other (yet unsupported architectures) */ + unsigned int ax, bx, cx, dx; -bool -cpu_has(unsigned int feature) -{ - (void)feature; - return false; -} + if (__get_cpuid(1 /* level */, &ax, &bx, &cx, &dx) == 0) + return 0; /* not supported */ -u_int32_t -crc32c_hw(u_int32_t crc, const unsigned char *buf, unsigned int len) -{ - (void)crc; (void)buf, (void)len; - abort(); - return 0; + return (cx & (1 << 20)) != 0; } -#endif /* defined (__i386__) || defined (__x86_64__) */ diff --git a/core/crc32.c b/core/crc32.c index 097f651166..1d77af4936 100644 --- a/core/crc32.c +++ b/core/crc32.c @@ -38,10 +38,6 @@ crc32_func crc32_calc = NULL; void crc32_init() { -#if defined (__i386__) || defined (__x86_64__) - crc32_calc = cpu_has(cpuf_sse4_2) ? &crc32c_hw : &crc32c; -#else - crc32_calc = &crc32c; -#endif + crc32_calc = sse42_enabled_cpu() ? 
&crc32c_hw : &crc32c; } diff --git a/include/cpu_feature.h b/include/cpu_feature.h index 5fdcaa899e..fad19e75e8 100644 --- a/include/cpu_feature.h +++ b/include/cpu_feature.h @@ -28,21 +28,13 @@ #include <sys/types.h> #include <stdbool.h> -/* CPU feature capabilities to use with cpu_has (feature). */ - -#if defined (__i386__) || defined (__x86_64__) -enum { - cpuf_ht = 0, cpuf_sse4_1, cpuf_sse4_2, cpuf_hypervisor -}; -#endif - -/* Check whether CPU has a certain feature. +/* Check whether CPU supports SSE 4.2 (needed to compute CRC32 in hardware). * * @param feature indetifier (see above) of the target feature * * @return true if feature is available, false if unavailable. */ -bool cpu_has(unsigned int feature); +bool sse42_enabled_cpu(); /* Hardware-calculate CRC32 for the given data buffer. diff --git a/include/crc32.h b/include/crc32.h index f1dba110f2..1e5d0aa65e 100644 --- a/include/crc32.h +++ b/include/crc32.h @@ -28,10 +28,10 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include <sys/types.h> #include <util.h> -typedef u32 (*crc32_func)(u32 crc, const unsigned char *buf, - unsigned int len); +typedef u_int32_t (*crc32_func)(u_int32_t crc, const unsigned char *buf, unsigned int len); /* * Pointer to an architecture-specific implementation of -- GitLab