Skip to content

Commit 7e2dbb3

Browse files
committed
VPCLMULQDQ implementation for Intel's CRC32 folding.
Based on PR intel/zlib#28.
1 parent 63bc287 commit 7e2dbb3

10 files changed

Lines changed: 222 additions & 1 deletion

File tree

CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ elseif(BASEARCH_X86_FOUND)
110110
option(WITH_SSSE3 "Build with SSSE3" ON)
111111
option(WITH_SSE4 "Build with SSE4" ON)
112112
option(WITH_PCLMULQDQ "Build with PCLMULQDQ" ON)
113+
option(WITH_VPCLMULQDQ "Build with VPCLMULQDQ" ON)
113114
endif()
114115

115116
option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF)
@@ -784,11 +785,28 @@ if(WITH_OPTIM)
784785
add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG}\"")
785786
list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
786787
set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
788+
789+
if(WITH_VPCLMULQDQ)
790+
check_vpclmulqdq_intrinsics()
791+
if(HAVE_VPCLMULQDQ_INTRIN)
792+
add_definitions(-DX86_VPCLMULQDQ_CRC)
793+
set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_fold_vpclmulqdq.c)
794+
add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${VPCLMULFLAG}\"")
795+
list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
796+
set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${VPCLMULFLAG} ${NOLTOFLAG}")
797+
else()
798+
set(WITH_VPCLMULQDQ OFF)
799+
endif()
800+
else()
801+
set(WITH_VPCLMULQDQ OFF)
802+
endif()
787803
else()
788804
set(WITH_PCLMULQDQ OFF)
805+
set(WITH_VPCLMULQDQ OFF)
789806
endif()
790807
else()
791808
set(WITH_PCLMULQDQ OFF)
809+
set(WITH_VPCLMULQDQ OFF)
792810
endif()
793811
endif()
794812
endif()
@@ -1408,6 +1426,7 @@ elseif(BASEARCH_X86_FOUND)
14081426
add_feature_info(WITH_SSSE3 WITH_SSSE3 "Build with SSSE3")
14091427
add_feature_info(WITH_SSE4 WITH_SSE4 "Build with SSE4")
14101428
add_feature_info(WITH_PCLMULQDQ WITH_PCLMULQDQ "Build with PCLMULQDQ")
1429+
add_feature_info(WITH_VPCLMULQDQ WITH_VPCLMULQDQ "Build with VPCLMULQDQ")
14111430
endif()
14121431

14131432
add_feature_info(INSTALL_UTILS INSTALL_UTILS "Copy minigzip and minideflate during install")

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Features
2222
* Deflate medium and quick algorithms based on Intel's zlib fork
2323
* Support for CPU intrinsics when available
2424
* Adler32 implementation using SSSE3, AVX2, Neon, VMX & VSX
25-
* CRC32-B implementation using PCLMULQDQ & ACLE
25+
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, & ACLE
2626
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
2727
* Slide hash implementations using SSE2, AVX2, Neon, VMX & VSX
2828
* Compare256/258 implementations using SSE4.2 & AVX2
@@ -200,6 +200,7 @@ Advanced Build Options
200200
| WITH_SSE2 | | Build with SSE2 intrinsics | ON |
201201
| WITH_SSE4 | | Build with SSE4 intrinsics | ON |
202202
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
203+
| WITH_VPCLMULQDQ | --without-vpclmulqdq | Build with VPCLMULQDQ intrinsics | ON |
203204
| WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON |
204205
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
205206
| WITH_ALTIVEC | --without-altivec | Build with AltiVec (VMX) intrinsics | ON |

arch/x86/INDEX.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ Contents
55
|:-|:-|
66
|deflate_quick.c|SSE4 optimized deflate strategy for use as level 1|
77
|crc32_fold_pclmulqdq.c|SSE4 + PCLMULQDQ optimized CRC folding implementation|
8+
|crc32_fold_vpclmulqdq.c|VPCLMULQDQ optimized CRC folding implementation|
89
|slide_hash_sse2.c|SSE2 optimized slide_hash|

arch/x86/Makefile.in

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ SSE2FLAG=-msse2
1313
SSSE3FLAG=-mssse3
1414
SSE4FLAG=-msse4
1515
PCLMULFLAG=-mpclmul
16+
VPCLMULFLAG=-mvpclmulqdq
1617
NOLTOFLAG=
1718

1819
SRCDIR=.
@@ -29,6 +30,7 @@ all: \
2930
compare258_sse.o compare258_sse.lo \
3031
insert_string_sse.o insert_string_sse.lo \
3132
crc32_fold_pclmulqdq.o crc32_fold_pclmulqdq.lo \
33+
crc32_fold_vpclmulqdq.o crc32_fold_vpclmulqdq.lo \
3234
slide_hash_avx.o slide_hash_avx.lo \
3335
slide_hash_sse.o slide_hash_sse.lo
3436

@@ -74,6 +76,12 @@ crc32_fold_pclmulqdq.o:
7476
crc32_fold_pclmulqdq.lo:
7577
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_pclmulqdq.c
7678

79+
# Build the VPCLMULQDQ CRC folding unit; VPCLMULFLAG enables the ISA,
# NOLTOFLAG keeps intrinsics out of LTO (matches the other SIMD rules).
crc32_fold_vpclmulqdq.o:
	$(CC) $(CFLAGS) $(VPCLMULFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_vpclmulqdq.c

# Shared/PIC variant of the same object (SFLAGS + -DPIC).
crc32_fold_vpclmulqdq.lo:
	$(CC) $(SFLAGS) $(VPCLMULFLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_vpclmulqdq.c
84+
7785
slide_hash_avx.o:
7886
$(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx.c
7987

arch/x86/crc32_fold_pclmulqdq.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525

2626
#include "../../crc32_fold.h"
2727

28+
#ifdef X86_VPCLMULQDQ_CRC
29+
extern size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
30+
__m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len);
31+
#endif
32+
2833
static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
2934
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
3035
0x00000001, 0xc6e41596);
@@ -274,6 +279,16 @@ Z_INTERNAL void crc32_fold_copy_pclmulqdq(crc32_fold *crc, uint8_t *dst, const u
274279
xmm_crc_part = _mm_setzero_si128();
275280
}
276281

282+
#ifdef X86_VPCLMULQDQ_CRC
283+
if (x86_cpu_has_vpclmulqdq && (len >= 256)) {
284+
size_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
285+
286+
len -= n;
287+
src += n;
288+
dst += n;
289+
}
290+
#endif
291+
277292
while (len >= 64) {
278293
crc32_fold_load((__m128i *)src, &xmm_t0, &xmm_t1, &xmm_t2, &xmm_t3);
279294

arch/x86/crc32_fold_vpclmulqdq.c

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
/* crc32_fold_vpclmulqdq.c -- VPCLMULQDQ-based CRC32 folding implementation.
 * Copyright Wang Yang (wangyang.guo@intel.com)
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
#ifdef X86_VPCLMULQDQ_CRC
#include "../../zutil.h"

#include <immintrin.h>

/* Fold the running CRC state across the input 256 bytes (16 x 128-bit lanes)
 * at a time using VPCLMULQDQ on 512-bit registers, while copying src to dst.
 *
 * On entry the intermediate CRC state lives in the four xmm_crc registers
 * (as maintained by the PCLMULQDQ path); on exit the updated state is
 * written back through those pointers.  Returns the number of bytes
 * consumed (a multiple of 256); the caller folds the remainder.
 * Caller must guarantee len >= 256.
 */
size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
    __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len) {
    size_t len_tmp = len;
    __m512i zmm_t0, zmm_t1, zmm_t2, zmm_t3;
    __m512i zmm_crc0, zmm_crc1, zmm_crc2, zmm_crc3;
    __m512i z0, z1, z2, z3;
    /* fold-by-4 constants (same polynomial constants as the 128-bit
     * PCLMULQDQ path, broadcast into all four 128-bit lanes) */
    z_const __m512i zmm_fold4 = _mm512_set4_epi32(
        0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
    /* fold-by-16 constants: advance the CRC by 16 x 128 bits per step */
    z_const __m512i zmm_fold16 = _mm512_set4_epi32(
        0x00000001, 0x1542778a, 0x00000001, 0x322d1430);

    // zmm register init
    zmm_crc0 = _mm512_setzero_si512();
    zmm_t0 = _mm512_loadu_si512((__m512i *)src);
    zmm_crc1 = _mm512_loadu_si512((__m512i *)src + 1);
    zmm_crc2 = _mm512_loadu_si512((__m512i *)src + 2);
    zmm_crc3 = _mm512_loadu_si512((__m512i *)src + 3);

    /* already have intermediate CRC in xmm registers
     * fold4 with 4 xmm_crc to get zmm_crc0
     */
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc0, 0);
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc1, 1);
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc2, 2);
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc3, 3);
    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_xor_si512(z0, zmm_crc0);
    zmm_crc0 = _mm512_xor_si512(zmm_crc0, zmm_t0);

    /* pass the first 256 input bytes through to dst unchanged */
    _mm512_storeu_si512((__m512i *)dst, zmm_t0);
    _mm512_storeu_si512((__m512i *)dst + 1, zmm_crc1);
    _mm512_storeu_si512((__m512i *)dst + 2, zmm_crc2);
    _mm512_storeu_si512((__m512i *)dst + 3, zmm_crc3);
    len -= 256;
    src += 256;
    dst += 256;

    // fold-16 loops
    while (len >= 256) {
        zmm_t0 = _mm512_loadu_si512((__m512i *)src);
        zmm_t1 = _mm512_loadu_si512((__m512i *)src + 1);
        zmm_t2 = _mm512_loadu_si512((__m512i *)src + 2);
        zmm_t3 = _mm512_loadu_si512((__m512i *)src + 3);

        /* high-half products of the current state with the fold-16 constant */
        z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold16, 0x01);
        z1 = _mm512_clmulepi64_epi128(zmm_crc1, zmm_fold16, 0x01);
        z2 = _mm512_clmulepi64_epi128(zmm_crc2, zmm_fold16, 0x01);
        z3 = _mm512_clmulepi64_epi128(zmm_crc3, zmm_fold16, 0x01);

        /* low-half products */
        zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold16, 0x10);
        zmm_crc1 = _mm512_clmulepi64_epi128(zmm_crc1, zmm_fold16, 0x10);
        zmm_crc2 = _mm512_clmulepi64_epi128(zmm_crc2, zmm_fold16, 0x10);
        zmm_crc3 = _mm512_clmulepi64_epi128(zmm_crc3, zmm_fold16, 0x10);

        /* combine products and mix in the new data block */
        zmm_crc0 = _mm512_xor_si512(z0, zmm_crc0);
        zmm_crc1 = _mm512_xor_si512(z1, zmm_crc1);
        zmm_crc2 = _mm512_xor_si512(z2, zmm_crc2);
        zmm_crc3 = _mm512_xor_si512(z3, zmm_crc3);

        zmm_crc0 = _mm512_xor_si512(zmm_crc0, zmm_t0);
        zmm_crc1 = _mm512_xor_si512(zmm_crc1, zmm_t1);
        zmm_crc2 = _mm512_xor_si512(zmm_crc2, zmm_t2);
        zmm_crc3 = _mm512_xor_si512(zmm_crc3, zmm_t3);

        _mm512_storeu_si512((__m512i *)dst, zmm_t0);
        _mm512_storeu_si512((__m512i *)dst + 1, zmm_t1);
        _mm512_storeu_si512((__m512i *)dst + 2, zmm_t2);
        _mm512_storeu_si512((__m512i *)dst + 3, zmm_t3);
        len -= 256;
        src += 256;
        dst += 256;
    }
    // zmm_crc[0,1,2,3] -> zmm_crc0
    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_xor_si512(z0, zmm_crc0);
    zmm_crc0 = _mm512_xor_si512(zmm_crc0, zmm_crc1);

    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_xor_si512(z0, zmm_crc0);
    zmm_crc0 = _mm512_xor_si512(zmm_crc0, zmm_crc2);

    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_xor_si512(z0, zmm_crc0);
    zmm_crc0 = _mm512_xor_si512(zmm_crc0, zmm_crc3);

    // zmm_crc0 -> xmm_crc[0, 1, 2, 3]
    *xmm_crc0 = _mm512_extracti32x4_epi32(zmm_crc0, 0);
    *xmm_crc1 = _mm512_extracti32x4_epi32(zmm_crc0, 1);
    *xmm_crc2 = _mm512_extracti32x4_epi32(zmm_crc0, 2);
    *xmm_crc3 = _mm512_extracti32x4_epi32(zmm_crc0, 3);

    return (len_tmp - len); // return n bytes processed
}
#endif

arch/x86/x86.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Z_INTERNAL int x86_cpu_has_sse2;
2222
Z_INTERNAL int x86_cpu_has_ssse3;
2323
Z_INTERNAL int x86_cpu_has_sse42;
2424
Z_INTERNAL int x86_cpu_has_pclmulqdq;
25+
Z_INTERNAL int x86_cpu_has_vpclmulqdq;
2526
Z_INTERNAL int x86_cpu_has_tzcnt;
2627

2728
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
@@ -73,8 +74,10 @@ void Z_INTERNAL x86_check_features(void) {
7374
x86_cpu_has_tzcnt = ebx & 0x8;
7475
// check AVX2 bit
7576
x86_cpu_has_avx2 = ebx & 0x20;
77+
x86_cpu_has_vpclmulqdq = ecx & 0x400;
7678
} else {
7779
x86_cpu_has_tzcnt = 0;
7880
x86_cpu_has_avx2 = 0;
81+
x86_cpu_has_vpclmulqdq = 0;
7982
}
8083
}

arch/x86/x86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ extern int x86_cpu_has_sse2;
1111
extern int x86_cpu_has_ssse3;
1212
extern int x86_cpu_has_sse42;
1313
extern int x86_cpu_has_pclmulqdq;
14+
extern int x86_cpu_has_vpclmulqdq;
1415
extern int x86_cpu_has_tzcnt;
1516

1617
void Z_INTERNAL x86_check_features(void);

cmake/detect-intrinsics.cmake

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,32 @@ macro(check_pclmulqdq_intrinsics)
8686
endif()
8787
endmacro()
8888

# check_vpclmulqdq_intrinsics
# Detects compiler support for VPCLMULQDQ (512-bit carry-less multiply)
# intrinsics. Sets VPCLMULFLAG to the compile flag needed (unless building
# with native flags) and HAVE_VPCLMULQDQ_INTRIN to the probe result.
macro(check_vpclmulqdq_intrinsics)
    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        if(NOT NATIVEFLAG)
            set(VPCLMULFLAG "-mvpclmulqdq")
        endif()
    endif()
    # Check whether compiler supports VPCLMULQDQ intrinsics
    # (skipped for 32-bit Apple targets, which cannot use AVX-512)
    if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
        set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG}")
        check_c_source_compile_or_run(
            "#include <immintrin.h>
            int main(void) {
                __m512i a = _mm512_setzero_si512();
                __m512i b = _mm512_setzero_si512();
                __m512i c = _mm512_clmulepi64_epi128(a, b, 0x10);
                (void)c;
                return 0;
            }"
            HAVE_VPCLMULQDQ_INTRIN
        )
        set(CMAKE_REQUIRED_FLAGS)
    else()
        set(HAVE_VPCLMULQDQ_INTRIN OFF)
    endif()
endmacro()
89115
macro(check_ppc_intrinsics)
90116
# Check if compiler supports AltiVec
91117
set(CMAKE_REQUIRED_FLAGS "-maltivec")

configure

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ compat=0
9090
cover=0
9191
build32=0
9292
build64=0
93+
buildvpclmulqdq=1
9394
buildacle=1
9495
buildaltivec=1
9596
buildpower8=1
@@ -108,6 +109,7 @@ ssse3flag="-mssse3"
108109
sse4flag="-msse4"
109110
sse42flag="-msse4.2"
110111
pclmulflag="-mpclmul"
112+
vpclmulflag="-mvpclmulqdq"
111113
acleflag=
112114
neonflag=
113115
noltoflag="-fno-lto"
@@ -190,6 +192,7 @@ case "$1" in
190192
--cover) cover=1; shift ;;
191193
-3* | --32) build32=1; shift ;;
192194
-6* | --64) build64=1; shift ;;
195+
--without-vpclmulqdq) buildvpclmulqdq=0; shift ;;
193196
--without-acle) buildacle=0; shift ;;
194197
--without-neon) buildneon=0; shift ;;
195198
--without-altivec) buildaltivec=0 ; shift ;;
@@ -256,6 +259,7 @@ if test $native -eq 1; then
256259
sse4flag=""
257260
sse42flag=""
258261
pclmulflag=""
262+
vpclmulflag=""
259263
noltoflag=""
260264
fi
261265

@@ -1084,6 +1088,28 @@ EOF
10841088
fi
10851089
}
10861090

1091+
check_vpclmulqdq_intrinsics() {
    # Check whether compiler supports VPCLMULQDQ intrinsics by compiling a
    # minimal probe; sets HAVE_VPCLMULQDQ_INTRIN to 1 on success, 0 otherwise.
    cat > $test.c << EOF
#include <immintrin.h>
#include <wmmintrin.h>
int main(void) {
    __m512i a = _mm512_setzero_si512();
    __m512i b = _mm512_setzero_si512();
    __m512i c = _mm512_clmulepi64_epi128(a, b, 0x10);
    (void)c;
    return 0;
}
EOF
    if try ${CC} ${CFLAGS} ${vpclmulflag} $test.c; then
        echo "Checking for VPCLMULQDQ intrinsics ... Yes." | tee -a configure.log
        HAVE_VPCLMULQDQ_INTRIN=1
    else
        echo "Checking for VPCLMULQDQ intrinsics ... No." | tee -a configure.log
        HAVE_VPCLMULQDQ_INTRIN=0
    fi
}
1112+
10871113
check_ppc_intrinsics() {
10881114
cat > $test.c << EOF
10891115
#include <altivec.h>
@@ -1372,6 +1398,17 @@ case "${ARCH}" in
13721398
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_fold_pclmulqdq.o"
13731399
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_fold_pclmulqdq.lo"
13741400
fi
1401+
1402+
if test $buildvpclmulqdq -eq 1; then
1403+
check_vpclmulqdq_intrinsics
1404+
1405+
if test ${HAVE_VPCLMULQDQ_INTRIN} -eq 1; then
1406+
CFLAGS="${CFLAGS} -DX86_VPCLMULQDQ_CRC"
1407+
SFLAGS="${SFLAGS} -DX86_VPCLMULQDQ_CRC"
1408+
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_fold_vpclmulqdq.o"
1409+
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_fold_vpclmulqdq.lo"
1410+
fi
1411+
fi
13751412
fi
13761413
;;
13771414

@@ -1778,6 +1815,7 @@ echo sse2flag = $sse2flag >> configure.log
17781815
echo ssse3flag = $ssse3flag >> configure.log
17791816
echo sse4flag = $sse4flag >> configure.log
17801817
echo pclmulflag = $pclmulflag >> configure.log
1818+
echo vpclmulflag = $vpclmulflag >> configure.log
17811819
echo acleflag = $acleflag >> configure.log
17821820
echo neonflag = $neonflag >> configure.log
17831821
echo ARCHDIR = ${ARCHDIR} >> configure.log
@@ -1911,6 +1949,7 @@ sed < $SRCDIR/$ARCHDIR/Makefile.in "
19111949
/^SSSE3FLAG *=/s#=.*#=$ssse3flag#
19121950
/^SSE4FLAG *=/s#=.*#=$sse4flag#
19131951
/^PCLMULFLAG *=/s#=.*#=$pclmulflag#
1952+
/^VPCLMULFLAG *=/s#=.*#=$vpclmulflag#
19141953
/^ACLEFLAG *=/s#=.*#=$acleflag#
19151954
/^NEONFLAG *=/s#=.*#=$neonflag#
19161955
/^NOLTOFLAG *=/s#=.*#=$noltoflag#

0 commit comments

Comments
 (0)