diff --git a/Fix-build-errot-on-x86_64.patch b/Fix-build-error-on-x86_64.patch similarity index 100% rename from Fix-build-errot-on-x86_64.patch rename to Fix-build-error-on-x86_64.patch diff --git a/Fix-hyperscan-gcc10.patch b/Fix-hyperscan-gcc10.patch index 0e23997..9852254 100644 --- a/Fix-hyperscan-gcc10.patch +++ b/Fix-hyperscan-gcc10.patch @@ -1,19 +1,37 @@ +From f6f765b3c022cbf01c86dac7f9875cf18e9f9980 Mon Sep 17 00:00:00 2001 +From: sdlzx +Date: Wed, 6 Oct 2021 10:25:36 +0800 +Subject: [PATCH] Fix hyperscan build error + +The command "gcc -Q --help=target" outputs nothing during obs build, +so we manually set "GNUCC_ARCH" to "native" to avoid string manipulation errors. + +Signed-off-by: sdlzx +--- + CMakeLists.txt | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + diff --git a/CMakeLists.txt b/CMakeLists.txt -index ef09142..3eb6d73 100644 +index b5f8fb4..5cf41ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -194,8 +194,12 @@ else() - string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" - GNUCC_ARCH "${_GCC_OUTPUT}") - -- # test the parsed flag -- set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) -+ # test the parsed flag -+ if (!GNUCC_ARCH EQUAL NULL) -+ set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) -+ else () -+ set (EXEC_ARGS ${CC_ARG1} -E - -mtune=native) -+ endif() +@@ -197,9 +197,13 @@ else() execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} - OUTPUT_QUIET ERROR_QUIET - INPUT_FILE /dev/null + OUTPUT_VARIABLE _GCC_OUTPUT) + string(FIND "${_GCC_OUTPUT}" "march" POS) +- string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) +- string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1" +- GNUCC_ARCH "${_GCC_OUTPUT}") ++ if (POS EQUAL -1) ++ set (GNUCC_ARCH "native") ++ else() ++ string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) ++ string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1" ++ GNUCC_ARCH "${_GCC_OUTPUT}") ++ endif() + + # test the parsed flag + set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) +-- +2.31.1 + diff --git a/hyperscan-5.2.1.tar.gz b/hyperscan-5.2.1.tar.gz deleted file mode 100644 index 8a86749..0000000 Binary files a/hyperscan-5.2.1.tar.gz and /dev/null differ diff --git a/hyperscan-5.4.0.tar.gz b/hyperscan-5.4.0.tar.gz new file mode 100644 index 0000000..94a94cf Binary files /dev/null and b/hyperscan-5.4.0.tar.gz differ diff --git a/hyperscan-aarch64-support.patch b/hyperscan-aarch64-support.patch index ce442f2..4ec64bf 100644 --- a/hyperscan-aarch64-support.patch +++ b/hyperscan-aarch64-support.patch @@ -1,4 +1,4 @@ -From 4b9292cffbf650004a52507c2994cb3ef12d7330 Mon Sep 17 00:00:00 2001 +From ddbbd2d1c012b53a1370927c065647e799368d0f Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 20 Jul 2020 17:20:15 +0800 Subject: [PATCH] Add aarch64 support @@ -30,7 +30,7 @@ Subject: [PATCH] Add aarch64 support src/util/popcount.h | 6 +- src/util/simd_arm.h | 1069 ++++ src/util/simd_types.h | 42 +- - src/util/simd_utils.h | 1297 +--- + src/util/simd_utils.h | 1389 +---- src/util/simd_x86.h | 1334 ++++ src/util/state_compress.c | 42 +- tools/hscollider/CMakeLists.txt | 9 +- @@ -38,7 +38,7 @@ Subject: [PATCH] Add aarch64 support unit/internal/simd_utils.cpp | 128 +- util/CMakeLists.txt | 8 +- util/ExpressionParser.cpp | 397 ++ - 34 files changed, 10995 insertions(+), 1708 deletions(-) + 34 files changed, 10994 insertions(+), 1801 deletions(-) create mode 100644 .travis.yml create mode 100644 ThirdPartyNotice.md create mode 100644 src/parser/Parser.cpp @@ -101,7 +101,7 @@ index 0000000..7bfc3f1 + - sudo make collide_quick_test_block \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt -index 83197af..ef09142 100644 +index 8bc6077..b5f8fb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,6 +74,7 @@ include (${CMAKE_MODULE_PATH}/boost.cmake) @@ -112,7 +112,7 @@ index 83197af..ef09142 100644 if(PYTHONINTERP_FOUND) set(PYTHON ${PYTHON_EXECUTABLE}) -@@ -182,24 +183,30 @@ else() +@@ -189,24 +190,30 @@ else() # cpuid info and then chooses the best microarch it can (and replaces # the flag), so use that for tune. @@ -121,9 +121,9 @@ index 83197af..ef09142 100644 - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) - execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} - OUTPUT_VARIABLE _GCC_OUTPUT) -- string(FIND "${_GCC_OUTPUT}" "Known" POS) -- string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT) -- string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" +- string(FIND "${_GCC_OUTPUT}" "march" POS) +- string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) +- string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1" - GNUCC_ARCH "${_GCC_OUTPUT}") - - # test the parsed flag @@ -140,9 +140,9 @@ index 83197af..ef09142 100644 + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) + execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} + OUTPUT_VARIABLE _GCC_OUTPUT) -+ string(FIND "${_GCC_OUTPUT}" "Known" POS) -+ string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT) -+ string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" ++ string(FIND "${_GCC_OUTPUT}" "march" POS) ++ string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) ++ string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1" + GNUCC_ARCH "${_GCC_OUTPUT}") + + # test the parsed flag @@ -161,7 +161,7 @@ index 83197af..ef09142 100644 endif() set(TUNE_FLAG ${GNUCC_ARCH}) else () -@@ -232,6 +239,13 @@ else() +@@ -239,6 +246,13 @@ else() set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing") @@ -175,7 +175,7 @@ index 83197af..ef09142 100644 if (NOT RELEASE_BUILD) # -Werror is most useful during development, don't potentially break # release builds -@@ -245,11 +259,19 @@ else() +@@ -252,11 +266,19 @@ else() endif() if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) @@ -197,7 +197,7 @@ index 83197af..ef09142 100644 endif() if(CMAKE_COMPILER_IS_GNUCC) -@@ -272,7 +294,6 @@ else() +@@ -279,7 +301,6 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") endif() @@ -205,7 +205,7 @@ index 83197af..ef09142 100644 if (CMAKE_C_COMPILER_ID MATCHES "Intel") set(SKYLAKE_FLAG "-xCORE-AVX512") else () -@@ -281,10 +302,18 @@ else() +@@ -289,10 +310,18 @@ else() endif() CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) @@ -228,7 +228,7 @@ index 83197af..ef09142 100644 CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN) CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC) -@@ -317,6 +346,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") +@@ -325,6 +354,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") set (FAT_RUNTIME_REQUISITES FALSE) @@ -238,8 +238,8 @@ index 83197af..ef09142 100644 else() include (${CMAKE_MODULE_PATH}/attrib.cmake) if (NOT HAS_C_ATTR_IFUNC) -@@ -329,7 +361,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") - CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF) +@@ -337,7 +369,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") + CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF) endif () -include (${CMAKE_MODULE_PATH}/arch.cmake) @@ -249,7 +249,7 @@ index 83197af..ef09142 100644 # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) -@@ -395,12 +429,6 @@ if (CXX_IGNORED_ATTR) +@@ -403,12 +437,6 @@ if (CXX_IGNORED_ATTR) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes") endif() @@ -262,7 +262,7 @@ index 83197af..ef09142 100644 # note this for later # g++ doesn't have this flag but clang does CHECK_CXX_COMPILER_FLAG("-Wweak-vtables" CXX_WEAK_VTABLES) -@@ -449,6 +477,14 @@ else() +@@ -463,6 +491,14 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() @@ -277,7 +277,7 @@ index 83197af..ef09142 100644 add_subdirectory(util) add_subdirectory(doc/dev-reference) -@@ -538,14 +574,27 @@ set_source_files_properties( +@@ -552,14 +588,27 @@ set_source_files_properties( PROPERTIES COMPILE_FLAGS "${RAGEL_C_FLAGS}") @@ -422,7 +422,7 @@ index 0000000..f95cdac +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/cmake/config.h.in b/cmake/config.h.in -index 203f0af..561c65f 100644 +index 5454643..336cf19 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,6 +15,9 @@ @@ -435,7 +435,7 @@ index 203f0af..561c65f 100644 /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT -@@ -45,6 +48,12 @@ +@@ -48,6 +51,12 @@ /* C compiler has intrin.h */ #cmakedefine HAVE_C_INTRIN_H @@ -797,10 +797,10 @@ index 59ad3f3..035d3ff 100644 } } diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h -index e770c32..57746c9 100644 +index 6c7335f..8304215 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h -@@ -129,7 +129,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, +@@ -131,7 +131,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + info->ctrlIndex; char *repeat_state = ctx->repeat_state + info->stateOffset; @@ -809,7 +809,7 @@ index e770c32..57746c9 100644 char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, cyclic_on); -@@ -138,7 +138,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, +@@ -140,7 +140,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, assert(e->trigger == LIMEX_TRIGGER_TUG); enum TriggerResult rv = processTugTrigger(repeat, repeat_ctrl, repeat_state, offset); @@ -818,7 +818,7 @@ index e770c32..57746c9 100644 *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; DEBUG_PRINTF("tug found no valid matches in repeat state\n"); return 1; // continue -@@ -148,7 +148,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, +@@ -150,7 +150,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, assert(e->hasSquash == LIMEX_SQUASH_TUG); *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); return 1; // continue @@ -827,7 +827,7 @@ index e770c32..57746c9 100644 new_cache->br = 1; } else { assert(rv == TRIGGER_SUCCESS); -@@ -158,7 +158,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, +@@ -160,7 +160,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, } // Some exceptions fire accepts. @@ -836,7 +836,7 @@ index e770c32..57746c9 100644 if (flags & CALLBACK_OUTPUT) { const ReportID *reports = (const ReportID *)((const char *)limex + e->reports); -@@ -169,7 +169,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, +@@ -171,7 +171,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, return 0; // halt } if (*cacheable == CACHE_RESULT) { @@ -845,7 +845,7 @@ index e770c32..57746c9 100644 new_cache->reports = reports; } else { *cacheable = DO_NOT_CACHE_RESULT; -@@ -192,8 +192,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, +@@ -194,8 +194,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Some exceptions squash states behind them. Note that we squash states in // 'succ', not local_succ. @@ -856,7 +856,7 @@ index e770c32..57746c9 100644 *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); if (*cacheable == CACHE_RESULT) { *cacheable = DO_NOT_CACHE_RESULT; -@@ -266,12 +266,12 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, +@@ -331,12 +331,12 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, u32 idx = local_index + base_index[t]; const EXCEPTION_T *e = &exceptions[idx]; @@ -871,7 +871,7 @@ index e770c32..57746c9 100644 return PE_RV_HALT; } } while (word); -@@ -283,7 +283,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, +@@ -349,7 +349,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, *succ = OR_STATE(*succ, ctx->local_succ); #endif @@ -880,7 +880,7 @@ index e770c32..57746c9 100644 ctx->cached_estate = estate; #ifndef BIG_MODEL ctx->cached_esucc = local_succ; -@@ -293,7 +293,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, +@@ -359,7 +359,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { @@ -890,10 +890,10 @@ index e770c32..57746c9 100644 } } diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h -index db703f0..e3e47e7 100644 +index 23b1bd9..0e27c79 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h -@@ -118,7 +118,7 @@ struct NFAException##size { \ +@@ -119,7 +119,7 @@ struct NFAException##size { \ u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */ \ u8 hasSquash; /**< from enum LimExSquash */ \ u8 trigger; /**< from enum LimExTrigger */ \ @@ -8693,11 +8693,11 @@ index 976208b..4456679 100644 for (; d + 16 <= d_end; d_end -= 16) { m128 data = loadu128(d_end - 16); diff --git a/src/util/arch.h b/src/util/arch.h -index c78ee9c..be2c23c 100644 +index 985fec6..fe4a910 100644 --- a/src/util/arch.h +++ b/src/util/arch.h -@@ -57,6 +57,10 @@ - #define HAVE_AVX512 +@@ -61,6 +61,10 @@ + #define HAVE_AVX512VBMI #endif +#if defined(__aarch64__) @@ -8707,7 +8707,7 @@ index c78ee9c..be2c23c 100644 /* * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros */ -@@ -83,4 +87,11 @@ +@@ -87,4 +91,11 @@ #define NO_ASM #endif @@ -8720,7 +8720,7 @@ index c78ee9c..be2c23c 100644 + #endif // UTIL_ARCH_H_ diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c -index 0b529c0..1ad2ee4 100644 +index c00ce58..e0f6368 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -39,7 +39,7 @@ @@ -8732,16 +8732,16 @@ index 0b529c0..1ad2ee4 100644 if (check_avx2()) { DEBUG_PRINTF("AVX2 enabled\n"); cap |= HS_CPU_FEATURES_AVX2; -@@ -58,7 +58,7 @@ u64a cpuid_flags(void) { - (defined(FAT_RUNTIME) && !defined(BUILD_AVX512)) - cap &= ~HS_CPU_FEATURES_AVX512; +@@ -68,7 +68,7 @@ u64a cpuid_flags(void) { + (defined(FAT_RUNTIME) && !defined(BUILD_AVX512VBMI)) + cap &= ~HS_CPU_FEATURES_AVX512VBMI; #endif - +#endif return cap; } -@@ -68,6 +68,7 @@ struct family_id { +@@ -78,6 +78,7 @@ struct family_id { u32 tune; }; @@ -8749,15 +8749,15 @@ index 0b529c0..1ad2ee4 100644 /* from table 35-1 of the Intel 64 and IA32 Arch. Software Developer's Manual * and "Intel Architecture and Processor Identification With CPUID Model and * Family Numbers" */ -@@ -106,6 +107,7 @@ static const struct family_id known_microarch[] = { - { 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */ +@@ -121,6 +122,7 @@ static const struct family_id known_microarch[] = { + { 0x6, 0x6C, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon */ }; +#endif #ifdef DUMP_SUPPORT static UNUSED -@@ -127,6 +129,7 @@ const char *dumpTune(u32 tune) { +@@ -144,6 +146,7 @@ const char *dumpTune(u32 tune) { #endif u32 cpuid_tune(void) { @@ -8765,7 +8765,7 @@ index 0b529c0..1ad2ee4 100644 unsigned int eax, ebx, ecx, edx; cpuid(1, 0, &eax, &ebx, &ecx, &edx); -@@ -154,6 +157,6 @@ u32 cpuid_tune(void) { +@@ -171,6 +174,6 @@ u32 cpuid_tune(void) { DEBUG_PRINTF("found tune flag %s\n", dumpTune(tune) ); return tune; } @@ -8788,7 +8788,7 @@ index 527c6d5..3125bd1 100644 #define CPUID_H_ #endif diff --git a/src/util/cpuid_inline.h b/src/util/cpuid_inline.h -index b6768cc..0f515fd 100644 +index b7b4245..b228c1d 100644 --- a/src/util/cpuid_inline.h +++ b/src/util/cpuid_inline.h @@ -32,17 +32,20 @@ @@ -8820,7 +8820,7 @@ index b6768cc..0f515fd 100644 // ECX #define CPUID_SSE3 (1 << 0) -@@ -92,11 +96,12 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, +@@ -93,11 +97,12 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, #define CPUID_XCR0_AVX512 \ (CPUID_XCR0_OPMASK | CPUID_XCR0_ZMM_Hi256 | CPUID_XCR0_Hi16_ZMM) @@ -8834,7 +8834,7 @@ index b6768cc..0f515fd 100644 u32 a, d; __asm__ volatile ( "xgetbv\n" -@@ -206,6 +211,16 @@ int check_popcnt(void) { +@@ -252,6 +257,16 @@ int check_popcnt(void) { cpuid(1, 0, &eax, &ebx, &ecx, &edx); return !!(ecx & CPUID_POPCNT); } @@ -10047,10 +10047,10 @@ index 962cad6..62d39ec 100644 #endif /* SIMD_TYPES_H */ - diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h -index c144971..8a54ccc 100644 +index d1f060b..7e926b2 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h -@@ -26,1300 +26,13 @@ +@@ -26,1395 +26,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ @@ -10166,6 +10166,12 @@ index c144971..8a54ccc 100644 -#define eq128(a, b) _mm_cmpeq_epi8((a), (b)) -#define movemask128(a) ((u32)_mm_movemask_epi8((a))) - +-#if defined(HAVE_AVX512) +-static really_inline m128 cast512to128(const m512 in) { +- return _mm512_castsi512_si128(in); +-} +-#endif +- -static really_inline m128 set16x8(u8 c) { - return _mm_set1_epi8(c); -} @@ -10178,6 +10184,20 @@ index c144971..8a54ccc 100644 - return _mm_cvtsi128_si32(in); -} - +-#if defined(HAVE_AVX512) +-static really_inline u32 movd512(const m512 in) { +- // NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in), +- // so we use 2-step convertions to work around. +- return _mm_cvtsi128_si32(_mm512_castsi512_si128(in)); +-} +- +-static really_inline u64a movq512(const m512 in) { +- // NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in), +- // so we use 2-step convertions to work around. +- return _mm_cvtsi128_si64(_mm512_castsi512_si128(in)); +-} +-#endif +- -static really_inline u64a movq(const m128 in) { -#if defined(ARCH_X86_64) - return _mm_cvtsi128_si64(in); @@ -10231,6 +10251,28 @@ index c144971..8a54ccc 100644 - return _mm_or_si128(a,b); -} - +-#if defined(HAVE_AVX512VBMI) +-static really_inline m512 expand128(m128 a) { +- return _mm512_broadcast_i32x4(a); +-} +- +-static really_inline m512 expand256(m256 a) { +- return _mm512_broadcast_i64x4(a); +-} +- +-static really_inline m512 expand384(m384 a) { +- u64a *lo = (u64a*)&a.lo; +- u64a *mid = (u64a*)&a.mid; +- u64a *hi = (u64a*)&a.hi; +- return _mm512_set_epi64(0ULL, 0ULL, hi[1], hi[0], mid[1], mid[0], +- lo[1], lo[0]); +-} ++#if defined(__x86_64__) ++#include "simd_x86.h" ++#elif defined(__aarch64__) ++#include "simd_arm.h" + #endif + -static really_inline m128 andnot128(m128 a, m128 b) { - return _mm_andnot_si128(a, b); -} @@ -10281,8 +10323,8 @@ index c144971..8a54ccc 100644 -extern const u8 simd_onebit_masks[]; -#ifdef __cplusplus -} --#endif -- + #endif + -static really_inline -m128 mask1bit128(unsigned int n) { - assert(n < sizeof(m128) * 8); @@ -10346,6 +10388,12 @@ index c144971..8a54ccc 100644 -m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) { - return _mm512_maskz_shuffle_epi8(k, a, b); -} +- +-#if defined(HAVE_AVX512VBMI) +-#define vpermb512(idx, a) _mm512_permutexvar_epi8(idx, a) +-#define maskz_vpermb512(k, idx, a) _mm512_maskz_permutexvar_epi8(k, idx, a) +-#endif +- -#endif - -static really_inline @@ -10989,13 +11037,9 @@ index c144971..8a54ccc 100644 -#else - m512 rv = {zeroes256(), zeroes256()}; - return rv; -+#if defined(__x86_64__) -+#include "simd_x86.h" -+#elif defined(__aarch64__) -+#include "simd_arm.h" - #endif +-#endif -} - +- -static really_inline -m512 ones512(void) { -#if defined(HAVE_AVX512) @@ -11019,6 +11063,11 @@ index c144971..8a54ccc 100644 -} - -static really_inline +-m512 set16x32(u32 a) { +- return _mm512_set1_epi32(a); +-} +- +-static really_inline -m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0, - u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) { - return _mm512_set_epi64(hi_3, hi_2, hi_1, hi_0, @@ -11035,6 +11084,26 @@ index c144971..8a54ccc 100644 -m512 set4x128(m128 a) { - return _mm512_broadcast_i32x4(a); -} +- +-static really_inline +-m512 sadd_u8_m512(m512 a, m512 b) { +- return _mm512_adds_epu8(a, b); +-} +- +-static really_inline +-m512 max_u8_m512(m512 a, m512 b) { +- return _mm512_max_epu8(a, b); +-} +- +-static really_inline +-m512 min_u8_m512(m512 a, m512 b) { +- return _mm512_min_epu8(a, b); +-} +- +-static really_inline +-m512 sub_u8_m512(m512 a, m512 b) { +- return _mm512_sub_epi8(a, b); +-} -#endif - -static really_inline @@ -11222,6 +11291,22 @@ index c144971..8a54ccc 100644 -#endif -} - +-// unaligned store +-static really_inline +-void storeu512(void *ptr, m512 a) { +-#if defined(HAVE_AVX512) +- _mm512_storeu_si512((m512 *)ptr, a); +-#elif defined(HAVE_AVX2) +- storeu256(ptr, a.lo); +- storeu256((char *)ptr + 32, a.hi); +-#else +- storeu128(ptr, a.lo.lo); +- storeu128((char *)ptr + 16, a.lo.hi); +- storeu128((char *)ptr + 32, a.hi.lo); +- storeu128((char *)ptr + 48, a.hi.hi); +-#endif +-} +- -#if defined(HAVE_AVX512) -static really_inline -m512 loadu_maskz_m512(__mmask64 k, const void *ptr) { @@ -11234,9 +11319,19 @@ index c144971..8a54ccc 100644 -} - -static really_inline +-void storeu_mask_m512(void *ptr, __mmask64 k, m512 a) { +- _mm512_mask_storeu_epi8(ptr, k, a); +-} +- +-static really_inline -m512 set_mask_m512(__mmask64 k) { - return _mm512_movm_epi8(k); -} +- +-static really_inline +-m256 loadu_maskz_m256(__mmask32 k, const void *ptr) { +- return _mm256_maskz_loadu_epi8(k, ptr); +-} -#endif - -// packed unaligned store of first N bytes @@ -11355,8 +11450,6 @@ index c144971..8a54ccc 100644 -} - -#endif -+#endif -\ No newline at end of file diff --git a/src/util/simd_x86.h b/src/util/simd_x86.h new file mode 100644 index 0000000..59ac642 @@ -13937,5 +14030,5 @@ index 0000000..687fc39 + return (cs != ExpressionParser_error) && (p == pe); +} -- -2.23.0 +2.31.1 diff --git a/hyperscan.spec b/hyperscan.spec index e118dea..b991cc1 100644 --- a/hyperscan.spec +++ b/hyperscan.spec @@ -1,6 +1,6 @@ Name: hyperscan -Version: 5.2.1 -Release: 4 +Version: 5.4.0 +Release: 1 Summary: High-performance regular expression matching library License: BSD @@ -8,7 +8,7 @@ URL: https://www.hyperscan.io/ Source0: https://github.com/intel/%{name}/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz Patch0: hyperscan-aarch64-support.patch -Patch1: Fix-build-errot-on-x86_64.patch +Patch1: Fix-build-error-on-x86_64.patch Patch2: Fix-hyperscan-gcc10.patch BuildRequires: gcc-c++ @@ -75,6 +75,9 @@ needed for developing Hyperscan applications. %{_includedir}/hs/ %changelog +* Wed Oct 06 2021 sdlzx - 5.4.0-1 +- Update to 5.4.0 + * Wed Aug 04 2021 wangyong - 5.2.1-4 - Fix build error caused by GCC upgrade to GCC-10