mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-26 12:53:13 +00:00
parent
3252d0fd03
commit
73ecf1576b
21 changed files with 3039 additions and 2598 deletions
170
pcre2/CMakeLists.txt
vendored
170
pcre2/CMakeLists.txt
vendored
|
@ -1,6 +1,5 @@
|
|||
# CMakeLists.txt
|
||||
#
|
||||
#
|
||||
# This file enables PCRE2 to be built with the CMake configuration and build
|
||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
|
||||
# Converted to support PCRE2 from the original PCRE file, August 2014.
|
||||
|
@ -85,6 +84,14 @@
|
|||
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
|
||||
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
|
||||
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
|
||||
# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
|
||||
# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
|
||||
# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
|
||||
# 2020-04-16 enh added check for __attribute__((uninitialized))
|
||||
# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
|
||||
# library versioning.
|
||||
# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
|
||||
# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
|
@ -95,14 +102,26 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
|||
# GET_TARGET_PROPERTY. This should no longer be required.
|
||||
# CMAKE_POLICY(SET CMP0026 OLD)
|
||||
|
||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
|
||||
# on the command line.
|
||||
# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
|
||||
|
||||
# external packages
|
||||
FIND_PACKAGE( BZip2 )
|
||||
FIND_PACKAGE( ZLIB )
|
||||
FIND_PACKAGE( Readline )
|
||||
FIND_PACKAGE( Editline )
|
||||
|
||||
# Configuration checks
|
||||
|
||||
INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckCSourceCompiles)
|
||||
INCLUDE(CheckFunctionExists)
|
||||
INCLUDE(CheckSymbolExists)
|
||||
INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckTypeSize)
|
||||
|
||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||
|
@ -113,9 +132,19 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
|
|||
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
|
||||
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
|
||||
|
||||
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
|
||||
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
|
||||
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
|
||||
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
|
||||
CHECK_FUNCTION_EXISTS(memfd_create HAVE_MEMFD_CREATE)
|
||||
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
|
||||
CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV)
|
||||
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
|
||||
|
||||
set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
|
||||
CHECK_C_SOURCE_COMPILES(
|
||||
"int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
|
||||
HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
)
|
||||
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# User-configurable options
|
||||
#
|
||||
|
@ -171,8 +200,12 @@ SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
|
|||
SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
|
||||
"Enable support for Just-in-time compiling.")
|
||||
|
||||
SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
|
||||
"Enable SELinux compatible execmem allocator in JIT (experimental).")
|
||||
IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
|
||||
"Enable SELinux compatible execmem allocator in JIT (experimental).")
|
||||
ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
|
||||
ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
|
||||
SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
|
||||
"Enable use of Just-in-time compiling in pcre2grep.")
|
||||
|
@ -298,7 +331,19 @@ IF(PCRE2_SUPPORT_JIT)
|
|||
ENDIF(PCRE2_SUPPORT_JIT)
|
||||
|
||||
IF(PCRE2_SUPPORT_JIT_SEALLOC)
|
||||
SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
|
||||
SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
|
||||
CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED)
|
||||
UNSET(CMAKE_REQUIRED_DEFINITIONS)
|
||||
IF(${REQUIRED})
|
||||
IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
ADD_DEFINITIONS(-D_GNU_SOURCE)
|
||||
SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
|
||||
ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
MESSAGE(FATAL_ERROR "Your configuration is not supported")
|
||||
ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
|
||||
ELSE(${REQUIRED})
|
||||
SET(PCRE2_SUPPORT_JIT_SEALLOC OFF)
|
||||
ENDIF(${REQUIRED})
|
||||
ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)
|
||||
|
||||
IF(PCRE2GREP_SUPPORT_JIT)
|
||||
|
@ -394,12 +439,13 @@ file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
|
|||
LIMIT_COUNT 50 # Read only the first 50 lines of the file
|
||||
)
|
||||
|
||||
set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
|
||||
set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date"
|
||||
"libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version")
|
||||
foreach(configure_line ${configure_lines})
|
||||
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||
if (NOT ${_substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||
if (CMAKE_MATCH_1)
|
||||
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
|
@ -407,21 +453,74 @@ foreach(configure_line ${configure_lines})
|
|||
endforeach()
|
||||
endforeach()
|
||||
|
||||
macro(PARSE_LIB_VERSION VARIABLE_PREFIX)
|
||||
string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION})
|
||||
list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 ${VARIABLE_PREFIX}_VERSION_CURRENT)
|
||||
list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION)
|
||||
list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE)
|
||||
|
||||
math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}")
|
||||
math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
|
||||
math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
|
||||
set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}}")
|
||||
set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}")
|
||||
endmacro()
|
||||
|
||||
PARSE_LIB_VERSION(LIBPCRE2_POSIX)
|
||||
PARSE_LIB_VERSION(LIBPCRE2_8)
|
||||
PARSE_LIB_VERSION(LIBPCRE2_16)
|
||||
PARSE_LIB_VERSION(LIBPCRE2_32)
|
||||
|
||||
CONFIGURE_FILE(src/pcre2.h.in
|
||||
${PROJECT_BINARY_DIR}/pcre2.h
|
||||
@ONLY)
|
||||
|
||||
# What about pcre2-config and libpcre2.pc?
|
||||
# Generate pkg-config files
|
||||
|
||||
SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
|
||||
SET(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
|
||||
SET(exec_prefix "\${prefix}")
|
||||
SET(libdir "\${exec_prefix}/lib")
|
||||
SET(includedir "\${prefix}/include")
|
||||
CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
|
||||
SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
CONFIGURE_FILE(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
|
||||
SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
|
||||
SET(enable_pcre2_8 "yes")
|
||||
ELSE()
|
||||
SET(enable_pcre2_8 "no")
|
||||
ENDIF()
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
CONFIGURE_FILE(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
|
||||
SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
|
||||
SET(enable_pcre2_16 "yes")
|
||||
ELSE()
|
||||
SET(enable_pcre2_16 "no")
|
||||
ENDIF()
|
||||
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
CONFIGURE_FILE(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
|
||||
SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
|
||||
SET(enable_pcre2_32 "yes")
|
||||
ELSE()
|
||||
SET(enable_pcre2_32 "no")
|
||||
ENDIF()
|
||||
|
||||
CONFIGURE_FILE(pcre2-config.in pcre2-config @ONLY)
|
||||
|
||||
# Character table generation
|
||||
|
||||
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
|
||||
IF(PCRE2_REBUILD_CHARTABLES)
|
||||
ADD_EXECUTABLE(dftables src/dftables.c)
|
||||
ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
|
||||
ADD_CUSTOM_COMMAND(
|
||||
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
|
||||
DEPENDS dftables
|
||||
COMMAND dftables
|
||||
DEPENDS pcre2_dftables
|
||||
COMMAND pcre2_dftables
|
||||
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
)
|
||||
|
@ -446,6 +545,7 @@ SET(PCRE2_SOURCES
|
|||
src/pcre2_error.c
|
||||
src/pcre2_extuni.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
src/pcre2_match_data.c
|
||||
|
@ -464,6 +564,9 @@ SET(PCRE2_SOURCES
|
|||
src/pcre2_xclass.c
|
||||
)
|
||||
|
||||
SET(PCRE2POSIX_HEADERS src/pcre2posix.h)
|
||||
SET(PCRE2POSIX_SOURCES src/pcre2posix.c)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o
|
||||
|
@ -531,12 +634,20 @@ SET(targets)
|
|||
|
||||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-8
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET_TARGET_PROPERTIES(pcre2-8 PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_8_VERSION}
|
||||
SOVERSION ${LIBPCRE2_8_SOVERSION})
|
||||
SET(targets ${targets} pcre2-8)
|
||||
ADD_LIBRARY(pcre2-posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||
SET_PROPERTY(TARGET pcre2-posix
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET_TARGET_PROPERTIES(pcre2-posix PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_POSIX_VERSION}
|
||||
SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
|
||||
SET(targets ${targets} pcre2-posix)
|
||||
TARGET_LINK_LIBRARIES(pcre2-posix pcre2-8)
|
||||
|
||||
|
@ -554,8 +665,12 @@ ENDIF(PCRE2_BUILD_PCRE2_8)
|
|||
|
||||
IF(PCRE2_BUILD_PCRE2_16)
|
||||
ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-16
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
|
||||
SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_16_VERSION}
|
||||
SOVERSION ${LIBPCRE2_16_SOVERSION})
|
||||
SET(targets ${targets} pcre2-16)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
|
@ -572,8 +687,12 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
|
|||
|
||||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-32
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
||||
SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES
|
||||
COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
|
||||
MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
|
||||
MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
|
||||
VERSION ${LIBPCRE2_32_VERSION}
|
||||
SOVERSION ${LIBPCRE2_32_SOVERSION})
|
||||
SET(targets ${targets} pcre2-32)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
|
@ -746,6 +865,11 @@ INSTALL(TARGETS ${targets}
|
|||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
INSTALL(FILES ${pkg_config_files} DESTINATION lib/pkgconfig)
|
||||
INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
|
||||
DESTINATION bin
|
||||
# Set 0755 permissions
|
||||
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
|
||||
|
||||
INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)
|
||||
|
||||
|
|
6
pcre2/LICENCE
vendored
6
pcre2/LICENCE
vendored
|
@ -26,7 +26,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2019 University of Cambridge
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2019 Zoltan Herczeg
|
||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -48,7 +48,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2019 Zoltan Herczeg
|
||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
|
4
pcre2/config-cmake.h.in
vendored
4
pcre2/config-cmake.h.in
vendored
|
@ -1,5 +1,6 @@
|
|||
/* config.h for CMake builds */
|
||||
|
||||
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
|
||||
#cmakedefine HAVE_DIRENT_H 1
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
#cmakedefine HAVE_STDINT_H 1
|
||||
|
@ -10,7 +11,10 @@
|
|||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
|
||||
#cmakedefine HAVE_BCOPY 1
|
||||
#cmakedefine HAVE_MEMFD_CREATE 1
|
||||
#cmakedefine HAVE_MEMMOVE 1
|
||||
#cmakedefine HAVE_SECURE_GETENV 1
|
||||
#cmakedefine HAVE_STRERROR 1
|
||||
|
||||
#cmakedefine PCRE2_STATIC 1
|
||||
|
||||
|
|
12
pcre2/configure.ac
vendored
12
pcre2/configure.ac
vendored
|
@ -1,7 +1,13 @@
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [34])
|
||||
m4_define(pcre2_minor, [35])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2019-11-21])
|
||||
m4_define(pcre2_date, [2020-05-09])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [10:0:10])
|
||||
m4_define(libpcre2_16_version, [10:0:10])
|
||||
m4_define(libpcre2_32_version, [10:0:10])
|
||||
m4_define(libpcre2_posix_version, [2:3:0])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2019 University of Cambridge
|
||||
Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
|||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
|
@ -445,6 +448,7 @@ released, the numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -292,6 +292,7 @@ possessification, and if so, fills a list with its properties.
|
|||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
fcc points to the case-flipping table
|
||||
list points to output list
|
||||
list[0] will be filled with the opcode
|
||||
|
@ -304,7 +305,7 @@ Returns: points to the start of the next opcode if *code is accepted
|
|||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
|
||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
|
||||
uint32_t *list)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
|
@ -316,7 +317,8 @@ uint32_t chr;
|
|||
uint32_t *clist_dest;
|
||||
const uint32_t *clist_src;
|
||||
#else
|
||||
(void)utf; /* Suppress "unused parameter" compiler warning */
|
||||
(void)utf; /* Suppress "unused parameter" compiler warnings */
|
||||
(void)ucp;
|
||||
#endif
|
||||
|
||||
list[0] = c;
|
||||
|
@ -396,7 +398,7 @@ switch(c)
|
|||
list[2] = chr;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (chr < 128 || (chr < 256 && !utf))
|
||||
if (chr < 128 || (chr < 256 && !utf && !ucp))
|
||||
list[3] = fcc[chr];
|
||||
else
|
||||
list[3] = UCD_OTHERCASE(chr);
|
||||
|
@ -503,6 +505,7 @@ which case the base cannot be possessified.
|
|||
Arguments:
|
||||
code points to the byte code
|
||||
utf TRUE in UTF mode
|
||||
ucp TRUE in UCP mode
|
||||
cb compile data block
|
||||
base_list the data list of the base opcode
|
||||
base_end the end of the base opcode
|
||||
|
@ -512,7 +515,7 @@ Returns: TRUE if the auto-possessification is possible
|
|||
*/
|
||||
|
||||
static BOOL
|
||||
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
|
||||
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
|
||||
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
|
@ -651,7 +654,7 @@ for(;;)
|
|||
|
||||
while (*next_code == OP_ALT)
|
||||
{
|
||||
if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
|
||||
if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
|
||||
return FALSE;
|
||||
code = next_code + 1 + LINK_SIZE;
|
||||
next_code += GET(next_code, 1);
|
||||
|
@ -672,7 +675,8 @@ for(;;)
|
|||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||
|
||||
next_code += 1 + LINK_SIZE;
|
||||
if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
|
||||
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
|
||||
rec_limit))
|
||||
return FALSE;
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
@ -688,7 +692,7 @@ for(;;)
|
|||
/* We now have the next appropriate opcode to compare with the base. Check
|
||||
for a supported opcode, and load its properties. */
|
||||
|
||||
code = get_chr_property_list(code, utf, cb->fcc, list);
|
||||
code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||
if (code == NULL) return FALSE; /* Unsupported */
|
||||
|
||||
/* If either opcode is a small character list, set pointers for comparing
|
||||
|
@ -1100,7 +1104,6 @@ leaving the remainder of the pattern unpossessified.
|
|||
|
||||
Arguments:
|
||||
code points to start of the byte code
|
||||
utf TRUE in UTF mode
|
||||
cb compile data block
|
||||
|
||||
Returns: 0 for success
|
||||
|
@ -1108,13 +1111,15 @@ Returns: 0 for success
|
|||
*/
|
||||
|
||||
int
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
PCRE2_SPTR end;
|
||||
PCRE2_UCHAR *repeat_opcode;
|
||||
uint32_t list[8];
|
||||
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
|
||||
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
|
@ -1126,10 +1131,11 @@ for (;;)
|
|||
{
|
||||
c -= get_repeat_base(c) - OP_STAR;
|
||||
end = (c <= OP_MINUPTO) ?
|
||||
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
|
||||
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
|
||||
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
|
||||
|
||||
if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
|
||||
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
|
||||
&rec_limit))
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
|
@ -1181,11 +1187,11 @@ for (;;)
|
|||
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
|
||||
{
|
||||
/* end must not be NULL. */
|
||||
end = get_chr_property_list(code, utf, cb->fcc, list);
|
||||
end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
|
||||
|
||||
list[1] = (c & 1) == 0;
|
||||
|
||||
if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
|
||||
if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
|
|
|
@ -2,17 +2,21 @@
|
|||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file was automatically written by the dftables auxiliary
|
||||
/* This file was automatically written by the pcre2_dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE2 by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256. */
|
||||
|
||||
/*The dftables program (which is distributed with PCRE2) can be used to
|
||||
build alternative versions of this file. This is necessary if you are
|
||||
/* This set of tables was written in the C locale. */
|
||||
|
||||
/* The pcre2_ftables program (which is distributed with PCRE2) can be used
|
||||
to build alternative versions of this file. This is necessary if you are
|
||||
running in an EBCDIC environment, or if you want to default to a different
|
||||
encoding, for example ISO-8859-1. When dftables is run, it creates these
|
||||
tables in the current locale. This happens automatically if PCRE2 is
|
||||
configured with --enable-rebuild-chartables. */
|
||||
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
|
||||
these tables in the "C" locale by default. This happens automatically if
|
||||
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
|
||||
pcre2_dftables manually with the -L option to build tables using the LC_ALL
|
||||
locale. */
|
||||
|
||||
/* The following #include is present because without it gcc 4.x may remove
|
||||
the array definition from the final binary if PCRE2 is built into a static
|
||||
|
@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
|
|||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph print, punct, and cntrl. Other classes are built from combinations. */
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -1195,13 +1195,14 @@ if (code == NULL) return NULL;
|
|||
newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
|
||||
if (newcode == NULL) return NULL;
|
||||
memcpy(newcode, code, code->blocksize);
|
||||
newcode->executable_jit = NULL;
|
||||
|
||||
/* If the code is one that has been deserialized, increment the reference count
|
||||
in the decoded tables. */
|
||||
|
||||
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
||||
{
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||
(*ref_count)++;
|
||||
}
|
||||
|
||||
|
@ -1229,16 +1230,17 @@ if (code == NULL) return NULL;
|
|||
newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
|
||||
if (newcode == NULL) return NULL;
|
||||
memcpy(newcode, code, code->blocksize);
|
||||
newcode->executable_jit = NULL;
|
||||
|
||||
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
|
||||
newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
|
||||
code->memctl.memory_data);
|
||||
if (newtables == NULL)
|
||||
{
|
||||
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(newtables, code->tables, tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
|
||||
memcpy(newtables, code->tables, TABLES_LENGTH);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
|
||||
*ref_count = 1;
|
||||
|
||||
newcode->tables = newtables;
|
||||
|
@ -1259,6 +1261,8 @@ PCRE2_SIZE* ref_count;
|
|||
|
||||
if (code != NULL)
|
||||
{
|
||||
if (code->executable_jit != NULL)
|
||||
PRIV(jit_free)(code->executable_jit, &code->memctl);
|
||||
|
||||
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
||||
{
|
||||
|
@ -1266,7 +1270,7 @@ if (code != NULL)
|
|||
be freed when there are no more references to them. The *ref_count should
|
||||
always be > 0. */
|
||||
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||
if (*ref_count > 0)
|
||||
{
|
||||
(*ref_count)--;
|
||||
|
@ -3649,7 +3653,7 @@ while (ptr < ptrend)
|
|||
if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
|
||||
|
||||
/* If ( is not followed by ? it is either a capture or a special verb or an
|
||||
alpha assertion. */
|
||||
alpha assertion or a positive non-atomic lookahead. */
|
||||
|
||||
if (*ptr != CHAR_QUESTION_MARK)
|
||||
{
|
||||
|
@ -3681,10 +3685,10 @@ while (ptr < ptrend)
|
|||
break;
|
||||
|
||||
/* Handle "alpha assertions" such as (*pla:...). Most of these are
|
||||
synonyms for the historical symbolic assertions, but the script run ones
|
||||
are new. They are distinguished by starting with a lower case letter.
|
||||
Checking both ends of the alphabet makes this work in all character
|
||||
codes. */
|
||||
synonyms for the historical symbolic assertions, but the script run and
|
||||
non-atomic lookaround ones are new. They are distinguished by starting
|
||||
with a lower case letter. Checking both ends of the alphabet makes this
|
||||
work in all character codes. */
|
||||
|
||||
else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
|
||||
{
|
||||
|
@ -3743,9 +3747,7 @@ while (ptr < ptrend)
|
|||
goto POSITIVE_LOOK_AHEAD;
|
||||
|
||||
case META_LOOKAHEAD_NA:
|
||||
*parsed_pattern++ = meta;
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
goto POSITIVE_NONATOMIC_LOOK_AHEAD;
|
||||
|
||||
case META_LOOKAHEADNOT:
|
||||
goto NEGATIVE_LOOK_AHEAD;
|
||||
|
@ -4434,6 +4436,12 @@ while (ptr < ptrend)
|
|||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
|
||||
case CHAR_ASTERISK:
|
||||
POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */
|
||||
*parsed_pattern++ = META_LOOKAHEAD_NA;
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
|
||||
case CHAR_EXCLAMATION_MARK:
|
||||
NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */
|
||||
*parsed_pattern++ = META_LOOKAHEADNOT;
|
||||
|
@ -4443,20 +4451,23 @@ while (ptr < ptrend)
|
|||
|
||||
/* ---- Lookbehind assertions ---- */
|
||||
|
||||
/* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
|
||||
start of the name of a capturing group. */
|
||||
/* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?<
|
||||
is the start of the name of a capturing group. */
|
||||
|
||||
case CHAR_LESS_THAN_SIGN:
|
||||
if (ptrend - ptr <= 1 ||
|
||||
(ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
|
||||
(ptr[1] != CHAR_EQUALS_SIGN &&
|
||||
ptr[1] != CHAR_EXCLAMATION_MARK &&
|
||||
ptr[1] != CHAR_ASTERISK))
|
||||
{
|
||||
terminator = CHAR_GREATER_THAN_SIGN;
|
||||
goto DEFINE_NAME;
|
||||
}
|
||||
*parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
|
||||
META_LOOKBEHIND : META_LOOKBEHINDNOT;
|
||||
META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)?
|
||||
META_LOOKBEHINDNOT : META_LOOKBEHIND_NA;
|
||||
|
||||
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
|
||||
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
|
||||
*has_lookbehind = TRUE;
|
||||
offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
|
||||
PUTOFFSET(offset, parsed_pattern);
|
||||
|
@ -4629,8 +4640,6 @@ while (ptr < ptrend)
|
|||
*parsed_pattern++ = META_KET;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
|
||||
else top_nest--;
|
||||
}
|
||||
|
@ -4895,7 +4904,7 @@ range. */
|
|||
if ((options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((options & PCRE2_UTF) != 0)
|
||||
if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||
{
|
||||
int rc;
|
||||
uint32_t oc, od;
|
||||
|
@ -5310,7 +5319,8 @@ dynamically as we process the pattern. */
|
|||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
#else /* No UTF support */
|
||||
BOOL ucp = (options & PCRE2_UCP) != 0;
|
||||
#else /* No Unicode support */
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
||||
|
@ -5555,12 +5565,12 @@ for (;; pptr++)
|
|||
zerofirstcu = firstcu;
|
||||
zerofirstcuflags = firstcuflags;
|
||||
|
||||
/* For caseless UTF mode, check whether this character has more than
|
||||
one other case. If so, generate a special OP_NOTPROP item instead of
|
||||
/* For caseless UTF or UCP mode, check whether this character has more
|
||||
than one other case. If so, generate a special OP_NOTPROP item instead of
|
||||
OP_NOTI. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_CASELESS) != 0 &&
|
||||
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 &&
|
||||
(d = UCD_CASESET(c)) != 0)
|
||||
{
|
||||
*code++ = OP_NOTPROP;
|
||||
|
@ -5593,7 +5603,7 @@ for (;; pptr++)
|
|||
uint32_t d;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && c > 127) d = UCD_OTHERCASE(c); else
|
||||
if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else
|
||||
#endif
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
|
@ -6667,23 +6677,11 @@ for (;; pptr++)
|
|||
}
|
||||
|
||||
/* For a back reference, update the back reference map and the
|
||||
maximum back reference. Then, for each group, we must check to
|
||||
see if it is recursive, that is, it is inside the group that it
|
||||
references. A flag is set so that the group can be made atomic.
|
||||
*/
|
||||
maximum back reference. */
|
||||
|
||||
cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
|
||||
if (groupnumber > cb->top_backref)
|
||||
cb->top_backref = groupnumber;
|
||||
|
||||
for (oc = cb->open_caps; oc != NULL; oc = oc->next)
|
||||
{
|
||||
if (oc->number == groupnumber)
|
||||
{
|
||||
oc->flag = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7077,15 +7075,18 @@ for (;; pptr++)
|
|||
previous[GET(previous, 1)] != OP_ALT)
|
||||
goto END_REPEAT;
|
||||
|
||||
/* There is no sense in actually repeating assertions. The only
|
||||
potential use of repetition is in cases when the assertion is optional.
|
||||
Therefore, if the minimum is greater than zero, just ignore the repeat.
|
||||
If the maximum is not zero or one, set it to 1. */
|
||||
/* Perl allows all assertions to be quantified, and when they contain
|
||||
capturing parentheses and/or are optional there are potential uses for
|
||||
this feature. PCRE2 used to force the maximum quantifier to 1 on the
|
||||
invalid grounds that further repetition was never useful. This was
|
||||
always a bit pointless, since an assertion could be wrapped with a
|
||||
repeated group to achieve the effect. General repetition is now
|
||||
permitted, but if the maximum is unlimited it is set to one more than
|
||||
the minimum. */
|
||||
|
||||
if (op_previous < OP_ONCE) /* Assertion */
|
||||
{
|
||||
if (repeat_min > 0) goto END_REPEAT;
|
||||
if (repeat_max > 1) repeat_max = 1;
|
||||
if (repeat_max == REPEAT_UNLIMITED) repeat_max = repeat_min + 1;
|
||||
}
|
||||
|
||||
/* The case of a zero minimum is special because of the need to stick
|
||||
|
@ -7678,19 +7679,6 @@ for (;; pptr++)
|
|||
|
||||
cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1;
|
||||
if (meta_arg > cb->top_backref) cb->top_backref = meta_arg;
|
||||
|
||||
/* Check to see if this back reference is recursive, that it, it
|
||||
is inside the group that it references. A flag is set so that the
|
||||
group can be made atomic. */
|
||||
|
||||
for (oc = cb->open_caps; oc != NULL; oc = oc->next)
|
||||
{
|
||||
if (oc->number == meta_arg)
|
||||
{
|
||||
oc->flag = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
|
@ -7836,11 +7824,12 @@ for (;; pptr++)
|
|||
NORMAL_CHAR_SET: /* Character is already in meta */
|
||||
matched_char = TRUE;
|
||||
|
||||
/* For caseless UTF mode, check whether this character has more than one
|
||||
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
|
||||
/* For caseless UTF or UCP mode, check whether this character has more than
|
||||
one other case. If so, generate a special OP_PROP item instead of OP_CHARI.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_CASELESS) != 0)
|
||||
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
uint32_t caseset = UCD_CASESET(meta);
|
||||
if (caseset != 0)
|
||||
|
@ -8049,7 +8038,6 @@ if (*code == OP_CBRA)
|
|||
capnumber = GET2(code, 1 + LINK_SIZE);
|
||||
capitem.number = capnumber;
|
||||
capitem.next = cb->open_caps;
|
||||
capitem.flag = FALSE;
|
||||
capitem.assert_depth = cb->assert_depth;
|
||||
cb->open_caps = &capitem;
|
||||
}
|
||||
|
@ -8178,26 +8166,9 @@ for (;;)
|
|||
PUT(code, 1, (int)(code - start_bracket));
|
||||
code += 1 + LINK_SIZE;
|
||||
|
||||
/* If it was a capturing subpattern, check to see if it contained any
|
||||
recursive back references. If so, we must wrap it in atomic brackets. In
|
||||
any event, remove the block from the chain. */
|
||||
/* If it was a capturing subpattern, remove the block from the chain. */
|
||||
|
||||
if (capnumber > 0)
|
||||
{
|
||||
if (cb->open_caps->flag)
|
||||
{
|
||||
(void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
|
||||
CU2BYTES(code - start_bracket));
|
||||
*start_bracket = OP_ONCE;
|
||||
code += 1 + LINK_SIZE;
|
||||
PUT(start_bracket, 1, (int)(code - start_bracket));
|
||||
*code = OP_KET;
|
||||
PUT(code, 1, (int)(code - start_bracket));
|
||||
code += 1 + LINK_SIZE;
|
||||
length += 2 + 2*LINK_SIZE;
|
||||
}
|
||||
cb->open_caps = cb->open_caps->next;
|
||||
}
|
||||
if (capnumber > 0) cb->open_caps = cb->open_caps->next;
|
||||
|
||||
/* Set values to pass back */
|
||||
|
||||
|
@ -8832,9 +8803,10 @@ memset(slot + IMM2_SIZE + length, 0,
|
|||
|
||||
/* This function is called to skip parts of the parsed pattern when finding the
|
||||
length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find
|
||||
the end of the branch, it is called to skip over an internal lookaround, and it
|
||||
is also called to skip to the end of a class, during which it will never
|
||||
encounter nested groups (but there's no need to have special code for that).
|
||||
the end of the branch, it is called to skip over an internal lookaround or
|
||||
(DEFINE) group, and it is also called to skip to the end of a class, during
|
||||
which it will never encounter nested groups (but there's no need to have
|
||||
special code for that).
|
||||
|
||||
When called to find the end of a branch or group, pptr must point to the first
|
||||
meta code inside the branch, not the branch-starting code. In other cases it
|
||||
|
@ -9312,14 +9284,21 @@ for (;; pptr++)
|
|||
itemlength = grouplength;
|
||||
break;
|
||||
|
||||
/* Check nested groups - advance past the initial data for each type and
|
||||
then seek a fixed length with get_grouplength(). */
|
||||
/* A (DEFINE) group is never obeyed inline and so it does not contribute to
|
||||
the length of this branch. Skip from the following item to the next
|
||||
unpaired ket. */
|
||||
|
||||
case META_COND_DEFINE:
|
||||
pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
||||
break;
|
||||
|
||||
/* Check other nested groups - advance past the initial data for each type
|
||||
and then seek a fixed length with get_grouplength(). */
|
||||
|
||||
case META_COND_NAME:
|
||||
case META_COND_NUMBER:
|
||||
case META_COND_RNAME:
|
||||
case META_COND_RNUMBER:
|
||||
case META_COND_DEFINE:
|
||||
pptr += 2 + SIZEOFFSET;
|
||||
goto CHECK_GROUP;
|
||||
|
||||
|
@ -9576,6 +9555,10 @@ for (; *pptr != META_END; pptr++)
|
|||
break;
|
||||
|
||||
case META_COND_DEFINE:
|
||||
pptr += SIZEOFFSET;
|
||||
nestlevel++;
|
||||
break;
|
||||
|
||||
case META_COND_NAME:
|
||||
case META_COND_NUMBER:
|
||||
case META_COND_RNAME:
|
||||
|
@ -9656,6 +9639,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
|
|||
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
|
||||
{
|
||||
BOOL utf; /* Set TRUE for UTF mode */
|
||||
BOOL ucp; /* Set TRUE for UCP mode */
|
||||
BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */
|
||||
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
|
||||
pcre2_real_code *re = NULL; /* What we will return */
|
||||
|
@ -9943,8 +9927,8 @@ if (utf)
|
|||
|
||||
/* Check UCP lockout. */
|
||||
|
||||
if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
|
||||
(PCRE2_UCP|PCRE2_NEVER_UCP))
|
||||
ucp = (cb.external_options & PCRE2_UCP) != 0;
|
||||
if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0)
|
||||
{
|
||||
errorcode = ERR75;
|
||||
goto HAD_EARLY_ERROR;
|
||||
|
@ -10154,6 +10138,7 @@ write to the last 8 bytes of the structure before setting the fields. */
|
|||
memset((char *)re + sizeof(pcre2_real_code) - 8, 0, 8);
|
||||
re->memctl = ccontext->memctl;
|
||||
re->tables = tables;
|
||||
re->executable_jit = NULL;
|
||||
memset(re->start_bitmap, 0, 32 * sizeof(uint8_t));
|
||||
re->blocksize = re_blocksize;
|
||||
re->magic_number = MAGIC_NUMBER;
|
||||
|
@ -10319,7 +10304,7 @@ function call. */
|
|||
if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
|
||||
{
|
||||
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
|
||||
if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
|
||||
if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80;
|
||||
}
|
||||
|
||||
/* Failed to compile, or error while post-processing. */
|
||||
|
@ -10367,21 +10352,25 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
|||
|
||||
if ((firstcuflags & REQ_CASELESS) != 0)
|
||||
{
|
||||
if (firstcu < 128 || (!utf && firstcu < 255))
|
||||
if (firstcu < 128 || (!utf && !ucp && firstcu < 255))
|
||||
{
|
||||
if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS;
|
||||
}
|
||||
|
||||
/* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
|
||||
8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
||||
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
||||
check wide characters when UTF (and therefore UCP) is supported. */
|
||||
/* The first code unit is > 128 in UTF or UCP mode, or > 255 otherwise.
|
||||
In 8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
||||
points and cannot have another case, but if UCP is set they may do. */
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
else if (ucp && !utf && UCD_OTHERCASE(firstcu) != firstcu)
|
||||
re->flags |= PCRE2_FIRSTCASELESS;
|
||||
#else
|
||||
else if ((utf || ucp) && firstcu <= MAX_UTF_CODE_POINT &&
|
||||
UCD_OTHERCASE(firstcu) != firstcu)
|
||||
re->flags |= PCRE2_FIRSTCASELESS;
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10430,14 +10419,20 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
|||
|
||||
if ((reqcuflags & REQ_CASELESS) != 0)
|
||||
{
|
||||
if (reqcu < 128 || (!utf && reqcu < 255))
|
||||
if (reqcu < 128 || (!utf && !ucp && reqcu < 255))
|
||||
{
|
||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||
}
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
else if (ucp && !utf && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#else
|
||||
else if ((utf || ucp) && reqcu <= MAX_UTF_CODE_POINT &&
|
||||
UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_internal.h to be in code units. */
|
||||
its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
|
||||
be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
||||
|
@ -94,6 +95,7 @@ if (where == NULL) /* Requests a length */
|
|||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
|
@ -191,6 +193,10 @@ switch (what)
|
|||
*((uint32_t *)where) = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_TABLES_LENGTH:
|
||||
*((uint32_t *)where) = TABLES_LENGTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -548,6 +548,7 @@ PCRE2_SPTR start_code = mb->start_code;
|
|||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
|
||||
#else
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
@ -2190,7 +2191,7 @@ for (;;)
|
|||
if (clen == 0) break;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf_or_ucp)
|
||||
{
|
||||
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
|
||||
{
|
||||
|
@ -2204,7 +2205,7 @@ for (;;)
|
|||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Not UTF mode */
|
||||
/* Not UTF or UCP mode */
|
||||
{
|
||||
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
|
||||
{ ADD_NEW(state_offset + 2, 0); }
|
||||
|
@ -2339,7 +2340,7 @@ for (;;)
|
|||
{
|
||||
uint32_t otherd;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -2374,7 +2375,7 @@ for (;;)
|
|||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -2417,7 +2418,7 @@ for (;;)
|
|||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -2458,7 +2459,7 @@ for (;;)
|
|||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -2491,7 +2492,7 @@ for (;;)
|
|||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -2531,7 +2532,7 @@ for (;;)
|
|||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && d >= 128)
|
||||
if (utf_or_ucp && d >= 128)
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -3526,10 +3527,15 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
|
|||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && first_cu > 127)
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#else
|
||||
if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -3545,9 +3551,15 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
|||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||
{
|
||||
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && req_cu > 127) req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#else
|
||||
if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -76,6 +76,17 @@ typedef int BOOL;
|
|||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
|
||||
/* -ftrivial-auto-var-init support supports initializing all local variables
|
||||
to avoid some classes of bug, but this can cause an unacceptable slowdown
|
||||
for large on-stack arrays in hot functions. This macro lets us annotate
|
||||
such arrays. */
|
||||
|
||||
#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
|
||||
#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
|
||||
#else
|
||||
#define PCRE2_KEEP_UNINITIALIZED
|
||||
#endif
|
||||
|
||||
/* Older versions of MSVC lack snprintf(). This define allows for
|
||||
warning/error-free compilation and testing with MSVC compilers back to at least
|
||||
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
@ -579,7 +590,7 @@ total length of the tables. */
|
|||
#define fcc_offset 256 /* Flip case */
|
||||
#define cbits_offset 512 /* Character classes */
|
||||
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
#define TABLES_LENGTH (ctypes_offset + 256)
|
||||
|
||||
|
||||
/* -------------------- Character and string names ------------------------ */
|
||||
|
@ -1759,13 +1770,11 @@ typedef struct pcre2_memctl {
|
|||
|
||||
/* Structure for building a chain of open capturing subpatterns during
|
||||
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
|
||||
encountered. This is also used to identify subpatterns that contain recursive
|
||||
back references to themselves, so that they can be made atomic. */
|
||||
encountered. */
|
||||
|
||||
typedef struct open_capitem {
|
||||
struct open_capitem *next; /* Chain link */
|
||||
uint16_t number; /* Capture number */
|
||||
uint16_t flag; /* Set TRUE if recursive back ref */
|
||||
uint16_t assert_depth; /* Assertion depth when opened */
|
||||
} open_capitem;
|
||||
|
||||
|
@ -1954,7 +1963,7 @@ is available. */
|
|||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||
|
||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *,
|
||||
const compile_block *);
|
||||
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||
int *, uint32_t, uint32_t, BOOL, compile_block *);
|
||||
|
|
|
@ -618,6 +618,7 @@ here.) */
|
|||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -41,10 +41,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
own as part of the PCRE2 library. It is also included in the compilation of
|
||||
pcre2_dftables.c as a freestanding program, in which case the macro
|
||||
PCRE2_DFTABLES is defined. */
|
||||
|
||||
#ifndef DFTABLES
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
|
@ -61,28 +62,29 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
|||
a pointer to them. They are build using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||
program) malloc() is used, and the function has a different name so as not to
|
||||
clash with the prototype in pcre2.h.
|
||||
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
|
||||
freestanding auxiliary program) malloc() is used, and the function has a
|
||||
different name so as not to clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none when DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Arguments: none when PCRE2_DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data
|
||||
else NULL if memory allocation failed
|
||||
*/
|
||||
|
||||
#ifdef DFTABLES /* Included in freestanding dftables.c program */
|
||||
#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
|
||||
static const uint8_t *maketables(void)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)malloc(tables_length);
|
||||
uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
|
||||
|
||||
#else /* Not DFTABLES, compiling the library */
|
||||
#else /* Not PCRE2_DFTABLES, that is, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
|
||||
malloc(tables_length));
|
||||
#endif /* DFTABLES */
|
||||
gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
|
||||
malloc(TABLES_LENGTH));
|
||||
#endif /* PCRE2_DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
@ -103,8 +105,8 @@ exclusive ones - in some locales things may be different.
|
|||
|
||||
Note that the table for "space" includes everything "isspace" gives, including
|
||||
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||
From release 8.34 is is also correct for Perl space, because Perl added VT at
|
||||
release 5.18.
|
||||
From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
|
||||
space, because Perl added VT at release 5.18.
|
||||
|
||||
Note also that it is possible for a character to be alnum or alpha without
|
||||
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||
|
@ -114,24 +116,24 @@ test for alnum specially. */
|
|||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
|
||||
if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||
white space chars, because Perl didn't recognize it as such for \s and for
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
|
||||
at release 8.34. */
|
||||
comments within regexes. However, Perl changed at release 5.18, so PCRE1
|
||||
changed at release 8.34 and it's always been this way for PCRE2. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
|
@ -147,7 +149,7 @@ for (i = 0; i < 256; i++)
|
|||
return yield;
|
||||
}
|
||||
|
||||
#ifndef DFTABLES
|
||||
#ifndef PCRE2_DFTABLES /* Compiling the library */
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2015-2019 University of Cambridge
|
||||
New API code Copyright (c) 2015-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -381,8 +381,12 @@ length = Fovector[offset+1] - Fovector[offset];
|
|||
if (caseless)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UTF) != 0)
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
|
||||
if (utf || (mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
PCRE2_SPTR endptr = p + length;
|
||||
|
||||
/* Match characters up to the end of the reference. NOTE: the number of
|
||||
code units matched may differ, because in UTF-8 there are some characters
|
||||
whose upper and lower case codes have different numbers of bytes. For
|
||||
|
@ -390,16 +394,25 @@ if (caseless)
|
|||
bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
|
||||
sequence of two of the latter. It is important, therefore, to check the
|
||||
length along the reference, not along the subject (earlier code did this
|
||||
wrong). */
|
||||
wrong). UCP without uses Unicode properties but without UTF encoding. */
|
||||
|
||||
PCRE2_SPTR endptr = p + length;
|
||||
while (p < endptr)
|
||||
{
|
||||
uint32_t c, d;
|
||||
const ucd_record *ur;
|
||||
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
||||
GETCHARINC(c, eptr);
|
||||
GETCHARINC(d, p);
|
||||
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(c, eptr);
|
||||
GETCHARINC(d, p);
|
||||
}
|
||||
else
|
||||
{
|
||||
c = *eptr++;
|
||||
d = *p++;
|
||||
}
|
||||
|
||||
ur = GET_UCD(d);
|
||||
if (c != d && c != (uint32_t)((int)d + ur->other_case))
|
||||
{
|
||||
|
@ -415,7 +428,7 @@ if (caseless)
|
|||
else
|
||||
#endif
|
||||
|
||||
/* Not in UTF mode */
|
||||
/* Not in UTF or UCP mode */
|
||||
{
|
||||
for (; length > 0; length--)
|
||||
{
|
||||
|
@ -432,7 +445,8 @@ if (caseless)
|
|||
}
|
||||
|
||||
/* In the caseful case, we can just compare the code units, whether or not we
|
||||
are in UTF mode. When partial matching, we have to do this unit-by-unit. */
|
||||
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
|
||||
unit. */
|
||||
|
||||
else
|
||||
{
|
||||
|
@ -574,8 +588,8 @@ match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
|
|||
heapframe *F; /* Current frame pointer */
|
||||
heapframe *N = NULL; /* Temporary frame pointers */
|
||||
heapframe *P = NULL;
|
||||
heapframe *assert_accept_frame; /* For passing back the frame with captures */
|
||||
PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
|
||||
heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */
|
||||
PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
|
||||
|
||||
/* Local variables that do not need to be preserved over calls to RRMATCH(). */
|
||||
|
||||
|
@ -598,12 +612,13 @@ BOOL condition; /* Used in conditional groups */
|
|||
BOOL cur_is_word; /* Used in "word" tests */
|
||||
BOOL prev_is_word; /* Used in "word" tests */
|
||||
|
||||
/* UTF flag */
|
||||
/* UTF and UCP flags */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
|
||||
#else
|
||||
BOOL utf = FALSE;
|
||||
BOOL utf = FALSE; /* Required for convenience even when no Unicode support */
|
||||
#endif
|
||||
|
||||
/* This is the length of the last part of a backtracking frame that must be
|
||||
|
@ -928,6 +943,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
/* Not UTF mode */
|
||||
{
|
||||
if (mb->end_subject - Feptr < 1)
|
||||
|
@ -987,10 +1003,30 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
|
||||
/* If UCP is set without UTF we must do the same as above, but with one
|
||||
character per code unit. */
|
||||
|
||||
else if (ucp)
|
||||
{
|
||||
uint32_t cc = UCHAR21(Feptr);
|
||||
fc = Fecode[1];
|
||||
if (fc < 128)
|
||||
{
|
||||
if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
Feptr++;
|
||||
Fecode += 2;
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF mode; use the table for characters < 256. */
|
||||
/* Not UTF or UCP mode; use the table for characters < 256. */
|
||||
{
|
||||
if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
|
||||
!= TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
|
||||
|
@ -1010,6 +1046,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
|
@ -1026,15 +1063,42 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if (ch > 127)
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
else
|
||||
ch = TABLE_GET(ch, mb->fcc, ch);
|
||||
ch = (mb->fcc)[ch];
|
||||
if (ch == fc) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
|
||||
/* UCP without UTF is as above, but with one character per code unit. */
|
||||
|
||||
else if (ucp)
|
||||
{
|
||||
uint32_t ch;
|
||||
fc = UCHAR21INC(Feptr);
|
||||
ch = Fecode[1];
|
||||
Fecode += 2;
|
||||
|
||||
if (ch == fc)
|
||||
{
|
||||
RRETURN(MATCH_NOMATCH); /* Caseful match */
|
||||
}
|
||||
else if (Fop == OP_NOTI) /* If caseless */
|
||||
{
|
||||
if (ch > 127)
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
else
|
||||
ch = (mb->fcc)[ch];
|
||||
if (ch == fc) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Neither UTF nor UCP is set */
|
||||
|
||||
{
|
||||
uint32_t ch = Fecode[1];
|
||||
fc = *Feptr++;
|
||||
fc = UCHAR21INC(Feptr);
|
||||
if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
Fecode += 2;
|
||||
|
@ -1244,7 +1308,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* When not in UTF mode, load a single-code-unit character. Then proceed as
|
||||
above. */
|
||||
above, using Unicode casing if either UTF or UCP is set. */
|
||||
|
||||
Lc = *Fecode++;
|
||||
|
||||
|
@ -1253,11 +1317,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if (Fop >= OP_STARI)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* Lc must be < 128 in UTF-8 mode. */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
/* Lc will be < 128 in UTF-8 mode. */
|
||||
Loc = mb->fcc[Lc];
|
||||
#else /* 16-bit & 32-bit */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
|
||||
if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
Loc = TABLE_GET(Lc, mb->fcc, Lc);
|
||||
|
@ -1490,7 +1558,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if (Fop >= OP_NOTSTARI) /* Caseless */
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && Lc > 127)
|
||||
if ((utf || ucp) && Lc > 127)
|
||||
Loc = UCD_OTHERCASE(Lc);
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -6045,7 +6113,6 @@ BOOL firstline;
|
|||
BOOL has_first_cu = FALSE;
|
||||
BOOL has_req_cu = FALSE;
|
||||
BOOL startline;
|
||||
BOOL utf;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
BOOL memchr_not_found_first_cu = FALSE;
|
||||
|
@ -6069,13 +6136,19 @@ PCRE2_SPTR match_partial;
|
|||
BOOL use_jit;
|
||||
#endif
|
||||
|
||||
/* This flag is needed even when Unicode is not supported for convenience
|
||||
(it is used by the IS_NEWLINE macro). */
|
||||
|
||||
BOOL utf = FALSE;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL ucp = FALSE;
|
||||
BOOL allow_invalid;
|
||||
uint32_t fragment_options = 0;
|
||||
#ifdef SUPPORT_JIT
|
||||
BOOL jit_checked_utf = FALSE;
|
||||
#endif
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
PCRE2_SIZE frame_size;
|
||||
|
||||
|
@ -6091,7 +6164,8 @@ proves to be too small, it is replaced by a larger one on the heap. To get a
|
|||
vector of the size required that is aligned for pointers, allocate it as a
|
||||
vector of pointers. */
|
||||
|
||||
PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
|
||||
PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
|
||||
PCRE2_KEEP_UNINITIALIZED;
|
||||
mb->stack_frames = (heapframe *)stack_frames_vector;
|
||||
|
||||
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
|
||||
|
@ -6147,12 +6221,13 @@ use_jit = (re->executable_jit != NULL &&
|
|||
(options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
|
||||
#endif
|
||||
|
||||
/* Initialize UTF parameters. */
|
||||
/* Initialize UTF/UCP parameters. */
|
||||
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
|
||||
#endif
|
||||
ucp = (re->overall_options & PCRE2_UCP) != 0;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Convert the partial matching flags into an integer. */
|
||||
|
||||
|
@ -6589,9 +6664,13 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
|
|||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
#else
|
||||
if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -6607,9 +6686,13 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
|||
if ((re->flags & PCRE2_LASTCASELESS) != 0)
|
||||
{
|
||||
req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||
#else
|
||||
if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6756,15 +6839,16 @@ for(;;)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, having reached the true end
|
||||
of the subject, break the bumpalong loop, to force a match failure,
|
||||
except when doing partial matching, when we let the next cycle run at
|
||||
the end of the subject. To see why, consider the pattern /(?<=abc)def/,
|
||||
which partially matches "abc", even though the string does not contain
|
||||
the starting character "d". If we have not reached the true end of the
|
||||
subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
|
||||
we also let the cycle run, because the matching string is legitimately
|
||||
allowed to start with the first code unit of a newline. */
|
||||
/* If we can't find the required first code unit, having reached the
|
||||
true end of the subject, break the bumpalong loop, to force a match
|
||||
failure, except when doing partial matching, when we let the next cycle
|
||||
run at the end of the subject. To see why, consider the pattern
|
||||
/(?<=abc)def/, which partially matches "abc", even though the string
|
||||
does not contain the starting character "d". If we have not reached the
|
||||
true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
|
||||
temporarily modified) we also let the cycle run, because the matching
|
||||
string is legitimately allowed to start with the first code unit of a
|
||||
newline. */
|
||||
|
||||
if (mb->partial == 0 && start_match >= mb->end_subject)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -90,7 +90,7 @@ if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
|||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + tables_length;
|
||||
total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
|
@ -121,8 +121,8 @@ data->number_of_codes = number_of_codes;
|
|||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, tables_length);
|
||||
dst_bytes += tables_length;
|
||||
memcpy(dst_bytes, tables, TABLES_LENGTH);
|
||||
dst_bytes += TABLES_LENGTH;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
|
@ -142,7 +142,9 @@ for (i = 0; i < number_of_codes; i++)
|
|||
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
|
||||
sizeof(pcre2_memctl));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
|
||||
sizeof(void *));
|
||||
sizeof(void *));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
|
||||
sizeof(void *));
|
||||
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
@ -187,12 +189,12 @@ src_bytes = bytes + sizeof(pcre2_serialized_data);
|
|||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, tables_length);
|
||||
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
|
||||
src_bytes += tables_length;
|
||||
memcpy(tables, src_bytes, TABLES_LENGTH);
|
||||
*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
|
||||
src_bytes += TABLES_LENGTH;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
|
@ -238,6 +240,7 @@ for (i = 0; i < number_of_codes; i++)
|
|||
/* At the moment only one table is supported. */
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
||||
codes[i] = dst_re;
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */
|
|||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -772,15 +772,19 @@ Arguments:
|
|||
p points to the first code unit of the character
|
||||
caseless TRUE if caseless
|
||||
utf TRUE for UTF mode
|
||||
ucp TRUE for UCP mode
|
||||
|
||||
Returns: pointer after the character
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
|
||||
BOOL ucp)
|
||||
{
|
||||
uint32_t c = *p++; /* First code unit */
|
||||
(void)utf; /* Stop compiler warning when UTF not supported */
|
||||
|
||||
(void)utf; /* Stop compiler warnings when UTF not supported */
|
||||
(void)ucp;
|
||||
|
||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||
0xff. */
|
||||
|
@ -810,22 +814,26 @@ if (utf)
|
|||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf || ucp)
|
||||
{
|
||||
c = UCD_OTHERCASE(c);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR buff[6];
|
||||
c = UCD_OTHERCASE(c);
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
else if (c < 256) SET_BIT(c);
|
||||
#else /* 16-bit or 32-bit mode */
|
||||
c = UCD_OTHERCASE(c);
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF */
|
||||
/* Not UTF or UCP */
|
||||
|
||||
if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
|
||||
}
|
||||
|
@ -924,19 +932,26 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
|||
must continue at the outer level to find at least one mandatory code unit. At
|
||||
the outermost level, this function fails unless the result is SSB_DONE.
|
||||
|
||||
We restrict recursion (for nested groups) to 1000 to avoid stack overflow
|
||||
issues.
|
||||
|
||||
Arguments:
|
||||
re points to the compiled regex block
|
||||
code points to an expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
depthptr pointer to recurse depth
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting code units
|
||||
SSB_DONE => Found mandatory starting code units
|
||||
SSB_CONTINUE => Found optional starting code units
|
||||
SSB_UNKNOWN => Hit an unrecognized opcode
|
||||
SSB_TOODEEP => Recursion is too deep
|
||||
*/
|
||||
|
||||
static int
|
||||
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
|
||||
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
|
||||
int *depthptr)
|
||||
{
|
||||
uint32_t c;
|
||||
int yield = SSB_DONE;
|
||||
|
@ -947,6 +962,9 @@ int table_limit = utf? 16:32;
|
|||
int table_limit = 32;
|
||||
#endif
|
||||
|
||||
*depthptr += 1;
|
||||
if (*depthptr > 1000) return SSB_TOODEEP;
|
||||
|
||||
do
|
||||
{
|
||||
BOOL try_next = TRUE;
|
||||
|
@ -1103,13 +1121,17 @@ do
|
|||
case OP_SCRIPT_RUN:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NA:
|
||||
rc = set_start_bits(re, tcode, utf);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
rc = set_start_bits(re, tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
try_next = FALSE;
|
||||
}
|
||||
else if (rc == SSB_CONTINUE)
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
else return rc; /* FAIL, UNKNOWN, or TOODEEP */
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
|
@ -1155,8 +1177,8 @@ do
|
|||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
rc = set_start_bits(re, ++tcode, utf);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||
rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
@ -1177,7 +1199,7 @@ do
|
|||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
tcode = set_table_bit(re, tcode + 1, FALSE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
|
@ -1186,7 +1208,7 @@ do
|
|||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
tcode = set_table_bit(re, tcode + 1, TRUE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
@ -1194,13 +1216,13 @@ do
|
|||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
@ -1212,7 +1234,7 @@ do
|
|||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
(void)set_table_bit(re, tcode + 1, FALSE, utf);
|
||||
(void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
|
@ -1223,7 +1245,7 @@ do
|
|||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
(void)set_table_bit(re, tcode + 1, TRUE, utf);
|
||||
(void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
|
@ -1652,6 +1674,7 @@ PRIV(study)(pcre2_real_code *re)
|
|||
int count = 0;
|
||||
PCRE2_UCHAR *code;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
|
||||
|
||||
/* Find start of compiled code */
|
||||
|
||||
|
@ -1664,7 +1687,8 @@ code units. */
|
|||
|
||||
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
{
|
||||
int rc = set_start_bits(re, code, utf);
|
||||
int depth = 0;
|
||||
int rc = set_start_bits(re, code, utf, ucp, &depth);
|
||||
if (rc == SSB_UNKNOWN) return 1;
|
||||
|
||||
/* If a list of starting code units was set up, scan the list to see if only
|
||||
|
@ -1712,27 +1736,27 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
|||
}
|
||||
|
||||
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
|
||||
mode, only values < 128 can be used. */
|
||||
mode, only values < 128 can be used. In all the other cases, c is a
|
||||
character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (c > 127) goto DONE;
|
||||
if (utf && c > 127) goto DONE;
|
||||
#endif
|
||||
if (a < 0) a = c; /* First one found */
|
||||
if (a < 0) a = c; /* First one found, save in a */
|
||||
else if (b < 0) /* Second one found */
|
||||
{
|
||||
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
#else /* 16-bit or 32-bit */
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (utf && c > 127) d = UCD_OTHERCASE(c);
|
||||
#endif /* Code width */
|
||||
if (utf || ucp)
|
||||
{
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (c > 127) d = UCD_OTHERCASE(c);
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (d != a) goto DONE; /* Not other case of a */
|
||||
b = c;
|
||||
if (d != a) goto DONE; /* Not the other case of a */
|
||||
b = c; /* Save second in b */
|
||||
}
|
||||
else goto DONE; /* More than two characters found */
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -49,8 +49,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
|
@ -194,6 +195,7 @@ overflow, either give an error immediately, or keep on, accumulating the
|
|||
length. */
|
||||
|
||||
#define CHECKMEMCPY(from,length) \
|
||||
{ \
|
||||
if (!overflowed && lengthleft < length) \
|
||||
{ \
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
|
||||
|
@ -209,7 +211,8 @@ length. */
|
|||
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
|
||||
buff_offset += length; \
|
||||
lengthleft -= length; \
|
||||
}
|
||||
} \
|
||||
}
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
|
@ -226,11 +229,14 @@ int forcecasereset = 0;
|
|||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL literal = FALSE;
|
||||
pcre2_match_data *internal_match_data = NULL;
|
||||
BOOL escaped_literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
BOOL use_existing_match;
|
||||
BOOL replacement_only;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
|
@ -248,23 +254,54 @@ lengthleft = buff_length = *blength;
|
|||
*blength = PCRE2_UNSET;
|
||||
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
|
||||
|
||||
/* Partial matching is not valid. This must come after setting *blength to
|
||||
/* Partial matching is not valid. This must come after setting *blength to
|
||||
PCRE2_UNSET, so as not to imply an offset in the replacement. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* If no match data block is provided, create one. */
|
||||
/* Check for using a match that has already happened. Note that the subject
|
||||
pointer in the match data may be NULL after a no-match. */
|
||||
|
||||
use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
|
||||
replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
|
||||
|
||||
/* If starting from an existing match, there must be an externally provided
|
||||
match data block. We create an internal match_data block in two cases: (a) an
|
||||
external one is not supplied (and we are not starting from an existing match);
|
||||
(b) an existing match is to be used for the first substitution. In the latter
|
||||
case, we copy the existing match into the internal block. This ensures that no
|
||||
changes are made to the existing match data block. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context *gcontext;
|
||||
if (use_existing_match) return PCRE2_ERROR_NULL;
|
||||
gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = internal_match_data =
|
||||
pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
else if (use_existing_match)
|
||||
{
|
||||
pcre2_general_context *gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
match_data_created = TRUE;
|
||||
int pairs = (code->top_bracket + 1 < match_data->oveccount)?
|
||||
code->top_bracket + 1 : match_data->oveccount;
|
||||
internal_match_data = pcre2_match_data_create(match_data->oveccount,
|
||||
gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
|
||||
+ 2*pairs*sizeof(PCRE2_SIZE));
|
||||
match_data = internal_match_data;
|
||||
}
|
||||
|
||||
/* Remember ovector details */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
|
@ -286,7 +323,7 @@ repend = replacement + rlength;
|
|||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
|
@ -300,7 +337,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
|||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Copy up to the start offset */
|
||||
/* Error if the start match offset is greater than the length of the subject. */
|
||||
|
||||
if (start_offset > length)
|
||||
{
|
||||
|
@ -308,9 +345,13 @@ if (start_offset > length)
|
|||
rc = PCRE2_ERROR_BADOFFSET;
|
||||
goto EXIT;
|
||||
}
|
||||
CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. */
|
||||
/* Copy up to the start offset, unless only the replacement is required. */
|
||||
|
||||
if (!replacement_only) CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
|
||||
match is taken from the match_data that was passed in. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
|
@ -318,7 +359,12 @@ do
|
|||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
if (use_existing_match)
|
||||
{
|
||||
rc = match_data->rc;
|
||||
use_existing_match = FALSE;
|
||||
}
|
||||
else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
@ -364,44 +410,44 @@ do
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past, reset the special global options, and
|
||||
continue to the next match. */
|
||||
/* Copy what we have advanced past (unless not required), reset the special
|
||||
global options, and continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
CHECKMEMCPY(subject + save_start, fraglength);
|
||||
if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
or start before the current point in the subject are not supported. */
|
||||
|
||||
|
||||
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
empty string that starts after the initial match offset. We have tried again
|
||||
at the match point in case the pattern is one like /(?<=\G.)/ which can never
|
||||
match at its starting point, so running the match achieves the bumpalong. If
|
||||
we do get the same (null) match at the original match point, it isn't such a
|
||||
pattern, so we now do the empty string magic. In all other cases, a repeat
|
||||
match should never occur. */
|
||||
|
||||
|
||||
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
}
|
||||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
|
@ -412,21 +458,30 @@ do
|
|||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match, and remember where the insert
|
||||
begins and how many ovector pairs are set. */
|
||||
/* Copy the text leading up to the match (unless not required), and remember
|
||||
where the insert begins and how many ovector pairs are set. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
scb.output_offsets[0] = buff_offset;
|
||||
scb.oveccount = rc;
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
must handle nested substrings that are to be reprocessed. */
|
||||
/* Process the replacement string. If the entire replacement is literal, just
|
||||
copy it with length check. */
|
||||
|
||||
ptr = replacement;
|
||||
for (;;)
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
|
||||
{
|
||||
CHECKMEMCPY(ptr, rlength);
|
||||
}
|
||||
|
||||
/* Within a non-literal replacement, which must be scanned character by
|
||||
character, local literal mode can be set by \Q, but only in extended mode
|
||||
when backslashes are being interpreted. In extended mode we must handle
|
||||
nested substrings that are to be reprocessed. */
|
||||
|
||||
else for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
@ -443,11 +498,11 @@ do
|
|||
|
||||
/* Handle the next character */
|
||||
|
||||
if (literal)
|
||||
if (escaped_literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
escaped_literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
|
@ -704,7 +759,7 @@ do
|
|||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf || ucp)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
|
@ -784,7 +839,7 @@ do
|
|||
continue;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
escaped_literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
|
@ -806,7 +861,7 @@ do
|
|||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf || ucp)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
|
@ -835,53 +890,59 @@ do
|
|||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Do the callout if there is one and we have done an actual
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Do the callout if there is one and we have done an actual
|
||||
replacement. */
|
||||
|
||||
|
||||
if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
|
||||
{
|
||||
scb.subscount = subs;
|
||||
scb.subscount = subs;
|
||||
scb.output_offsets[1] = buff_offset;
|
||||
rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
|
||||
rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
|
||||
|
||||
/* A non-zero return means cancel this substitution. Instead, copy the
|
||||
/* A non-zero return means cancel this substitution. Instead, copy the
|
||||
matched string fragment. */
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
|
||||
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
|
||||
|
||||
|
||||
buff_offset -= newlength;
|
||||
lengthleft += newlength;
|
||||
CHECKMEMCPY(subject + ovector[0], oldlength);
|
||||
|
||||
if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
|
||||
|
||||
/* A negative return means do not do any more. */
|
||||
|
||||
|
||||
if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Save the details of this match. See above for how this data is used. If we
|
||||
matched an empty string, do the magic for global matches. Finally, update the
|
||||
start offset to point to the rest of the subject string. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
matched an empty string, do the magic for global matches. Update the start
|
||||
offset to point to the rest of the subject string. If we re-used an existing
|
||||
match for the first match, switch to the internal match data block. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
ovecsave[2] = start_offset;
|
||||
|
||||
|
||||
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
start_offset = ovector[1];
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject. */
|
||||
/* Copy the rest of the subject unless not required, and terminate the output
|
||||
with a binary zero. */
|
||||
|
||||
if (!replacement_only)
|
||||
{
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
}
|
||||
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp , 1);
|
||||
CHECKMEMCPY(temp, 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
|
@ -903,7 +964,7 @@ else
|
|||
}
|
||||
|
||||
EXIT:
|
||||
if (match_data_created) pcre2_match_data_free(match_data);
|
||||
if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
|
|
|
@ -265,6 +265,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
|
||||
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
|
||||
#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
|
||||
#define STRING_Cn0 STR_C STR_n "\0"
|
||||
#define STRING_Co0 STR_C STR_o "\0"
|
||||
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
|
||||
|
@ -275,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0"
|
||||
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
|
@ -306,6 +308,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0"
|
||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||
|
@ -429,6 +432,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
|
||||
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
|
||||
#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0"
|
||||
#define STRING_Yi0 STR_Y STR_i "\0"
|
||||
#define STRING_Z0 STR_Z "\0"
|
||||
#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
|
||||
|
@ -464,6 +468,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Chakma0
|
||||
STRING_Cham0
|
||||
STRING_Cherokee0
|
||||
STRING_Chorasmian0
|
||||
STRING_Cn0
|
||||
STRING_Co0
|
||||
STRING_Common0
|
||||
|
@ -474,6 +479,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Dives_Akuru0
|
||||
STRING_Dogra0
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
|
@ -505,6 +511,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Katakana0
|
||||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khitan_Small_Script0
|
||||
STRING_Khmer0
|
||||
STRING_Khojki0
|
||||
STRING_Khudawadi0
|
||||
|
@ -628,6 +635,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Xsp0
|
||||
STRING_Xuc0
|
||||
STRING_Xwd0
|
||||
STRING_Yezidi0
|
||||
STRING_Yi0
|
||||
STRING_Z0
|
||||
STRING_Zanabazar_Square0
|
||||
|
@ -663,176 +671,180 @@ const ucp_type_table PRIV(utt)[] = {
|
|||
{ 203, PT_SC, ucp_Chakma },
|
||||
{ 210, PT_SC, ucp_Cham },
|
||||
{ 215, PT_SC, ucp_Cherokee },
|
||||
{ 224, PT_PC, ucp_Cn },
|
||||
{ 227, PT_PC, ucp_Co },
|
||||
{ 230, PT_SC, ucp_Common },
|
||||
{ 237, PT_SC, ucp_Coptic },
|
||||
{ 244, PT_PC, ucp_Cs },
|
||||
{ 247, PT_SC, ucp_Cuneiform },
|
||||
{ 257, PT_SC, ucp_Cypriot },
|
||||
{ 265, PT_SC, ucp_Cyrillic },
|
||||
{ 274, PT_SC, ucp_Deseret },
|
||||
{ 282, PT_SC, ucp_Devanagari },
|
||||
{ 293, PT_SC, ucp_Dogra },
|
||||
{ 299, PT_SC, ucp_Duployan },
|
||||
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 329, PT_SC, ucp_Elbasan },
|
||||
{ 337, PT_SC, ucp_Elymaic },
|
||||
{ 345, PT_SC, ucp_Ethiopic },
|
||||
{ 354, PT_SC, ucp_Georgian },
|
||||
{ 363, PT_SC, ucp_Glagolitic },
|
||||
{ 374, PT_SC, ucp_Gothic },
|
||||
{ 381, PT_SC, ucp_Grantha },
|
||||
{ 389, PT_SC, ucp_Greek },
|
||||
{ 395, PT_SC, ucp_Gujarati },
|
||||
{ 404, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 418, PT_SC, ucp_Gurmukhi },
|
||||
{ 427, PT_SC, ucp_Han },
|
||||
{ 431, PT_SC, ucp_Hangul },
|
||||
{ 438, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 454, PT_SC, ucp_Hanunoo },
|
||||
{ 462, PT_SC, ucp_Hatran },
|
||||
{ 469, PT_SC, ucp_Hebrew },
|
||||
{ 476, PT_SC, ucp_Hiragana },
|
||||
{ 485, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 502, PT_SC, ucp_Inherited },
|
||||
{ 512, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 534, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 557, PT_SC, ucp_Javanese },
|
||||
{ 566, PT_SC, ucp_Kaithi },
|
||||
{ 573, PT_SC, ucp_Kannada },
|
||||
{ 581, PT_SC, ucp_Katakana },
|
||||
{ 590, PT_SC, ucp_Kayah_Li },
|
||||
{ 599, PT_SC, ucp_Kharoshthi },
|
||||
{ 610, PT_SC, ucp_Khmer },
|
||||
{ 616, PT_SC, ucp_Khojki },
|
||||
{ 623, PT_SC, ucp_Khudawadi },
|
||||
{ 633, PT_GC, ucp_L },
|
||||
{ 635, PT_LAMP, 0 },
|
||||
{ 638, PT_SC, ucp_Lao },
|
||||
{ 642, PT_SC, ucp_Latin },
|
||||
{ 648, PT_SC, ucp_Lepcha },
|
||||
{ 655, PT_SC, ucp_Limbu },
|
||||
{ 661, PT_SC, ucp_Linear_A },
|
||||
{ 670, PT_SC, ucp_Linear_B },
|
||||
{ 679, PT_SC, ucp_Lisu },
|
||||
{ 684, PT_PC, ucp_Ll },
|
||||
{ 687, PT_PC, ucp_Lm },
|
||||
{ 690, PT_PC, ucp_Lo },
|
||||
{ 693, PT_PC, ucp_Lt },
|
||||
{ 696, PT_PC, ucp_Lu },
|
||||
{ 699, PT_SC, ucp_Lycian },
|
||||
{ 706, PT_SC, ucp_Lydian },
|
||||
{ 713, PT_GC, ucp_M },
|
||||
{ 715, PT_SC, ucp_Mahajani },
|
||||
{ 724, PT_SC, ucp_Makasar },
|
||||
{ 732, PT_SC, ucp_Malayalam },
|
||||
{ 742, PT_SC, ucp_Mandaic },
|
||||
{ 750, PT_SC, ucp_Manichaean },
|
||||
{ 761, PT_SC, ucp_Marchen },
|
||||
{ 769, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 783, PT_PC, ucp_Mc },
|
||||
{ 786, PT_PC, ucp_Me },
|
||||
{ 789, PT_SC, ucp_Medefaidrin },
|
||||
{ 801, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 814, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 828, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 845, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 866, PT_SC, ucp_Miao },
|
||||
{ 871, PT_PC, ucp_Mn },
|
||||
{ 874, PT_SC, ucp_Modi },
|
||||
{ 879, PT_SC, ucp_Mongolian },
|
||||
{ 889, PT_SC, ucp_Mro },
|
||||
{ 893, PT_SC, ucp_Multani },
|
||||
{ 901, PT_SC, ucp_Myanmar },
|
||||
{ 909, PT_GC, ucp_N },
|
||||
{ 911, PT_SC, ucp_Nabataean },
|
||||
{ 921, PT_SC, ucp_Nandinagari },
|
||||
{ 933, PT_PC, ucp_Nd },
|
||||
{ 936, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 948, PT_SC, ucp_Newa },
|
||||
{ 953, PT_SC, ucp_Nko },
|
||||
{ 957, PT_PC, ucp_Nl },
|
||||
{ 960, PT_PC, ucp_No },
|
||||
{ 963, PT_SC, ucp_Nushu },
|
||||
{ 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 992, PT_SC, ucp_Ogham },
|
||||
{ 998, PT_SC, ucp_Ol_Chiki },
|
||||
{ 1007, PT_SC, ucp_Old_Hungarian },
|
||||
{ 1021, PT_SC, ucp_Old_Italic },
|
||||
{ 1032, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1050, PT_SC, ucp_Old_Permic },
|
||||
{ 1061, PT_SC, ucp_Old_Persian },
|
||||
{ 1073, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1085, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1103, PT_SC, ucp_Old_Turkic },
|
||||
{ 1114, PT_SC, ucp_Oriya },
|
||||
{ 1120, PT_SC, ucp_Osage },
|
||||
{ 1126, PT_SC, ucp_Osmanya },
|
||||
{ 1134, PT_GC, ucp_P },
|
||||
{ 1136, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1149, PT_SC, ucp_Palmyrene },
|
||||
{ 1159, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1171, PT_PC, ucp_Pc },
|
||||
{ 1174, PT_PC, ucp_Pd },
|
||||
{ 1177, PT_PC, ucp_Pe },
|
||||
{ 1180, PT_PC, ucp_Pf },
|
||||
{ 1183, PT_SC, ucp_Phags_Pa },
|
||||
{ 1192, PT_SC, ucp_Phoenician },
|
||||
{ 1203, PT_PC, ucp_Pi },
|
||||
{ 1206, PT_PC, ucp_Po },
|
||||
{ 1209, PT_PC, ucp_Ps },
|
||||
{ 1212, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1228, PT_SC, ucp_Rejang },
|
||||
{ 1235, PT_SC, ucp_Runic },
|
||||
{ 1241, PT_GC, ucp_S },
|
||||
{ 1243, PT_SC, ucp_Samaritan },
|
||||
{ 1253, PT_SC, ucp_Saurashtra },
|
||||
{ 1264, PT_PC, ucp_Sc },
|
||||
{ 1267, PT_SC, ucp_Sharada },
|
||||
{ 1275, PT_SC, ucp_Shavian },
|
||||
{ 1283, PT_SC, ucp_Siddham },
|
||||
{ 1291, PT_SC, ucp_SignWriting },
|
||||
{ 1303, PT_SC, ucp_Sinhala },
|
||||
{ 1311, PT_PC, ucp_Sk },
|
||||
{ 1314, PT_PC, ucp_Sm },
|
||||
{ 1317, PT_PC, ucp_So },
|
||||
{ 1320, PT_SC, ucp_Sogdian },
|
||||
{ 1328, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1341, PT_SC, ucp_Soyombo },
|
||||
{ 1349, PT_SC, ucp_Sundanese },
|
||||
{ 1359, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1372, PT_SC, ucp_Syriac },
|
||||
{ 1379, PT_SC, ucp_Tagalog },
|
||||
{ 1387, PT_SC, ucp_Tagbanwa },
|
||||
{ 1396, PT_SC, ucp_Tai_Le },
|
||||
{ 1403, PT_SC, ucp_Tai_Tham },
|
||||
{ 1412, PT_SC, ucp_Tai_Viet },
|
||||
{ 1421, PT_SC, ucp_Takri },
|
||||
{ 1427, PT_SC, ucp_Tamil },
|
||||
{ 1433, PT_SC, ucp_Tangut },
|
||||
{ 1440, PT_SC, ucp_Telugu },
|
||||
{ 1447, PT_SC, ucp_Thaana },
|
||||
{ 1454, PT_SC, ucp_Thai },
|
||||
{ 1459, PT_SC, ucp_Tibetan },
|
||||
{ 1467, PT_SC, ucp_Tifinagh },
|
||||
{ 1476, PT_SC, ucp_Tirhuta },
|
||||
{ 1484, PT_SC, ucp_Ugaritic },
|
||||
{ 1493, PT_SC, ucp_Unknown },
|
||||
{ 1501, PT_SC, ucp_Vai },
|
||||
{ 1505, PT_SC, ucp_Wancho },
|
||||
{ 1512, PT_SC, ucp_Warang_Citi },
|
||||
{ 1524, PT_ALNUM, 0 },
|
||||
{ 1528, PT_PXSPACE, 0 },
|
||||
{ 1532, PT_SPACE, 0 },
|
||||
{ 1536, PT_UCNC, 0 },
|
||||
{ 1540, PT_WORD, 0 },
|
||||
{ 1544, PT_SC, ucp_Yi },
|
||||
{ 1547, PT_GC, ucp_Z },
|
||||
{ 1549, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1566, PT_PC, ucp_Zl },
|
||||
{ 1569, PT_PC, ucp_Zp },
|
||||
{ 1572, PT_PC, ucp_Zs }
|
||||
{ 224, PT_SC, ucp_Chorasmian },
|
||||
{ 235, PT_PC, ucp_Cn },
|
||||
{ 238, PT_PC, ucp_Co },
|
||||
{ 241, PT_SC, ucp_Common },
|
||||
{ 248, PT_SC, ucp_Coptic },
|
||||
{ 255, PT_PC, ucp_Cs },
|
||||
{ 258, PT_SC, ucp_Cuneiform },
|
||||
{ 268, PT_SC, ucp_Cypriot },
|
||||
{ 276, PT_SC, ucp_Cyrillic },
|
||||
{ 285, PT_SC, ucp_Deseret },
|
||||
{ 293, PT_SC, ucp_Devanagari },
|
||||
{ 304, PT_SC, ucp_Dives_Akuru },
|
||||
{ 316, PT_SC, ucp_Dogra },
|
||||
{ 322, PT_SC, ucp_Duployan },
|
||||
{ 331, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 352, PT_SC, ucp_Elbasan },
|
||||
{ 360, PT_SC, ucp_Elymaic },
|
||||
{ 368, PT_SC, ucp_Ethiopic },
|
||||
{ 377, PT_SC, ucp_Georgian },
|
||||
{ 386, PT_SC, ucp_Glagolitic },
|
||||
{ 397, PT_SC, ucp_Gothic },
|
||||
{ 404, PT_SC, ucp_Grantha },
|
||||
{ 412, PT_SC, ucp_Greek },
|
||||
{ 418, PT_SC, ucp_Gujarati },
|
||||
{ 427, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 441, PT_SC, ucp_Gurmukhi },
|
||||
{ 450, PT_SC, ucp_Han },
|
||||
{ 454, PT_SC, ucp_Hangul },
|
||||
{ 461, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 477, PT_SC, ucp_Hanunoo },
|
||||
{ 485, PT_SC, ucp_Hatran },
|
||||
{ 492, PT_SC, ucp_Hebrew },
|
||||
{ 499, PT_SC, ucp_Hiragana },
|
||||
{ 508, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 525, PT_SC, ucp_Inherited },
|
||||
{ 535, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 557, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 580, PT_SC, ucp_Javanese },
|
||||
{ 589, PT_SC, ucp_Kaithi },
|
||||
{ 596, PT_SC, ucp_Kannada },
|
||||
{ 604, PT_SC, ucp_Katakana },
|
||||
{ 613, PT_SC, ucp_Kayah_Li },
|
||||
{ 622, PT_SC, ucp_Kharoshthi },
|
||||
{ 633, PT_SC, ucp_Khitan_Small_Script },
|
||||
{ 653, PT_SC, ucp_Khmer },
|
||||
{ 659, PT_SC, ucp_Khojki },
|
||||
{ 666, PT_SC, ucp_Khudawadi },
|
||||
{ 676, PT_GC, ucp_L },
|
||||
{ 678, PT_LAMP, 0 },
|
||||
{ 681, PT_SC, ucp_Lao },
|
||||
{ 685, PT_SC, ucp_Latin },
|
||||
{ 691, PT_SC, ucp_Lepcha },
|
||||
{ 698, PT_SC, ucp_Limbu },
|
||||
{ 704, PT_SC, ucp_Linear_A },
|
||||
{ 713, PT_SC, ucp_Linear_B },
|
||||
{ 722, PT_SC, ucp_Lisu },
|
||||
{ 727, PT_PC, ucp_Ll },
|
||||
{ 730, PT_PC, ucp_Lm },
|
||||
{ 733, PT_PC, ucp_Lo },
|
||||
{ 736, PT_PC, ucp_Lt },
|
||||
{ 739, PT_PC, ucp_Lu },
|
||||
{ 742, PT_SC, ucp_Lycian },
|
||||
{ 749, PT_SC, ucp_Lydian },
|
||||
{ 756, PT_GC, ucp_M },
|
||||
{ 758, PT_SC, ucp_Mahajani },
|
||||
{ 767, PT_SC, ucp_Makasar },
|
||||
{ 775, PT_SC, ucp_Malayalam },
|
||||
{ 785, PT_SC, ucp_Mandaic },
|
||||
{ 793, PT_SC, ucp_Manichaean },
|
||||
{ 804, PT_SC, ucp_Marchen },
|
||||
{ 812, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 826, PT_PC, ucp_Mc },
|
||||
{ 829, PT_PC, ucp_Me },
|
||||
{ 832, PT_SC, ucp_Medefaidrin },
|
||||
{ 844, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 857, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 871, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 888, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 909, PT_SC, ucp_Miao },
|
||||
{ 914, PT_PC, ucp_Mn },
|
||||
{ 917, PT_SC, ucp_Modi },
|
||||
{ 922, PT_SC, ucp_Mongolian },
|
||||
{ 932, PT_SC, ucp_Mro },
|
||||
{ 936, PT_SC, ucp_Multani },
|
||||
{ 944, PT_SC, ucp_Myanmar },
|
||||
{ 952, PT_GC, ucp_N },
|
||||
{ 954, PT_SC, ucp_Nabataean },
|
||||
{ 964, PT_SC, ucp_Nandinagari },
|
||||
{ 976, PT_PC, ucp_Nd },
|
||||
{ 979, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 991, PT_SC, ucp_Newa },
|
||||
{ 996, PT_SC, ucp_Nko },
|
||||
{ 1000, PT_PC, ucp_Nl },
|
||||
{ 1003, PT_PC, ucp_No },
|
||||
{ 1006, PT_SC, ucp_Nushu },
|
||||
{ 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 1035, PT_SC, ucp_Ogham },
|
||||
{ 1041, PT_SC, ucp_Ol_Chiki },
|
||||
{ 1050, PT_SC, ucp_Old_Hungarian },
|
||||
{ 1064, PT_SC, ucp_Old_Italic },
|
||||
{ 1075, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1093, PT_SC, ucp_Old_Permic },
|
||||
{ 1104, PT_SC, ucp_Old_Persian },
|
||||
{ 1116, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1128, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1146, PT_SC, ucp_Old_Turkic },
|
||||
{ 1157, PT_SC, ucp_Oriya },
|
||||
{ 1163, PT_SC, ucp_Osage },
|
||||
{ 1169, PT_SC, ucp_Osmanya },
|
||||
{ 1177, PT_GC, ucp_P },
|
||||
{ 1179, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1192, PT_SC, ucp_Palmyrene },
|
||||
{ 1202, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1214, PT_PC, ucp_Pc },
|
||||
{ 1217, PT_PC, ucp_Pd },
|
||||
{ 1220, PT_PC, ucp_Pe },
|
||||
{ 1223, PT_PC, ucp_Pf },
|
||||
{ 1226, PT_SC, ucp_Phags_Pa },
|
||||
{ 1235, PT_SC, ucp_Phoenician },
|
||||
{ 1246, PT_PC, ucp_Pi },
|
||||
{ 1249, PT_PC, ucp_Po },
|
||||
{ 1252, PT_PC, ucp_Ps },
|
||||
{ 1255, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1271, PT_SC, ucp_Rejang },
|
||||
{ 1278, PT_SC, ucp_Runic },
|
||||
{ 1284, PT_GC, ucp_S },
|
||||
{ 1286, PT_SC, ucp_Samaritan },
|
||||
{ 1296, PT_SC, ucp_Saurashtra },
|
||||
{ 1307, PT_PC, ucp_Sc },
|
||||
{ 1310, PT_SC, ucp_Sharada },
|
||||
{ 1318, PT_SC, ucp_Shavian },
|
||||
{ 1326, PT_SC, ucp_Siddham },
|
||||
{ 1334, PT_SC, ucp_SignWriting },
|
||||
{ 1346, PT_SC, ucp_Sinhala },
|
||||
{ 1354, PT_PC, ucp_Sk },
|
||||
{ 1357, PT_PC, ucp_Sm },
|
||||
{ 1360, PT_PC, ucp_So },
|
||||
{ 1363, PT_SC, ucp_Sogdian },
|
||||
{ 1371, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1384, PT_SC, ucp_Soyombo },
|
||||
{ 1392, PT_SC, ucp_Sundanese },
|
||||
{ 1402, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1415, PT_SC, ucp_Syriac },
|
||||
{ 1422, PT_SC, ucp_Tagalog },
|
||||
{ 1430, PT_SC, ucp_Tagbanwa },
|
||||
{ 1439, PT_SC, ucp_Tai_Le },
|
||||
{ 1446, PT_SC, ucp_Tai_Tham },
|
||||
{ 1455, PT_SC, ucp_Tai_Viet },
|
||||
{ 1464, PT_SC, ucp_Takri },
|
||||
{ 1470, PT_SC, ucp_Tamil },
|
||||
{ 1476, PT_SC, ucp_Tangut },
|
||||
{ 1483, PT_SC, ucp_Telugu },
|
||||
{ 1490, PT_SC, ucp_Thaana },
|
||||
{ 1497, PT_SC, ucp_Thai },
|
||||
{ 1502, PT_SC, ucp_Tibetan },
|
||||
{ 1510, PT_SC, ucp_Tifinagh },
|
||||
{ 1519, PT_SC, ucp_Tirhuta },
|
||||
{ 1527, PT_SC, ucp_Ugaritic },
|
||||
{ 1536, PT_SC, ucp_Unknown },
|
||||
{ 1544, PT_SC, ucp_Vai },
|
||||
{ 1548, PT_SC, ucp_Wancho },
|
||||
{ 1555, PT_SC, ucp_Warang_Citi },
|
||||
{ 1567, PT_ALNUM, 0 },
|
||||
{ 1571, PT_PXSPACE, 0 },
|
||||
{ 1575, PT_SPACE, 0 },
|
||||
{ 1579, PT_UCNC, 0 },
|
||||
{ 1583, PT_WORD, 0 },
|
||||
{ 1587, PT_SC, ucp_Yezidi },
|
||||
{ 1594, PT_SC, ucp_Yi },
|
||||
{ 1597, PT_GC, ucp_Z },
|
||||
{ 1599, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1616, PT_PC, ucp_Zl },
|
||||
{ 1619, PT_PC, ucp_Zp },
|
||||
{ 1622, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -286,7 +286,12 @@ enum {
|
|||
ucp_Elymaic,
|
||||
ucp_Nandinagari,
|
||||
ucp_Nyiakeng_Puachue_Hmong,
|
||||
ucp_Wancho
|
||||
ucp_Wancho,
|
||||
/* New for Unicode 13.0.0 */
|
||||
ucp_Chorasmian,
|
||||
ucp_Dives_Akuru,
|
||||
ucp_Khitan_Small_Script,
|
||||
ucp_Yezidi
|
||||
};
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -347,7 +347,7 @@ for (p = string; length > 0; p++)
|
|||
length--;
|
||||
if ((*p & 0xfc00) != 0xdc00)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
*erroroffset = p - string - 1;
|
||||
return PCRE2_ERROR_UTF16_ERR2;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue