mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-12 21:18:53 +00:00
Merge branch 'Integration_2.3.0'
Merge the pcre2 10.20 to 10.21 update. Closes #2943.
This commit is contained in:
commit
9aeed0dc06
122 changed files with 8599 additions and 5728 deletions
|
@ -62,7 +62,7 @@ extra_confdir = @extra_confdir@
|
|||
#
|
||||
|
||||
PCRE2_WIDTH = @WCHAR_T_BITS@
|
||||
PCRE2_DIR = pcre2-10.20
|
||||
PCRE2_DIR = pcre2-10.21
|
||||
PCRE2_LIBDIR = $(PCRE2_DIR)/.libs
|
||||
PCRE2_LIB = $(PCRE2_LIBDIR)/libpcre2-$(PCRE2_WIDTH).a
|
||||
PCRE2_H = $(PCRE2_DIR)/src/pcre2.h
|
||||
|
|
|
@ -71,14 +71,14 @@ if set -q c_files[1]
|
|||
oclint-xcodebuild xcodebuild.log > /dev/null
|
||||
end
|
||||
if test $all = yes
|
||||
oclint-json-compilation-database -e '/pcre2-10.20/' -- -enable-global-analysis 2>& 1
|
||||
oclint-json-compilation-database -e '/pcre2-10.21/' -- -enable-global-analysis 2>& 1
|
||||
else
|
||||
set i_files
|
||||
for f in $c_files
|
||||
set i_files $i_files -i $f
|
||||
end
|
||||
echo oclint-json-compilation-database -e '/pcre2-10.20/' $i_files
|
||||
oclint-json-compilation-database -e '/pcre2-10.20/' $i_files 2>& 1
|
||||
echo oclint-json-compilation-database -e '/pcre2-10.21/' $i_files
|
||||
oclint-json-compilation-database -e '/pcre2-10.21/' $i_files 2>& 1
|
||||
end
|
||||
else
|
||||
# Presumably we're on Linux or other platform not requiring special
|
||||
|
|
|
@ -849,7 +849,7 @@ if test "x$included_pcre2" = "xyes"; then
|
|||
AC_MSG_NOTICE([using included PCRE2 library])
|
||||
# unfortunately these get added to the global configuration
|
||||
ac_configure_args="$ac_configure_args --disable-pcre2-8 --enable-pcre2-$WCHAR_T_BITS --disable-shared"
|
||||
AC_CONFIG_SUBDIRS([pcre2-10.20])
|
||||
AC_CONFIG_SUBDIRS([pcre2-10.21])
|
||||
|
||||
PCRE2_CXXFLAGS='-I$(PCRE2_DIR)/src'
|
||||
PCRE2_LIBS='-L$(PCRE2_LIBDIR) -lpcre2-$(PCRE2_WIDTH)'
|
||||
|
|
|
@ -66,6 +66,7 @@
|
|||
/* End PBXAggregateTarget section */
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
63A2C0E91CC60F3B00973404 /* pcre2_find_bracket.c in Sources */ = {isa = PBXBuildFile; fileRef = 63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */; };
|
||||
D00769121990137800CA4627 /* autoload.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */; };
|
||||
D00769131990137800CA4627 /* builtin_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */; };
|
||||
D00769141990137800CA4627 /* color.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0B6B0FE14E88BA400AD6C10 /* color.cpp */; };
|
||||
|
@ -448,6 +449,7 @@
|
|||
|
||||
/* Begin PBXFileReference section */
|
||||
4E142D731B56B5D7008783C8 /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = config.h; path = ../osx/config.h; sourceTree = "<group>"; };
|
||||
63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_find_bracket.c; sourceTree = "<group>"; };
|
||||
D00769421990137800CA4627 /* fish_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_tests; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
D00F63F019137E9D00FCCDEC /* fish_version.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_version.cpp; sourceTree = "<group>"; };
|
||||
D01A2D23169B730A00767098 /* man1 */ = {isa = PBXFileReference; lastKnownFileType = text; name = man1; path = pages_for_manpath/man1; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
|
@ -459,30 +461,30 @@
|
|||
D032388A1849D1980032CF2C /* pager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pager.h; sourceTree = "<group>"; };
|
||||
D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = "<group>"; };
|
||||
D04F7F7B1BA4BF4000B0F227 /* builtin_string.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_string.cpp; sourceTree = "<group>"; };
|
||||
D04F7F8D1BA4DCD900B0F227 /* pcre2_compile.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_compile.c; path = "pcre2-10.20/src/pcre2_compile.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F901BA4DCE900B0F227 /* pcre2_config.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_config.c; path = "pcre2-10.20/src/pcre2_config.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F931BA4DCFA00B0F227 /* pcre2_context.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_context.c; path = "pcre2-10.20/src/pcre2_context.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F961BA4DD1100B0F227 /* pcre2_dfa_match.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_dfa_match.c; path = "pcre2-10.20/src/pcre2_dfa_match.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F991BA4DD2000B0F227 /* pcre2_error.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_error.c; path = "pcre2-10.20/src/pcre2_error.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F9C1BA4DD4A00B0F227 /* pcre2_maketables.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_maketables.c; path = "pcre2-10.20/src/pcre2_maketables.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F9F1BA4DD5900B0F227 /* pcre2_match.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_match.c; path = "pcre2-10.20/src/pcre2_match.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FA21BA4DD6900B0F227 /* pcre2_match_data.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_match_data.c; path = "pcre2-10.20/src/pcre2_match_data.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FA51BA4DD7300B0F227 /* pcre2_newline.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_newline.c; path = "pcre2-10.20/src/pcre2_newline.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FA81BA4DD8400B0F227 /* pcre2_ord2utf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_ord2utf.c; path = "pcre2-10.20/src/pcre2_ord2utf.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FAB1BA4DDA500B0F227 /* pcre2_pattern_info.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_pattern_info.c; path = "pcre2-10.20/src/pcre2_pattern_info.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FAE1BA4DDB500B0F227 /* pcre2_serialize.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_serialize.c; path = "pcre2-10.20/src/pcre2_serialize.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FB11BA4DDBF00B0F227 /* pcre2_string_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_string_utils.c; path = "pcre2-10.20/src/pcre2_string_utils.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FB41BA4DDC900B0F227 /* pcre2_study.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_study.c; path = "pcre2-10.20/src/pcre2_study.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FB71BA4DDEB00B0F227 /* pcre2_substitute.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_substitute.c; path = "pcre2-10.20/src/pcre2_substitute.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FB81BA4DDEB00B0F227 /* pcre2_substring.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_substring.c; path = "pcre2-10.20/src/pcre2_substring.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FB91BA4DDEB00B0F227 /* pcre2_tables.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_tables.c; path = "pcre2-10.20/src/pcre2_tables.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FBA1BA4DDEB00B0F227 /* pcre2_ucd.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_ucd.c; path = "pcre2-10.20/src/pcre2_ucd.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FBB1BA4DDEB00B0F227 /* pcre2_valid_utf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_valid_utf.c; path = "pcre2-10.20/src/pcre2_valid_utf.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FBC1BA4DDEB00B0F227 /* pcre2_xclass.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_xclass.c; path = "pcre2-10.20/src/pcre2_xclass.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FC91BA4DE3500B0F227 /* pcre2_jit_compile.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_jit_compile.c; path = "pcre2-10.20/src/pcre2_jit_compile.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7F8D1BA4DCD900B0F227 /* pcre2_compile.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_compile.c; sourceTree = "<group>"; };
|
||||
D04F7F901BA4DCE900B0F227 /* pcre2_config.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_config.c; sourceTree = "<group>"; };
|
||||
D04F7F931BA4DCFA00B0F227 /* pcre2_context.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_context.c; sourceTree = "<group>"; };
|
||||
D04F7F961BA4DD1100B0F227 /* pcre2_dfa_match.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_dfa_match.c; sourceTree = "<group>"; };
|
||||
D04F7F991BA4DD2000B0F227 /* pcre2_error.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_error.c; sourceTree = "<group>"; };
|
||||
D04F7F9C1BA4DD4A00B0F227 /* pcre2_maketables.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_maketables.c; sourceTree = "<group>"; };
|
||||
D04F7F9F1BA4DD5900B0F227 /* pcre2_match.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_match.c; sourceTree = "<group>"; };
|
||||
D04F7FA21BA4DD6900B0F227 /* pcre2_match_data.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_match_data.c; sourceTree = "<group>"; };
|
||||
D04F7FA51BA4DD7300B0F227 /* pcre2_newline.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_newline.c; sourceTree = "<group>"; };
|
||||
D04F7FA81BA4DD8400B0F227 /* pcre2_ord2utf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_ord2utf.c; sourceTree = "<group>"; };
|
||||
D04F7FAB1BA4DDA500B0F227 /* pcre2_pattern_info.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_pattern_info.c; sourceTree = "<group>"; };
|
||||
D04F7FAE1BA4DDB500B0F227 /* pcre2_serialize.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_serialize.c; sourceTree = "<group>"; };
|
||||
D04F7FB11BA4DDBF00B0F227 /* pcre2_string_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_string_utils.c; sourceTree = "<group>"; };
|
||||
D04F7FB41BA4DDC900B0F227 /* pcre2_study.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_study.c; sourceTree = "<group>"; };
|
||||
D04F7FB71BA4DDEB00B0F227 /* pcre2_substitute.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_substitute.c; sourceTree = "<group>"; };
|
||||
D04F7FB81BA4DDEB00B0F227 /* pcre2_substring.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_substring.c; sourceTree = "<group>"; };
|
||||
D04F7FB91BA4DDEB00B0F227 /* pcre2_tables.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_tables.c; sourceTree = "<group>"; };
|
||||
D04F7FBA1BA4DDEB00B0F227 /* pcre2_ucd.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_ucd.c; sourceTree = "<group>"; };
|
||||
D04F7FBB1BA4DDEB00B0F227 /* pcre2_valid_utf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_valid_utf.c; sourceTree = "<group>"; };
|
||||
D04F7FBC1BA4DDEB00B0F227 /* pcre2_xclass.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_xclass.c; sourceTree = "<group>"; };
|
||||
D04F7FC91BA4DE3500B0F227 /* pcre2_jit_compile.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_jit_compile.c; sourceTree = "<group>"; };
|
||||
D04F7FD01BA4E29300B0F227 /* libpcre2.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpcre2.a; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
D04F7FF31BA4E6F300B0F227 /* pcre2_auto_possess.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = pcre2_auto_possess.c; path = "pcre2-10.20/src/pcre2_auto_possess.c"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FF71BA4E82C00B0F227 /* pcre2_chartables.c.dist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = pcre2_chartables.c.dist; path = "pcre2-10.20/src/pcre2_chartables.c.dist"; sourceTree = SOURCE_ROOT; };
|
||||
D04F7FF31BA4E6F300B0F227 /* pcre2_auto_possess.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_auto_possess.c; sourceTree = "<group>"; };
|
||||
D04F7FF71BA4E82C00B0F227 /* pcre2_chartables.c.dist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = pcre2_chartables.c.dist; sourceTree = "<group>"; };
|
||||
D052D8091868F7FC003ABCBD /* parse_execution.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_execution.cpp; sourceTree = "<group>"; };
|
||||
D052D80A1868F7FC003ABCBD /* parse_execution.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_execution.h; sourceTree = "<group>"; };
|
||||
D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = "<group>"; };
|
||||
|
@ -663,6 +665,7 @@
|
|||
D04F7F8B1BA4DC7600B0F227 /* pcre */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */,
|
||||
D04F7FF31BA4E6F300B0F227 /* pcre2_auto_possess.c */,
|
||||
D04F7F8D1BA4DCD900B0F227 /* pcre2_compile.c */,
|
||||
D04F7F901BA4DCE900B0F227 /* pcre2_config.c */,
|
||||
|
@ -688,7 +691,8 @@
|
|||
D04F7FF71BA4E82C00B0F227 /* pcre2_chartables.c.dist */,
|
||||
);
|
||||
name = pcre;
|
||||
sourceTree = "<group>";
|
||||
path = "pcre2-10.21/src";
|
||||
sourceTree = SOURCE_ROOT;
|
||||
};
|
||||
D08A328E17B4455100F3A533 /* fish_tests */ = {
|
||||
isa = PBXGroup;
|
||||
|
@ -1273,6 +1277,7 @@
|
|||
D04F7FDE1BA4E3AC00B0F227 /* pcre2_newline.c in Sources */,
|
||||
D04F7FDF1BA4E3AC00B0F227 /* pcre2_ord2utf.c in Sources */,
|
||||
D04F7FE01BA4E3AC00B0F227 /* pcre2_pattern_info.c in Sources */,
|
||||
63A2C0E91CC60F3B00973404 /* pcre2_find_bracket.c in Sources */,
|
||||
D04F7FF41BA4E6F300B0F227 /* pcre2_auto_possess.c in Sources */,
|
||||
D04F7FE11BA4E3AC00B0F227 /* pcre2_serialize.c in Sources */,
|
||||
D04F7FE21BA4E3AC00B0F227 /* pcre2_string_utils.c in Sources */,
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2015 University of Cambridge
|
||||
Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 20
|
||||
#define PCRE2_MINOR 21
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2015-06-30
|
||||
#define PCRE2_DATE 2016-01-12
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -120,6 +120,8 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
|
@ -144,9 +146,13 @@ sanity checks). */
|
|||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* This is an additional option for pcre2_substitute(). */
|
||||
/* These are additional options for pcre2_substitute(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
|
@ -233,6 +239,12 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -259,6 +271,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -291,6 +304,7 @@ define special values to indicate zero-terminated strings and unset offsets in
|
|||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
|
@ -388,6 +402,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -405,6 +421,8 @@ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
|
@ -606,8 +624,10 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
|
|
|
@ -1,674 +0,0 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
|
@ -1,47 +0,0 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments and the pcre2_callout_enumerate() function
|
||||
have been implemented.
|
||||
|
||||
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
|
||||
|
||||
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
|
||||
subject in multiline mode.
|
||||
|
||||
4. The way named subpatterns are handled has been refactored. The previous
|
||||
approach had several bugs.
|
||||
|
||||
5. The handling of \c in EBCDIC environments has been changed to conform to the
|
||||
perlebcdic document. This is an incompatible change.
|
||||
|
||||
6. Bugs have been mended, many of them discovered by fuzzers.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. Serialization and de-serialization functions have been added to the API,
|
||||
making it possible to save and restore sets of compiled patterns, though
|
||||
restoration must be done in the same environment that was used for compilation.
|
||||
|
||||
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
|
||||
creator to specify that JIT is not to be used.
|
||||
|
||||
3. A number of bugs have been fixed. In particular, bugs that caused building
|
||||
on Windows using CMake to fail have been mended.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36. New programs are recommended to use the
|
||||
new library. Programs that use the original (PCRE1) API will need changing
|
||||
before linking with the new library.
|
||||
|
||||
****
|
|
@ -1,315 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match and substitute *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and creates a new
|
||||
string with substitutions. The first 7 arguments are the same as for
|
||||
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block, or is NULL
|
||||
mcontext points to a PCRE2 match context
|
||||
replacement points to the replacement string
|
||||
rlength length of replacement string
|
||||
buffer where to put the substituted string
|
||||
blength points to length of buffer; updated to length of string
|
||||
|
||||
Returns: >= 0 number of substitutions made
|
||||
< 0 an error code
|
||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||
{
|
||||
int rc;
|
||||
int subs;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL global = FALSE;
|
||||
PCRE2_SIZE buff_offset, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
/* Partial matching is not valid. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* If no match data block is provided, create one. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context *gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
match_data_created = TRUE;
|
||||
}
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
/* Check UTF replacement string if necessary. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((code->overall_options & PCRE2_UTF) != 0 &&
|
||||
(options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Notice the global option and remove it from the options that are passed to
|
||||
pcre2_match(). */
|
||||
|
||||
if ((options & PCRE2_SUBSTITUTE_GLOBAL) != 0)
|
||||
{
|
||||
options &= ~PCRE2_SUBSTITUTE_GLOBAL;
|
||||
global = TRUE;
|
||||
}
|
||||
|
||||
/* Find lengths of zero-terminated strings. */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||
|
||||
/* Copy up to the start offset */
|
||||
|
||||
if (start_offset > *blength) goto NOROOM;
|
||||
memcpy(buffer, subject, start_offset * (PCRE2_CODE_UNIT_WIDTH/8));
|
||||
buff_offset = start_offset;
|
||||
lengthleft = *blength - start_offset;
|
||||
|
||||
/* Loop for global substituting. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
{
|
||||
PCRE2_SIZE i;
|
||||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
subject, breaks the global loop. Otherwise, advance the starting point by one
|
||||
character, copying it to the output, and try again. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
PCRE2_SIZE save_start;
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
|
||||
if (goptions == 0 || start_offset >= length) break;
|
||||
|
||||
save_start = start_offset++;
|
||||
if ((code->overall_options & PCRE2_UTF) != 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
|
||||
start_offset++;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
while (start_offset < length &&
|
||||
(subject[start_offset] & 0xfc00) == 0xdc00)
|
||||
start_offset++;
|
||||
#endif
|
||||
}
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
if (lengthleft < fraglength) goto NOROOM;
|
||||
memcpy(buffer + buff_offset, subject + save_start,
|
||||
fraglength*(PCRE2_CODE_UNIT_WIDTH/8));
|
||||
buff_offset += fraglength;
|
||||
lengthleft -= fraglength;
|
||||
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. */
|
||||
|
||||
subs++;
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
if (fraglength >= lengthleft) goto NOROOM;
|
||||
memcpy(buffer + buff_offset, subject + start_offset,
|
||||
fraglength*(PCRE2_CODE_UNIT_WIDTH/8));
|
||||
buff_offset += fraglength;
|
||||
lengthleft -= fraglength;
|
||||
|
||||
for (i = 0; i < rlength; i++)
|
||||
{
|
||||
if (replacement[i] == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
int group, n;
|
||||
BOOL inparens;
|
||||
PCRE2_SIZE sublength;
|
||||
PCRE2_UCHAR next;
|
||||
PCRE2_UCHAR name[33];
|
||||
|
||||
if (++i == rlength) goto BAD;
|
||||
if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
|
||||
|
||||
group = -1;
|
||||
n = 0;
|
||||
inparens = FALSE;
|
||||
|
||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (++i == rlength) goto BAD;
|
||||
next = replacement[i];
|
||||
inparens = TRUE;
|
||||
}
|
||||
|
||||
if (next >= CHAR_0 && next <= CHAR_9)
|
||||
{
|
||||
group = next - CHAR_0;
|
||||
while (++i < rlength)
|
||||
{
|
||||
next = replacement[i];
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + next - CHAR_0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint8_t *ctypes = code->tables + ctypes_offset;
|
||||
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
|
||||
{
|
||||
name[n++] = next;
|
||||
if (n > 32) goto BAD;
|
||||
if (i == rlength) break;
|
||||
next = replacement[++i];
|
||||
}
|
||||
if (n == 0) goto BAD;
|
||||
name[n] = 0;
|
||||
}
|
||||
|
||||
if (inparens)
|
||||
{
|
||||
if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
|
||||
}
|
||||
else i--; /* Last code unit of name/number */
|
||||
|
||||
/* Have found a syntactically correct group number or name. */
|
||||
|
||||
sublength = lengthleft;
|
||||
if (group < 0)
|
||||
rc = pcre2_substring_copy_byname(match_data, name,
|
||||
buffer + buff_offset, &sublength);
|
||||
else
|
||||
rc = pcre2_substring_copy_bynumber(match_data, group,
|
||||
buffer + buff_offset, &sublength);
|
||||
|
||||
if (rc < 0) goto EXIT;
|
||||
buff_offset += sublength;
|
||||
lengthleft -= sublength;
|
||||
}
|
||||
|
||||
/* Handle a literal code unit */
|
||||
|
||||
else
|
||||
{
|
||||
LITERAL:
|
||||
if (lengthleft-- < 1) goto NOROOM;
|
||||
buffer[buff_offset++] = replacement[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* The replacement has been copied to the output. Update the start offset to
|
||||
point to the rest of the subject string. If we matched an empty string,
|
||||
do the magic for global matches. */
|
||||
|
||||
start_offset = ovector[1];
|
||||
goptions = (ovector[0] != ovector[1])? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
} while (global); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject and return the number of substitutions. */
|
||||
|
||||
rc = subs;
|
||||
fraglength = length - start_offset;
|
||||
if (fraglength + 1 > lengthleft) goto NOROOM;
|
||||
memcpy(buffer + buff_offset, subject + start_offset,
|
||||
fraglength*(PCRE2_CODE_UNIT_WIDTH/8));
|
||||
buff_offset += fraglength;
|
||||
buffer[buff_offset] = 0;
|
||||
*blength = buff_offset;
|
||||
|
||||
EXIT:
|
||||
if (match_data_created) pcre2_match_data_free(match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
NOROOM:
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
goto EXIT;
|
||||
|
||||
BAD:
|
||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2_substitute.c */
|
|
@ -12,4 +12,4 @@ pcre2_chartables.c
|
|||
pcre2-config
|
||||
pcre2test
|
||||
pcre2.h
|
||||
stamp-h1
|
||||
stamp-h1
|
|
@ -148,7 +148,7 @@ while (<STDIN>)
|
|||
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
|
||||
$ref, $ref);
|
||||
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
|
||||
$ref, $ref);
|
||||
$ref);
|
||||
$ref++;
|
||||
}
|
||||
else
|
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2015 University of Cambridge
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2015 Zoltan Herczeg
|
||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2015 Zoltan Herczeg
|
||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
|
@ -67,7 +67,10 @@
|
|||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
|
||||
# 2015-04024 PH added support for PCRE2_DEBUG
|
||||
# 2015-04-24 PH added support for PCRE2_DEBUG
|
||||
# 2015-07-16 PH updated for new pcre2_find_bracket source module
|
||||
# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III)
|
||||
# 2015-10-16 PH added support for never-backslash-C
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
|
@ -79,7 +82,7 @@ CMAKE_POLICY(SET CMP0026 OLD)
|
|||
|
||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
|
||||
SET(CMAKE_C_FLAGS -I${PROJECT_SOURCE_DIR}/src)
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
|
||||
|
||||
# external packages
|
||||
FIND_PACKAGE( BZip2 )
|
||||
|
@ -160,6 +163,9 @@ SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL
|
|||
SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
||||
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
|
||||
|
||||
SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL
|
||||
"If ON, backslash-C (upper case C) is locked out.")
|
||||
|
||||
SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||
"Enable Valgrind support.")
|
||||
|
||||
|
@ -250,6 +256,10 @@ IF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
|||
SET(BSR_ANYCRLF 1)
|
||||
ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF)
|
||||
|
||||
IF(PCRE2_NEVER_BACKSLASH_C)
|
||||
SET(NEVER_BACKSLASH_C 1)
|
||||
ENDIF(PCRE2_NEVER_BACKSLASH_C)
|
||||
|
||||
IF(PCRE2_SUPPORT_UNICODE)
|
||||
SET(SUPPORT_UNICODE 1)
|
||||
ENDIF(PCRE2_SUPPORT_UNICODE)
|
||||
|
@ -390,6 +400,7 @@ SET(PCRE2_SOURCES
|
|||
src/pcre2_context.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_find_bracket.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
|
@ -716,6 +727,7 @@ IF(PCRE2_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}")
|
||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}")
|
||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
|
||||
MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}")
|
||||
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}")
|
||||
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
|
||||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
5
pcre2-10.21/COPYING
Normal file
5
pcre2-10.21/COPYING
Normal file
|
@ -0,0 +1,5 @@
|
|||
PCRE2 LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE2 distribution for licensing details.
|
||||
|
||||
End
|
|
@ -1,6 +1,397 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
||||
1. Improve matching speed of patterns starting with + or * in JIT.
|
||||
|
||||
2. Use memchr() to find the first character in an unanchored match in 8-bit
|
||||
mode in the interpreter. This gives a significant speed improvement.
|
||||
|
||||
3. Removed a redundant copy of the opcode_possessify table in the
|
||||
pcre2_auto_possessify.c source.
|
||||
|
||||
4. Fix typos in dftables.c for z/OS.
|
||||
|
||||
5. Change 36 for 10.20 broke the handling of [[:>:]] and [[:<:]] in that
|
||||
processing them could involve a buffer overflow if the following character was
|
||||
an opening parenthesis.
|
||||
|
||||
6. Change 36 for 10.20 also introduced a bug in processing this pattern:
|
||||
/((?x)(*:0))#(?'/. Specifically: if a setting of (?x) was followed by a (*MARK)
|
||||
setting (which (*:0) is), then (?x) did not get unset at the end of its group
|
||||
during the scan for named groups, and hence the external # was incorrectly
|
||||
treated as a comment and the invalid (?' at the end of the pattern was not
|
||||
diagnosed. This caused a buffer overflow during the real compile. This bug was
|
||||
discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its
|
||||
own source module to avoid a circular dependency between src/pcre2_compile.c
|
||||
and src/pcre2_study.c
|
||||
|
||||
8. A callout with a string argument containing an opening square bracket, for
|
||||
example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer
|
||||
overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
9. The handling of callouts during the pre-pass for named group identification
|
||||
has been tightened up.
|
||||
|
||||
10. The quantifier {1} can be ignored, whether greedy, non-greedy, or
|
||||
possessive. This is a very minor optimization.
|
||||
|
||||
11. A possessively repeated conditional group that could match an empty string,
|
||||
for example, /(?(R))*+/, was incorrectly compiled.
|
||||
|
||||
12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian
|
||||
Persch).
|
||||
|
||||
13. An empty comment (?#) in a pattern was incorrectly processed and could
|
||||
provoke a buffer overflow. This bug was discovered by Karl Skomski with the
|
||||
LLVM fuzzer.
|
||||
|
||||
14. Fix infinite recursion in the JIT compiler when certain patterns such as
|
||||
/(?:|a|){100}x/ are analysed.
|
||||
|
||||
15. Some patterns with character classes involving [: and \\ were incorrectly
|
||||
compiled and could cause reading from uninitialized memory or an incorrect
|
||||
error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]. The
|
||||
first of these bugs was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
16. Pathological patterns containing many nested occurrences of [: caused
|
||||
pcre2_compile() to run for a very long time. This bug was found by the LLVM
|
||||
fuzzer.
|
||||
|
||||
17. A missing closing parenthesis for a callout with a string argument was not
|
||||
being diagnosed, possibly leading to a buffer overflow. This bug was found by
|
||||
the LLVM fuzzer.
|
||||
|
||||
18. A conditional group with only one branch has an implicit empty alternative
|
||||
branch and must therefore be treated as potentially matching an empty string.
|
||||
|
||||
19. If (?R was followed by - or + incorrect behaviour happened instead of a
|
||||
diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
20. Another bug that was introduced by change 36 for 10.20: conditional groups
|
||||
whose condition was an assertion preceded by an explicit callout with a string
|
||||
argument might be incorrectly processed, especially if the string contained \Q.
|
||||
This bug was discovered by Karl Skomski with the LLVM fuzzer.
|
||||
|
||||
21. Compiling PCRE2 with the sanitize options of clang showed up a number of
|
||||
very pedantic coding infelicities and a buffer overflow while checking a UTF-8
|
||||
string if the final multi-byte UTF-8 character was truncated.
|
||||
|
||||
22. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
|
||||
class, where both values are literal letters in the same case, omit the
|
||||
non-letter EBCDIC code points within the range.
|
||||
|
||||
23. Finding the minimum matching length of complex patterns with back
|
||||
references and/or recursions can take a long time. There is now a cut-off that
|
||||
gives up trying to find a minimum length when things get too complex.
|
||||
|
||||
24. An optimization has been added that speeds up finding the minimum matching
|
||||
length for patterns containing repeated capturing groups or recursions.
|
||||
|
||||
25. If a pattern contained a back reference to a group whose number was
|
||||
duplicated as a result of appearing in a (?|...) group, the computation of the
|
||||
minimum matching length gave a wrong result, which could cause incorrect "no
|
||||
match" errors. For such patterns, a minimum matching length cannot at present
|
||||
be computed.
|
||||
|
||||
26. Added a check for integer overflow in conditions (?(<digits>) and
|
||||
(?(R<digits>). This omission was discovered by Karl Skomski with the LLVM
|
||||
fuzzer.
|
||||
|
||||
27. Fixed an issue when \p{Any} inside an xclass did not read the current
|
||||
character.
|
||||
|
||||
28. If pcre2grep was given the -q option with -c or -l, or when handling a
|
||||
binary file, it incorrectly wrote output to stdout.
|
||||
|
||||
29. The JIT compiler did not restore the control verb head in case of *THEN
|
||||
control verbs. This issue was found by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
30. The way recursive references such as (?3) are compiled has been re-written
|
||||
because the old way was the cause of many issues. Now, conversion of the group
|
||||
number into a pattern offset does not happen until the pattern has been
|
||||
completely compiled. This does mean that detection of all infinitely looping
|
||||
recursions is postponed till match time. In the past, some easy ones were
|
||||
detected at compile time. This re-writing was done in response to yet another
|
||||
bug found by the LLVM fuzzer.
|
||||
|
||||
31. A test for a back reference to a non-existent group was missing for items
|
||||
such as \987. This caused incorrect code to be compiled. This issue was found
|
||||
by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
32. Error messages for syntax errors following \g and \k were giving inaccurate
|
||||
offsets in the pattern.
|
||||
|
||||
33. Improve the performance of starting single character repetitions in JIT.
|
||||
|
||||
34. (*LIMIT_MATCH=) now gives an error instead of setting the value to 0.
|
||||
|
||||
35. Error messages for syntax errors in *LIMIT_MATCH and *LIMIT_RECURSION now
|
||||
give the right offset instead of zero.
|
||||
|
||||
36. The JIT compiler should not check repeats after a {0,1} repeat byte code.
|
||||
This issue was found by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
37. The JIT compiler should restore the control chain for empty possessive
|
||||
repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer.
|
||||
|
||||
38. A bug which was introduced by the single character repetition optimization
|
||||
was fixed.
|
||||
|
||||
39. Match limit check added to recursion. This issue was found by Karl Skomski
|
||||
with a custom LLVM fuzzer.
|
||||
|
||||
40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look
|
||||
only at the part of the subject that is relevant when the starting offset is
|
||||
non-zero.
|
||||
|
||||
41. Improve first character match in JIT with SSE2 on x86.
|
||||
|
||||
42. Fix two assertion fails in JIT. These issues were found by Karl Skomski
|
||||
with a custom LLVM fuzzer.
|
||||
|
||||
43. Correct the setting of CMAKE_C_FLAGS in CMakeLists.txt (patch from Roy Ivy
|
||||
III).
|
||||
|
||||
44. Fix bug in RunTest.bat for new test 14, and adjust the script for the added
|
||||
test (there are now 20 in total).
|
||||
|
||||
45. Fixed a corner case of range optimization in JIT.
|
||||
|
||||
46. Add the ${*MARK} facility to pcre2_substitute().
|
||||
|
||||
47. Modifier lists in pcre2test were splitting at spaces without the required
|
||||
commas.
|
||||
|
||||
48. Implemented PCRE2_ALT_VERBNAMES.
|
||||
|
||||
49. Fixed two issues in JIT. These were found by Karl Skomski with a custom
|
||||
LLVM fuzzer.
|
||||
|
||||
50. The pcre2test program has been extended by adding the #newline_default
|
||||
command. This has made it possible to run the standard tests when PCRE2 is
|
||||
compiled with either CR or CRLF as the default newline convention. As part of
|
||||
this work, the new command was added to several test files and the testing
|
||||
scripts were modified. The pcre2grep tests can now also be run when there is no
|
||||
LF in the default newline convention.
|
||||
|
||||
51. The RunTest script has been modified so that, when JIT is used and valgrind
|
||||
is specified, a valgrind suppressions file is set up to ignore "Invalid read of
|
||||
size 16" errors because these are false positives when the hardware supports
|
||||
the SSE2 instruction set.
|
||||
|
||||
52. It is now possible to have comment lines amid the subject strings in
|
||||
pcre2test (and perltest.sh) input.
|
||||
|
||||
53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit().
|
||||
|
||||
54. Add the null_context modifier to pcre2test so that calling pcre2_compile()
|
||||
and the matching functions with NULL contexts can be tested.
|
||||
|
||||
55. Implemented PCRE2_SUBSTITUTE_EXTENDED.
|
||||
|
||||
56. In a character class such as [\W\p{Any}] where both a negative-type escape
|
||||
("not a word character") and a property escape were present, the property
|
||||
escape was being ignored.
|
||||
|
||||
57. Fixed integer overflow for patterns whose minimum matching length is very,
|
||||
very large.
|
||||
|
||||
58. Implemented --never-backslash-C.
|
||||
|
||||
59. Change 55 above introduced a bug by which certain patterns provoked the
|
||||
erroneous error "\ at end of pattern".
|
||||
|
||||
60. The special sequences [[:<:]] and [[:>:]] gave rise to incorrect compiling
|
||||
errors or other strange effects if compiled in UCP mode. Found with libFuzzer
|
||||
and AddressSanitizer.
|
||||
|
||||
61. Whitespace at the end of a pcre2test pattern line caused a spurious error
|
||||
message if there were only single-character modifiers. It should be ignored.
|
||||
|
||||
62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results
|
||||
or segmentation errors for some patterns. Found with libFuzzer and
|
||||
AddressSanitizer.
|
||||
|
||||
63. Very long names in (*MARK) or (*THEN) etc. items could provoke a buffer
|
||||
overflow.
|
||||
|
||||
64. Improve error message for overly-complicated patterns.
|
||||
|
||||
65. Implemented an optional replication feature for patterns in pcre2test, to
|
||||
make it easier to test long repetitive patterns. The tests for 63 above are
|
||||
converted to use the new feature.
|
||||
|
||||
66. In the POSIX wrapper, if regerror() was given too small a buffer, it could
|
||||
misbehave.
|
||||
|
||||
67. In pcre2_substitute() in UTF mode, the UTF validity check on the
|
||||
replacement string was happening before the length setting when the replacement
|
||||
string was zero-terminated.
|
||||
|
||||
68. In pcre2_substitute() in UTF mode, PCRE2_NO_UTF_CHECK can be set for the
|
||||
second and subsequent calls to pcre2_match().
|
||||
|
||||
69. There was no check for integer overflow for a replacement group number in
|
||||
pcre2_substitute(). An added check for a number greater than the largest group
|
||||
number in the pattern means this is not now needed.
|
||||
|
||||
70. The PCRE2-specific VERSION condition didn't work correctly if only one
|
||||
digit was given after the decimal point, or if more than two digits were given.
|
||||
It now works with one or two digits, and gives a compile time error if more are
|
||||
given.
|
||||
|
||||
71. In pcre2_substitute() there was the possibility of reading one code unit
|
||||
beyond the end of the replacement string.
|
||||
|
||||
72. The code for checking a subject's UTF-32 validity for a pattern with a
|
||||
lookbehind involved an out-of-bounds pointer, which could potentially cause
|
||||
trouble in some environments.
|
||||
|
||||
73. The maximum lookbehind length was incorrectly calculated for patterns such
|
||||
as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
|
||||
|
||||
74. Give an error if a lookbehind assertion is longer than 65535 code units.
|
||||
|
||||
75. Give an error in pcre2_substitute() if a match ends before it starts (as a
|
||||
result of the use of \K).
|
||||
|
||||
76. Check the length of subpattern names and the names in (*MARK:xx) etc.
|
||||
dynamically to avoid the possibility of integer overflow.
|
||||
|
||||
77. Implement pcre2_set_max_pattern_length() so that programs can restrict the
|
||||
size of patterns that they are prepared to handle.
|
||||
|
||||
78. (*NO_AUTO_POSSESS) was not working.
|
||||
|
||||
79. Adding group information caching improves the speed of compiling when
|
||||
checking whether a group has a fixed length and/or could match an empty string,
|
||||
especially when recursion or subroutine calls are involved. However, this
|
||||
cannot be used when (?| is present in the pattern because the same number may
|
||||
be used for groups of different sizes. To catch runaway patterns in this
|
||||
situation, counts have been introduced to the functions that scan for empty
|
||||
branches or compute fixed lengths.
|
||||
|
||||
80. Allow for the possibility of the size of the nest_save structure not being
|
||||
a factor of the size of the compiling workspace (it currently is).
|
||||
|
||||
81. Check for integer overflow in minimum length calculation and cap it at
|
||||
65535.
|
||||
|
||||
82. Small optimizations in code for finding the minimum matching length.
|
||||
|
||||
83. Lock out configuring for EBCDIC with non-8-bit libraries.
|
||||
|
||||
84. Test for error code <= 0 in regerror().
|
||||
|
||||
85. Check for too many replacements (more than INT_MAX) in pcre2_substitute().
|
||||
|
||||
86. Avoid the possibility of computing with an out-of-bounds pointer (though
|
||||
not dereferencing it) while handling lookbehind assertions.
|
||||
|
||||
87. Failure to get memory for the match data in regcomp() is now given as a
|
||||
regcomp() error instead of waiting for regexec() to pick it up.
|
||||
|
||||
88. In pcre2_substitute(), ensure that CRLF is not split when it is a valid
|
||||
newline sequence.
|
||||
|
||||
89. Paranoid check in regcomp() for bad error code from pcre2_compile().
|
||||
|
||||
90. Run test 8 (internal offsets and code sizes) for link sizes 3 and 4 as well
|
||||
as for link size 2.
|
||||
|
||||
91. Document that JIT has a limit on pattern size, and give more information
|
||||
about JIT compile failures in pcre2test.
|
||||
|
||||
92. Implement PCRE2_INFO_HASBACKSLASHC.
|
||||
|
||||
93. Re-arrange valgrind support code in pcre2test to avoid spurious reports
|
||||
with JIT (possibly caused by SSE2?).
|
||||
|
||||
94. Support offset_limit in JIT.
|
||||
|
||||
95. A sequence such as [[:punct:]b] that is, a POSIX character class followed
|
||||
by a single ASCII character in a class item, was incorrectly compiled in UCP
|
||||
mode. The POSIX class got lost, but only if the single character followed it.
|
||||
|
||||
96. [:punct:] in UCP mode was matching some characters in the range 128-255
|
||||
that should not have been matched.
|
||||
|
||||
97. If [:^ascii:] or [:^xdigit:] are present in a non-negated class, all
|
||||
characters with code points greater than 255 are in the class. When a Unicode
|
||||
property was also in the class (if PCRE2_UCP is set, escapes such as \w are
|
||||
turned into Unicode properties), wide characters were not correctly handled,
|
||||
and could fail to match.
|
||||
|
||||
98. In pcre2test, make the "startoffset" modifier a synonym of "offset",
|
||||
because it sets the "startoffset" parameter for pcre2_match().
|
||||
|
||||
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
|
||||
an item and its qualifier (for example, A(?#comment)?B) pcre2_compile()
|
||||
misbehaved. This bug was found by the LLVM fuzzer.
|
||||
|
||||
100. The error for an invalid UTF pattern string always gave the code unit
|
||||
offset as zero instead of where the invalidity was found.
|
||||
|
||||
101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not
|
||||
working correctly in UCP mode.
|
||||
|
||||
102. Similar to 99 above, if an isolated \E was present between an item and its
|
||||
qualifier when PCRE2_AUTO_CALLOUT was set, pcre2_compile() misbehaved. This bug
|
||||
was found by the LLVM fuzzer.
|
||||
|
||||
103. The POSIX wrapper function regexec() crashed if the option REG_STARTEND
|
||||
was set when the pmatch argument was NULL. It now returns REG_INVARG.
|
||||
|
||||
104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep.
|
||||
|
||||
105. An empty \Q\E sequence between an item and its qualifier caused
|
||||
pcre2_compile() to misbehave when auto callouts were enabled. This bug
|
||||
was found by the LLVM fuzzer.
|
||||
|
||||
106. If both PCRE2_ALT_VERBNAMES and PCRE2_EXTENDED were set, and a (*MARK) or
|
||||
other verb "name" ended with whitespace immediately before the closing
|
||||
parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when
|
||||
both those options were set.
|
||||
|
||||
107. In a number of places pcre2_compile() was not handling NULL characters
|
||||
correctly, and pcre2test with the "bincode" modifier was not always correctly
|
||||
displaying fields containing NULLS:
|
||||
|
||||
(a) Within /x extended #-comments
|
||||
(b) Within the "name" part of (*MARK) and other *verbs
|
||||
(c) Within the text argument of a callout
|
||||
|
||||
108. If a pattern that was compiled with PCRE2_EXTENDED started with white
|
||||
space or a #-type comment that was followed by (?-x), which turns off
|
||||
PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again,
|
||||
pcre2_compile() assumed that (?-x) applied to the whole pattern and
|
||||
consequently mis-compiled it. This bug was found by the LLVM fuzzer. The fix
|
||||
for this bug means that a setting of any of the (?imsxU) options at the start
|
||||
of a pattern is no longer transferred to the options that are returned by
|
||||
PCRE2_INFO_ALLOPTIONS. In fact, this was an anachronism that should have
|
||||
changed when the effects of those options were all moved to compile time.
|
||||
|
||||
109. An escaped closing parenthesis in the "name" part of a (*verb) when
|
||||
PCRE2_ALT_VERBNAMES was set caused pcre2_compile() to malfunction. This bug
|
||||
was found by the LLVM fuzzer.
|
||||
|
||||
110. Implemented PCRE2_SUBSTITUTE_UNSET_EMPTY, and updated pcre2test to make it
|
||||
possible to test it.
|
||||
|
||||
111. "Harden" pcre2test against ridiculously large values in modifiers and
|
||||
command line arguments.
|
||||
|
||||
112. Implemented PCRE2_SUBSTITUTE_UNKNOWN_UNSET and PCRE2_SUBSTITUTE_OVERFLOW_
|
||||
LENGTH.
|
||||
|
||||
113. Fix printing of *MARK names that contain binary zeroes in pcre2test.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
@ -25,7 +25,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2015 University of Cambridge
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -36,7 +36,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2015 Zoltan Herczeg
|
||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -47,7 +47,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2015 Zoltan Herczeg
|
||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
|
@ -64,6 +64,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_set_character_tables.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_offset_limit.html \
|
||||
doc/html/pcre2_set_newline.html \
|
||||
doc/html/pcre2_set_parens_nest_limit.html \
|
||||
doc/html/pcre2_set_recursion_limit.html \
|
||||
|
@ -143,6 +144,7 @@ dist_man_MANS = \
|
|||
doc/pcre2_set_character_tables.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_offset_limit.3 \
|
||||
doc/pcre2_set_newline.3 \
|
||||
doc/pcre2_set_parens_nest_limit.3 \
|
||||
doc/pcre2_set_recursion_limit.3 \
|
||||
|
@ -319,6 +321,7 @@ COMMON_SOURCES = \
|
|||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
|
@ -586,6 +589,10 @@ EXTRA_DIST += \
|
|||
testdata/testinput17 \
|
||||
testdata/testinput18 \
|
||||
testdata/testinput19 \
|
||||
testdata/testinput20 \
|
||||
testdata/testinput21 \
|
||||
testdata/testinput22 \
|
||||
testdata/testinput23 \
|
||||
testdata/testinputEBC \
|
||||
testdata/testoutput1 \
|
||||
testdata/testoutput2 \
|
||||
|
@ -596,9 +603,15 @@ EXTRA_DIST += \
|
|||
testdata/testoutput5 \
|
||||
testdata/testoutput6 \
|
||||
testdata/testoutput7 \
|
||||
testdata/testoutput8-16 \
|
||||
testdata/testoutput8-32 \
|
||||
testdata/testoutput8-8 \
|
||||
testdata/testoutput8-16-2 \
|
||||
testdata/testoutput8-16-3 \
|
||||
testdata/testoutput8-16-3 \
|
||||
testdata/testoutput8-32-2 \
|
||||
testdata/testoutput8-32-3 \
|
||||
testdata/testoutput8-32-4 \
|
||||
testdata/testoutput8-8-2 \
|
||||
testdata/testoutput8-8-3 \
|
||||
testdata/testoutput8-8-4 \
|
||||
testdata/testoutput9 \
|
||||
testdata/testoutput10 \
|
||||
testdata/testoutput11-16 \
|
||||
|
@ -606,13 +619,22 @@ EXTRA_DIST += \
|
|||
testdata/testoutput12-16 \
|
||||
testdata/testoutput12-32 \
|
||||
testdata/testoutput13 \
|
||||
testdata/testoutput14 \
|
||||
testdata/testoutput14-16 \
|
||||
testdata/testoutput14-32 \
|
||||
testdata/testoutput14-8 \
|
||||
testdata/testoutput15 \
|
||||
testdata/testoutput16 \
|
||||
testdata/testoutput17 \
|
||||
testdata/testoutput18 \
|
||||
testdata/testoutput19 \
|
||||
testdata/testoutput20 \
|
||||
testdata/testoutput21 \
|
||||
testdata/testoutput22-16 \
|
||||
testdata/testoutput22-32 \
|
||||
testdata/testoutput22-8 \
|
||||
testdata/testoutput23 \
|
||||
testdata/testoutputEBC \
|
||||
testdata/valgrind-jit.supp \
|
||||
testdata/wintestinput3 \
|
||||
testdata/wintestoutput3 \
|
||||
perltest.sh
|
|
@ -186,7 +186,8 @@ LTLIBRARIES = $(lib_LTLIBRARIES)
|
|||
libpcre2_16_la_DEPENDENCIES =
|
||||
am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c src/pcre2_error.c src/pcre2_internal.h \
|
||||
src/pcre2_dfa_match.c src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h src/pcre2_jit_compile.c \
|
||||
src/pcre2_maketables.c src/pcre2_match.c \
|
||||
src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \
|
||||
|
@ -202,6 +203,7 @@ am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \
|
|||
src/libpcre2_16_la-pcre2_context.lo \
|
||||
src/libpcre2_16_la-pcre2_dfa_match.lo \
|
||||
src/libpcre2_16_la-pcre2_error.lo \
|
||||
src/libpcre2_16_la-pcre2_find_bracket.lo \
|
||||
src/libpcre2_16_la-pcre2_jit_compile.lo \
|
||||
src/libpcre2_16_la-pcre2_maketables.lo \
|
||||
src/libpcre2_16_la-pcre2_match.lo \
|
||||
|
@ -235,7 +237,8 @@ libpcre2_16_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
|
|||
libpcre2_32_la_DEPENDENCIES =
|
||||
am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c src/pcre2_error.c src/pcre2_internal.h \
|
||||
src/pcre2_dfa_match.c src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h src/pcre2_jit_compile.c \
|
||||
src/pcre2_maketables.c src/pcre2_match.c \
|
||||
src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \
|
||||
|
@ -250,6 +253,7 @@ am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \
|
|||
src/libpcre2_32_la-pcre2_context.lo \
|
||||
src/libpcre2_32_la-pcre2_dfa_match.lo \
|
||||
src/libpcre2_32_la-pcre2_error.lo \
|
||||
src/libpcre2_32_la-pcre2_find_bracket.lo \
|
||||
src/libpcre2_32_la-pcre2_jit_compile.lo \
|
||||
src/libpcre2_32_la-pcre2_maketables.lo \
|
||||
src/libpcre2_32_la-pcre2_match.lo \
|
||||
|
@ -279,7 +283,8 @@ libpcre2_32_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
|
|||
libpcre2_8_la_DEPENDENCIES =
|
||||
am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c src/pcre2_config.c src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c src/pcre2_error.c src/pcre2_internal.h \
|
||||
src/pcre2_dfa_match.c src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h src/pcre2_jit_compile.c \
|
||||
src/pcre2_maketables.c src/pcre2_match.c \
|
||||
src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \
|
||||
|
@ -294,6 +299,7 @@ am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \
|
|||
src/libpcre2_8_la-pcre2_context.lo \
|
||||
src/libpcre2_8_la-pcre2_dfa_match.lo \
|
||||
src/libpcre2_8_la-pcre2_error.lo \
|
||||
src/libpcre2_8_la-pcre2_find_bracket.lo \
|
||||
src/libpcre2_8_la-pcre2_jit_compile.lo \
|
||||
src/libpcre2_8_la-pcre2_maketables.lo \
|
||||
src/libpcre2_8_la-pcre2_match.lo \
|
||||
|
@ -865,6 +871,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_set_character_tables.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_offset_limit.html \
|
||||
doc/html/pcre2_set_newline.html \
|
||||
doc/html/pcre2_set_parens_nest_limit.html \
|
||||
doc/html/pcre2_set_recursion_limit.html \
|
||||
|
@ -944,6 +951,7 @@ dist_man_MANS = \
|
|||
doc/pcre2_set_character_tables.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_offset_limit.3 \
|
||||
doc/pcre2_set_newline.3 \
|
||||
doc/pcre2_set_parens_nest_limit.3 \
|
||||
doc/pcre2_set_recursion_limit.3 \
|
||||
|
@ -1052,21 +1060,30 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \
|
|||
testdata/testinput11 testdata/testinput12 testdata/testinput13 \
|
||||
testdata/testinput14 testdata/testinput15 testdata/testinput16 \
|
||||
testdata/testinput17 testdata/testinput18 testdata/testinput19 \
|
||||
testdata/testinputEBC testdata/testoutput1 \
|
||||
testdata/testoutput2 testdata/testoutput3 \
|
||||
testdata/testinput20 testdata/testinput21 testdata/testinput22 \
|
||||
testdata/testinput23 testdata/testinputEBC \
|
||||
testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \
|
||||
testdata/testoutput3A testdata/testoutput3B \
|
||||
testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \
|
||||
testdata/testoutput7 testdata/testoutput8-16 \
|
||||
testdata/testoutput8-32 testdata/testoutput8-8 \
|
||||
testdata/testoutput7 testdata/testoutput8-16-2 \
|
||||
testdata/testoutput8-16-3 testdata/testoutput8-16-3 \
|
||||
testdata/testoutput8-32-2 testdata/testoutput8-32-3 \
|
||||
testdata/testoutput8-32-4 testdata/testoutput8-8-2 \
|
||||
testdata/testoutput8-8-3 testdata/testoutput8-8-4 \
|
||||
testdata/testoutput9 testdata/testoutput10 \
|
||||
testdata/testoutput11-16 testdata/testoutput11-32 \
|
||||
testdata/testoutput12-16 testdata/testoutput12-32 \
|
||||
testdata/testoutput13 testdata/testoutput14 \
|
||||
testdata/testoutput13 testdata/testoutput14-16 \
|
||||
testdata/testoutput14-32 testdata/testoutput14-8 \
|
||||
testdata/testoutput15 testdata/testoutput16 \
|
||||
testdata/testoutput17 testdata/testoutput18 \
|
||||
testdata/testoutput19 testdata/testoutputEBC \
|
||||
testdata/wintestinput3 testdata/wintestoutput3 perltest.sh \
|
||||
src/pcre2demo.c cmake/COPYING-CMAKE-SCRIPTS \
|
||||
testdata/testoutput19 testdata/testoutput20 \
|
||||
testdata/testoutput21 testdata/testoutput22-16 \
|
||||
testdata/testoutput22-32 testdata/testoutput22-8 \
|
||||
testdata/testoutput23 testdata/testoutputEBC \
|
||||
testdata/valgrind-jit.supp testdata/wintestinput3 \
|
||||
testdata/wintestoutput3 perltest.sh src/pcre2demo.c \
|
||||
cmake/COPYING-CMAKE-SCRIPTS \
|
||||
cmake/FindPackageHandleStandardArgs.cmake \
|
||||
cmake/FindReadline.cmake cmake/FindEditline.cmake \
|
||||
CMakeLists.txt config-cmake.h.in
|
||||
|
@ -1088,6 +1105,7 @@ COMMON_SOURCES = \
|
|||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
|
@ -1309,6 +1327,8 @@ src/libpcre2_16_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \
|
|||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_16_la-pcre2_error.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_16_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_16_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_16_la-pcre2_maketables.lo: src/$(am__dirstamp) \
|
||||
|
@ -1358,6 +1378,8 @@ src/libpcre2_32_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \
|
|||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_32_la-pcre2_error.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_32_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_32_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_32_la-pcre2_maketables.lo: src/$(am__dirstamp) \
|
||||
|
@ -1407,6 +1429,8 @@ src/libpcre2_8_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \
|
|||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_8_la-pcre2_error.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_8_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_8_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \
|
||||
src/$(DEPDIR)/$(am__dirstamp)
|
||||
src/libpcre2_8_la-pcre2_maketables.lo: src/$(am__dirstamp) \
|
||||
|
@ -1583,6 +1607,7 @@ distclean-compile:
|
|||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo@am__quote@
|
||||
|
@ -1606,6 +1631,7 @@ distclean-compile:
|
|||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo@am__quote@
|
||||
|
@ -1629,6 +1655,7 @@ distclean-compile:
|
|||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo@am__quote@
|
||||
|
@ -1716,6 +1743,13 @@ src/libpcre2_16_la-pcre2_error.lo: src/pcre2_error.c
|
|||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c
|
||||
|
||||
src/libpcre2_16_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_16_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c
|
||||
|
||||
src/libpcre2_16_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_16_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo
|
||||
|
@ -1877,6 +1911,13 @@ src/libpcre2_32_la-pcre2_error.lo: src/pcre2_error.c
|
|||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c
|
||||
|
||||
src/libpcre2_32_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_32_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c
|
||||
|
||||
src/libpcre2_32_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_32_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo
|
||||
|
@ -2038,6 +2079,13 @@ src/libpcre2_8_la-pcre2_error.lo: src/pcre2_error.c
|
|||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c
|
||||
|
||||
src/libpcre2_8_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_8_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c
|
||||
|
||||
src/libpcre2_8_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_8_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo
|
88
pcre2-10.21/NEWS
Normal file
88
pcre2-10.21/NEWS
Normal file
|
@ -0,0 +1,88 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
||||
1. Many bugs have been fixed. A large number of them were provoked only by very
|
||||
strange pattern input, and were discovered by fuzzers. Some others were
|
||||
discovered by code auditing. See ChangeLog for details.
|
||||
|
||||
2. The Unicode tables have been updated to Unicode version 8.0.0.
|
||||
|
||||
3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a
|
||||
class, where both values are literal letters in the same case, omit the
|
||||
non-letter EBCDIC code points within the range.
|
||||
|
||||
4. There have been a number of enhancements to the pcre2_substitute() function,
|
||||
giving more flexibility to replacement facilities. It is now also possible to
|
||||
cause the function to return the needed buffer size if the one given is too
|
||||
small.
|
||||
|
||||
5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such
|
||||
as (*THEN:name) to be processed for backslashes and to take note of
|
||||
PCRE2_EXTENDED.
|
||||
|
||||
6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a
|
||||
pattern uses \C, and --never-backslash-C makes it possible to compile a version
|
||||
of PCRE2 in which the use of \C is always forbidden.
|
||||
|
||||
7. A limit to the length of pattern that can be handled can now be set by
|
||||
calling pcre2_set_max_pattern_length().
|
||||
|
||||
8. When matching an unanchored pattern, a match can be required to begin within
|
||||
a given number of code units after the start of the subject by calling
|
||||
pcre2_set_offset_limit().
|
||||
|
||||
9. The pcre2test program has been extended to test new facilities, and it can
|
||||
now run the tests when LF on its own is not a valid newline sequence.
|
||||
|
||||
10. The RunTest script has also been updated to enable more tests to be run.
|
||||
|
||||
11. There have been some minor performance enhancements.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
||||
1. Callouts with string arguments and the pcre2_callout_enumerate() function
|
||||
have been implemented.
|
||||
|
||||
2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added.
|
||||
|
||||
3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a
|
||||
subject in multiline mode.
|
||||
|
||||
4. The way named subpatterns are handled has been refactored. The previous
|
||||
approach had several bugs.
|
||||
|
||||
5. The handling of \c in EBCDIC environments has been changed to conform to the
|
||||
perlebcdic document. This is an incompatible change.
|
||||
|
||||
6. Bugs have been mended, many of them discovered by fuzzers.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
1. Serialization and de-serialization functions have been added to the API,
|
||||
making it possible to save and restore sets of compiled patterns, though
|
||||
restoration must be done in the same environment that was used for compilation.
|
||||
|
||||
2. The (*NO_JIT) feature has been added; this makes it possible for a pattern
|
||||
creator to specify that JIT is not to be used.
|
||||
|
||||
3. A number of bugs have been fixed. In particular, bugs that caused building
|
||||
on Windows using CMake to fail have been mended.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36. New programs are recommended to use the
|
||||
new library. Programs that use the original (PCRE1) API will need changing
|
||||
before linking with the new library.
|
||||
|
||||
****
|
|
@ -97,6 +97,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_find_bracket.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
pcre2_match.c
|
||||
|
@ -388,4 +389,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
|||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 15 June 2015
|
||||
Last Updated: 16 July 2015
|
|
@ -65,13 +65,9 @@ End
|
|||
|
||||
echo "Making pcre2.txt"
|
||||
for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
|
||||
pcre2limits pcre2matching pcre2partial pcre2unicode ; do
|
||||
|
||||
#for file in \
|
||||
# pcre2syntax \
|
||||
# pcre2precompile pcre2perform pcre2posix pcre2sample \
|
||||
# pcre2stack ; do
|
||||
|
||||
pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \
|
||||
pcre2posix pcre2sample pcre2serialize pcre2stack pcre2syntax \
|
||||
pcre2unicode ; do
|
||||
echo " Processing $file.3"
|
||||
nroff -c -man $file.3 >$file.rawtxt
|
||||
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
|
||||
|
@ -153,7 +149,6 @@ for file in *.3 ; do
|
|||
[ "$base" = "pcre2stack" ] || \
|
||||
[ "$base" = "pcre2compat" ] || \
|
||||
[ "$base" = "pcre2limits" ] || \
|
||||
[ "$base" = "pcre2perform" ] || \
|
||||
[ "$base" = "pcre2unicode" ] ; then
|
||||
toc=""
|
||||
fi
|
||||
|
@ -204,6 +199,7 @@ files="\
|
|||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_find_bracket.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
|
@ -220,6 +220,13 @@ library. They are also documented in the pcre2build man page.
|
|||
restrict \R to match only CR, LF, or CRLF. You can make this the default by
|
||||
adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. In a pattern, the escape sequence \C matches a single code unit, even in a
|
||||
UTF mode. This can be dangerous because it breaks up multi-code-unit
|
||||
characters. You can build PCRE2 with the use of \C permanently locked out by
|
||||
adding --enable-never-backslash-C (note the upper case C) to the "configure"
|
||||
command. When \C is allowed by the library, individual applications can lock
|
||||
it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option.
|
||||
|
||||
. PCRE2 has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
|
@ -724,6 +731,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
|
@ -832,4 +840,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 24 April 2015
|
||||
Last updated: 16 October 2015
|
|
@ -19,12 +19,18 @@ unset cp ls mv rm
|
|||
|
||||
builddir=`pwd`
|
||||
pcre2grep=$builddir/pcre2grep
|
||||
pcre2test=$builddir/pcre2test
|
||||
|
||||
if [ ! -x $pcre2grep ] ; then
|
||||
echo "** $pcre2grep does not exist or is not execuatble."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -x $pcre2test ] ; then
|
||||
echo "** $pcre2test does not exist or is not execuatble."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
valgrind=
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
|
@ -34,7 +40,6 @@ while [ $# -gt 0 ] ; do
|
|||
shift
|
||||
done
|
||||
|
||||
echo " "
|
||||
pcre2grep_version=`$pcre2grep -V`
|
||||
if [ "$valgrind" = "" ] ; then
|
||||
echo "Testing $pcre2grep_version"
|
||||
|
@ -69,14 +74,22 @@ fi
|
|||
|
||||
# Check for the availability of UTF-8 support
|
||||
|
||||
./pcre2test -C unicode >/dev/null
|
||||
$pcre2test -C unicode >/dev/null
|
||||
utf8=$?
|
||||
|
||||
# Check default newline convention. If it does not include LF, force LF.
|
||||
|
||||
nl=`$pcre2test -C newline`
|
||||
if [ "$nl" != "LF" -a "$nl" != "ANY" -a "$nl" != "ANYCRLF" ]; then
|
||||
pcre2grep="$pcre2grep -N LF"
|
||||
echo "Default newline setting forced to LF"
|
||||
fi
|
||||
|
||||
# ------ Function to run and check a special pcre2grep arguments test -------
|
||||
|
||||
checkspecial()
|
||||
{
|
||||
$valgrind ./pcre2grep $1 >>testtrygrep 2>&1
|
||||
$valgrind $pcre2grep $1 >>testtrygrep 2>&1
|
||||
if [ $? -ne $2 ] ; then
|
||||
echo "** pcre2grep $1 failed - check testtrygrep"
|
||||
exit 1
|
||||
|
@ -530,6 +543,14 @@ echo "aaaaa" >>testtemp1grep
|
|||
(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 108 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -lq PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 109 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
|
@ -33,6 +33,10 @@
|
|||
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
|
||||
# be given without the leading "-" character.
|
||||
#
|
||||
# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need
|
||||
# very much more stack than normal. In environments where the stack can be
|
||||
# set at runtime, -bigstack sets a gigantic stack.
|
||||
#
|
||||
# There are two special cases where only one argument is allowed:
|
||||
#
|
||||
# If the first and only argument is "ebcdic", the script runs the special
|
||||
|
@ -64,13 +68,17 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
|
|||
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
||||
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
||||
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
||||
title14="Test 14: Non-JIT limits and other non-JIT tests"
|
||||
title15="Test 15: JIT-specific features when JIT is not available"
|
||||
title16="Test 16: JIT-specific features when JIT is available"
|
||||
title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP"
|
||||
title18="Test 18: Tests of the POSIX interface with UTF/UCP"
|
||||
title19="Test 19: Serialization tests"
|
||||
maxtest=18
|
||||
title14="Test 14: DFA specials for UTF and UCP support"
|
||||
title15="Test 15: Non-JIT limits and other non-JIT tests"
|
||||
title16="Test 16: JIT-specific features when JIT is not available"
|
||||
title17="Test 17: JIT-specific features when JIT is available"
|
||||
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
|
||||
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
|
||||
title20="Test 20: Serialization tests"
|
||||
title21="Test 21: \C tests without UTF (supported for DFA matching)"
|
||||
title22="Test 22: \C tests with UTF (not supported for DFA matching)"
|
||||
title23="Test 23: \C disabled test"
|
||||
maxtest=23
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
echo $title0
|
||||
|
@ -93,6 +101,10 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
|||
echo $title17
|
||||
echo $title18
|
||||
echo $title19
|
||||
echo $title20
|
||||
echo $title21
|
||||
echo $title22
|
||||
echo $title23
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
@ -151,7 +163,7 @@ checkresult()
|
|||
|
||||
checkspecial()
|
||||
{
|
||||
$valgrind ./pcre2test $1 >>testtry
|
||||
$valgrind $vjs ./pcre2test $1 >>testtry
|
||||
if [ $? -ne 0 ] ; then
|
||||
echo "** pcre2test $1 failed - check testtry"
|
||||
exit 1
|
||||
|
@ -184,9 +196,11 @@ arg8=
|
|||
arg16=
|
||||
arg32=
|
||||
nojit=
|
||||
bigstack=
|
||||
sim=
|
||||
skip=
|
||||
valgrind=
|
||||
vjs=
|
||||
|
||||
# This is in case the caller has set aliases (as I do - PH)
|
||||
unset cp ls mv rm
|
||||
|
@ -214,6 +228,10 @@ do16=no
|
|||
do17=no
|
||||
do18=no
|
||||
do19=no
|
||||
do20=no
|
||||
do21=no
|
||||
do22=no
|
||||
do23=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
|
@ -237,9 +255,14 @@ while [ $# -gt 0 ] ; do
|
|||
17) do17=yes;;
|
||||
18) do18=yes;;
|
||||
19) do19=yes;;
|
||||
20) do20=yes;;
|
||||
21) do21=yes;;
|
||||
22) do22=yes;;
|
||||
23) do23=yes;;
|
||||
-8) arg8=yes;;
|
||||
-16) arg16=yes;;
|
||||
-32) arg32=yes;;
|
||||
bigstack|-bigstack) bigstack=yes;;
|
||||
nojit|-nojit) nojit=yes;;
|
||||
sim|-sim) shift; sim=$1;;
|
||||
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
||||
|
@ -287,13 +310,22 @@ fi
|
|||
|
||||
# If it is possible to set the system stack size, arrange to set a value for
|
||||
# test 2, which needs more than even the Linux default when PCRE2 has been
|
||||
# compiled with -fsanitize=address.
|
||||
# compiled by gcc with -fsanitize=address. When the compiler is clang, sanitize
|
||||
# options require an even bigger stack for test 2, and an increased stack for
|
||||
# some of the other tests.
|
||||
|
||||
$sim ./pcre2test -S 1 /dev/null /dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
test2stack="-S 16"
|
||||
if [ "$bigstack" = "" ] ; then
|
||||
test2stack="-S 16"
|
||||
defaultstack=""
|
||||
else
|
||||
test2stack="-S 1024"
|
||||
defaultstack="-S 64"
|
||||
fi
|
||||
else
|
||||
test2stack=""
|
||||
defaultstack=""
|
||||
fi
|
||||
|
||||
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
||||
|
@ -306,6 +338,11 @@ support16=$?
|
|||
$sim ./pcre2test -C pcre2-32 >/dev/null
|
||||
support32=$?
|
||||
|
||||
# \C may be disabled
|
||||
|
||||
$sim ./pcre2test -C backslash-C >/dev/null
|
||||
supportBSC=$?
|
||||
|
||||
# Initialize all bitsizes skipped
|
||||
|
||||
test8=skip
|
||||
|
@ -358,11 +395,18 @@ fi
|
|||
$sim ./pcre2test -C unicode >/dev/null
|
||||
utf=$?
|
||||
|
||||
# When JIT is used with valgrind, we need to set up valgrind suppressions as
|
||||
# otherwise there are a lot of false positive valgrind reports when the
|
||||
# hardware supports SSE2.
|
||||
|
||||
jitopt=
|
||||
$sim ./pcre2test -C jit >/dev/null
|
||||
jit=$?
|
||||
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
||||
jitopt=-jit
|
||||
if [ "$valgrind" != "" ] ; then
|
||||
vjs="--suppressions=$testdata/valgrind-jit.supp"
|
||||
fi
|
||||
fi
|
||||
|
||||
# If no specific tests were requested, select all. Those that are not
|
||||
|
@ -372,7 +416,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
|||
$do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \
|
||||
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
|
||||
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
||||
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no \
|
||||
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
|
||||
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no \
|
||||
]; then
|
||||
do0=yes
|
||||
do1=yes
|
||||
|
@ -394,6 +439,10 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
|||
do17=yes
|
||||
do18=yes
|
||||
do19=yes
|
||||
do20=yes
|
||||
do21=yes
|
||||
do22=yes
|
||||
do23=yes
|
||||
fi
|
||||
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
|
@ -438,7 +487,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
if [ $do1 = yes ] ; then
|
||||
echo $title1
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput1 testtry
|
||||
checkresult $? 1 "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -448,7 +497,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
if [ $do2 = yes ] ; then
|
||||
echo $title2 "(excluding UTF-$bits)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
|
@ -508,7 +557,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
if [ "$locale" != "" ] ; then
|
||||
echo $title3 "(using '$locale' locale)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $infile testtry
|
||||
if [ $? = 0 ] ; then
|
||||
case "$opt" in
|
||||
-jit) with=" with JIT";;
|
||||
|
@ -545,7 +594,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput4 testtry
|
||||
checkresult $? 4 "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -557,7 +606,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput5 testtry
|
||||
checkresult $? 5 "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -567,7 +616,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo $title6
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput6 testtry
|
||||
checkresult $? 6 ""
|
||||
fi
|
||||
|
||||
|
@ -576,28 +625,26 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput7 testtry
|
||||
checkresult $? 7 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test of internal offsets and code sizes. This test is run only when there
|
||||
# is UTF/UCP support and the link size is 2. The actual tests are
|
||||
# mostly the same as in some of the above, but in this test we inspect some
|
||||
# offsets and sizes that require a known link size. This is a doublecheck for
|
||||
# the maintainer, just in case something changes unexpectedly. The output from
|
||||
# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are
|
||||
# mode-specific output files.
|
||||
# is UTF/UCP support. The actual tests are mostly the same as in some of the
|
||||
# above, but in this test we inspect some offsets and sizes. This is a
|
||||
# doublecheck for the maintainer, just in case something changes unexpectedly.
|
||||
# The output from this test is different in 8-bit, 16-bit, and 32-bit modes
|
||||
# and for different link sizes, so there are different output files for each
|
||||
# mode and link size.
|
||||
|
||||
if [ $do8 = yes ] ; then
|
||||
echo $title8
|
||||
if [ $link_size -ne 2 ] ; then
|
||||
echo " Skipped because link size is not 2"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
|
||||
checkresult $? 8-$bits ""
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput8 testtry
|
||||
checkresult $? 8-$bits-$link_size ""
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -609,7 +656,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput9 testtry
|
||||
checkresult $? 9 "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -625,7 +672,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput10 testtry
|
||||
checkresult $? 10 "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -639,7 +686,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput11 testtry
|
||||
checkresult $? 11-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -656,7 +703,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput12 testtry
|
||||
checkresult $? 12-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
|
@ -669,75 +716,129 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput13 testtry
|
||||
checkresult $? 13 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for DFA UTF and UCP features. Output is different for the different widths.
|
||||
|
||||
if [ $do14 = yes ] ; then
|
||||
echo $title14
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
|
||||
checkresult $? 14-$bits ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test non-JIT match and recursion limits
|
||||
|
||||
if [ $do14 = yes ] ; then
|
||||
echo $title14
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry
|
||||
checkresult $? 14 ""
|
||||
if [ $do15 = yes ] ; then
|
||||
echo $title15
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
|
||||
checkresult $? 15 ""
|
||||
fi
|
||||
|
||||
# Test JIT-specific features when JIT is not available
|
||||
|
||||
if [ $do15 = yes ] ; then
|
||||
echo $title15
|
||||
if [ $do16 = yes ] ; then
|
||||
echo $title16
|
||||
if [ $jit -ne 0 ] ; then
|
||||
echo " Skipped because JIT is available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry
|
||||
checkresult $? 15 ""
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
|
||||
checkresult $? 16 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test JIT-specific features when JIT is available
|
||||
|
||||
if [ $do16 = yes ] ; then
|
||||
echo $title16
|
||||
if [ $do17 = yes ] ; then
|
||||
echo $title17
|
||||
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||
echo " Skipped because JIT is not available or nojit was specified"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry
|
||||
checkresult $? 16 ""
|
||||
$sim $valgrind $vjs ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
|
||||
checkresult $? 17 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for the POSIX interface without UTF/UCP (8-bit only)
|
||||
|
||||
if [ $do17 = yes ] ; then
|
||||
echo $title17
|
||||
if [ $do18 = yes ] ; then
|
||||
echo $title18
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry
|
||||
checkresult $? 17 ""
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
|
||||
checkresult $? 18 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for the POSIX interface with UTF/UCP (8-bit only)
|
||||
|
||||
if [ $do18 = yes ] ; then
|
||||
echo $title18
|
||||
if [ $do19 = yes ] ; then
|
||||
echo $title19
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry
|
||||
checkresult $? 18 ""
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
|
||||
checkresult $? 19 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# Serialization tests
|
||||
|
||||
if [ $do19 = yes ] ; then
|
||||
echo $title19
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput19 testtry
|
||||
checkresult $? 19 ""
|
||||
if [ $do20 = yes ] ; then
|
||||
echo $title20
|
||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput20 testtry
|
||||
checkresult $? 20 ""
|
||||
fi
|
||||
|
||||
# \C tests without UTF - DFA matching is supported
|
||||
|
||||
if [ "$do21" = yes ] ; then
|
||||
echo $title21
|
||||
if [ $supportBSC -eq 0 ] ; then
|
||||
echo " Skipped because \C is disabled"
|
||||
else
|
||||
for opt in "" $jitopt -dfa; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput21 testtry
|
||||
checkresult $? 21 "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# \C tests with UTF - DFA matching is not supported for \C in UTF mode
|
||||
|
||||
if [ "$do22" = yes ] ; then
|
||||
echo $title22
|
||||
if [ $supportBSC -eq 0 ] ; then
|
||||
echo " Skipped because \C is disabled"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput22 testtry
|
||||
checkresult $? 22-$bits "$opt"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test when \C is disabled
|
||||
|
||||
if [ "$do23" = yes ] ; then
|
||||
echo $title23
|
||||
if [ $supportBSC -ne 0 ] ; then
|
||||
echo " Skipped because \C is not disabled"
|
||||
else
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput23 testtry
|
||||
checkresult $? 23 ""
|
||||
fi
|
||||
fi
|
||||
|
||||
# End of loop for 8/16/32-bit tests
|
|
@ -13,17 +13,18 @@
|
|||
@rem line. Added argument validation and added error reporting.
|
||||
@rem
|
||||
@rem Sheri Pierce added logic to skip feature dependent tests
|
||||
@rem tests 4 5 9 15 and 18 require utf support
|
||||
@rem tests 6 7 10 16 and 19 require ucp support
|
||||
@rem 11 requires ucp and link size 2
|
||||
@rem 12 requires presence of jit support
|
||||
@rem 13 requires absence of jit support
|
||||
@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support
|
||||
@rem 8 requires Unicode and link size 2
|
||||
@rem 16 requires absence of jit support
|
||||
@rem 17 requires presence of jit support
|
||||
@rem Sheri P also added override tests for study and jit testing
|
||||
@rem Zoltan Herczeg added libpcre16 support
|
||||
@rem Zoltan Herczeg added libpcre32 support
|
||||
@rem -------------------------------------------------------------------
|
||||
@rem
|
||||
@rem The file was converted for PCRE2 by PH, February 2015.
|
||||
@rem Updated for new test 14 (moving others up a number), August 2015.
|
||||
@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
|
||||
|
||||
|
||||
setlocal enabledelayedexpansion
|
||||
|
@ -64,6 +65,8 @@ set support32=%ERRORLEVEL%
|
|||
set unicode=%ERRORLEVEL%
|
||||
%pcre2test% -C jit >NUL
|
||||
set jit=%ERRORLEVEL%
|
||||
%pcre2test% -C backslash-C >NUL
|
||||
set supportBSC=%ERRORLEVEL%
|
||||
|
||||
if %support8% EQU 1 (
|
||||
if not exist testout8 md testout8
|
||||
|
@ -99,18 +102,22 @@ set do16=no
|
|||
set do17=no
|
||||
set do18=no
|
||||
set do19=no
|
||||
set do20=no
|
||||
set do21=no
|
||||
set do22=no
|
||||
set do23=no
|
||||
set all=yes
|
||||
|
||||
for %%a in (%*) do (
|
||||
set valid=no
|
||||
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19) do if %%v == %%a set valid=yes
|
||||
for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes
|
||||
if "!valid!" == "yes" (
|
||||
set do%%a=yes
|
||||
set all=no
|
||||
) else (
|
||||
echo Invalid test number - %%a!
|
||||
echo Usage %0 [ test_number ] ...
|
||||
echo Where test_number is one or more optional test numbers 1 through 19, default is all tests.
|
||||
echo Where test_number is one or more optional test numbers 1 through 23, default is all tests.
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
|
@ -136,6 +143,10 @@ if "%all%" == "yes" (
|
|||
set do17=yes
|
||||
set do18=yes
|
||||
set do19=yes
|
||||
set do20=yes
|
||||
set do21=yes
|
||||
set do22=yes
|
||||
set do23=yes
|
||||
)
|
||||
|
||||
@echo RunTest.bat's pcre2test output is written to newly created subfolders
|
||||
|
@ -183,6 +194,10 @@ if "%do16%" == "yes" call :do16
|
|||
if "%do17%" == "yes" call :do17
|
||||
if "%do18%" == "yes" call :do18
|
||||
if "%do19%" == "yes" call :do19
|
||||
if "%do20%" == "yes" call :do20
|
||||
if "%do21%" == "yes" call :do21
|
||||
if "%do22%" == "yes" call :do22
|
||||
if "%do23%" == "yes" call :do23
|
||||
:modeSkip
|
||||
if "%mode%" == "" (
|
||||
set mode=-16
|
||||
|
@ -253,6 +268,9 @@ if [%1]==[11] (
|
|||
if [%1]==[12] (
|
||||
set type=-%bits%
|
||||
)
|
||||
if [%1]==[14] (
|
||||
set type=-%bits%
|
||||
)
|
||||
|
||||
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL
|
||||
|
||||
|
@ -316,7 +334,7 @@ if %unicode% EQU 0 (
|
|||
goto :eof
|
||||
|
||||
:do6
|
||||
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q -dfa
|
||||
call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q
|
||||
goto :eof
|
||||
|
||||
:do7
|
||||
|
@ -324,7 +342,7 @@ if %unicode% EQU 0 (
|
|||
echo Test 7 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q -dfa
|
||||
call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q
|
||||
goto :eof
|
||||
|
||||
:do8
|
||||
|
@ -388,39 +406,35 @@ if %bits% EQU 8 (
|
|||
echo Test 13 Skipped when running 8-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q -dfa
|
||||
call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q
|
||||
goto :eof
|
||||
|
||||
:do14
|
||||
call :runsub 14 testout "Non-JIT limits and other non_JIT tests" -q
|
||||
goto :eof
|
||||
|
||||
:do15
|
||||
if %jit% EQU 1 (
|
||||
echo Test 15 Skipped due to presence of JIT support.
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 14 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 15 testout "JIT-specific features when JIT is not available" -q
|
||||
call :runsub 14 testout "DFA specials for UTF and UCP support" -q
|
||||
goto :eof
|
||||
|
||||
:do15
|
||||
call :runsub 15 testout "Non-JIT limits and other non_JIT tests" -q
|
||||
goto :eof
|
||||
|
||||
:do16
|
||||
if %jit% EQU 0 (
|
||||
echo Test 16 Skipped due to absence of JIT support.
|
||||
if %jit% EQU 1 (
|
||||
echo Test 16 Skipped due to presence of JIT support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 16 testout "JIT-specific features when JIT is available" -q
|
||||
call :runsub 16 testout "JIT-specific features when JIT is not available" -q
|
||||
goto :eof
|
||||
|
||||
:do17
|
||||
if %bits% EQU 16 (
|
||||
echo Test 17 Skipped when running 16-bit tests.
|
||||
if %jit% EQU 0 (
|
||||
echo Test 17 Skipped due to absence of JIT support.
|
||||
goto :eof
|
||||
)
|
||||
if %bits% EQU 32 (
|
||||
echo Test 17 Skipped when running 32-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 17 testout "POSIX interface, excluding UTF-8 and UCP" -q
|
||||
call :runsub 17 testout "JIT-specific features when JIT is available" -q
|
||||
goto :eof
|
||||
|
||||
:do18
|
||||
|
@ -432,11 +446,58 @@ if %bits% EQU 32 (
|
|||
echo Test 18 Skipped when running 32-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 1 testout "POSIX interface with UTF-8 and UCP" -q
|
||||
call :runsub 18 testout "POSIX interface, excluding UTF-8 and UCP" -q
|
||||
goto :eof
|
||||
|
||||
:do19
|
||||
call :runsub 1 testout "Serialization tests" -q
|
||||
if %bits% EQU 16 (
|
||||
echo Test 19 Skipped when running 16-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
if %bits% EQU 32 (
|
||||
echo Test 19 Skipped when running 32-bit tests.
|
||||
goto :eof
|
||||
)
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 19 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q
|
||||
goto :eof
|
||||
|
||||
:do20
|
||||
call :runsub 20 testout "Serialization tests" -q
|
||||
goto :eof
|
||||
|
||||
:do21
|
||||
if %supportBSC% EQU 0 (
|
||||
echo Test 21 Skipped due to absence of backslash-C support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 21 testout "Backslash-C tests without UTF" -q
|
||||
call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa
|
||||
if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do22
|
||||
if %supportBSC% EQU 0 (
|
||||
echo Test 22 Skipped due to absence of backslash-C support.
|
||||
goto :eof
|
||||
)
|
||||
if %unicode% EQU 0 (
|
||||
echo Test 22 Skipped due to absence of Unicode support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 22 testout "Backslash-C tests with UTF" -q
|
||||
if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit
|
||||
goto :eof
|
||||
|
||||
:do23
|
||||
if %supportBSC% EQU 1 (
|
||||
echo Test 23 Skipped due to presence of backslash-C support.
|
||||
goto :eof
|
||||
)
|
||||
call :runsub 23 testout "Backslash-C disabled test" -q
|
||||
goto :eof
|
||||
|
||||
:conferror
|
209
pcre2-10.20/aclocal.m4 → pcre2-10.21/aclocal.m4
vendored
209
pcre2-10.20/aclocal.m4 → pcre2-10.21/aclocal.m4
vendored
|
@ -20,32 +20,63 @@ You have another version of autoconf. It may work, but is not guaranteed to.
|
|||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||
|
||||
# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*-
|
||||
# serial 1 (pkg-config-0.24)
|
||||
#
|
||||
# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
#
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
dnl pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*-
|
||||
dnl serial 11 (pkg-config-0.29)
|
||||
dnl
|
||||
dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
|
||||
dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
|
||||
dnl
|
||||
dnl This program is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU General Public License as published by
|
||||
dnl the Free Software Foundation; either version 2 of the License, or
|
||||
dnl (at your option) any later version.
|
||||
dnl
|
||||
dnl This program is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
dnl General Public License for more details.
|
||||
dnl
|
||||
dnl You should have received a copy of the GNU General Public License
|
||||
dnl along with this program; if not, write to the Free Software
|
||||
dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||
dnl 02111-1307, USA.
|
||||
dnl
|
||||
dnl As a special exception to the GNU General Public License, if you
|
||||
dnl distribute this file as part of a program that contains a
|
||||
dnl configuration script generated by Autoconf, you may include it under
|
||||
dnl the same distribution terms that you use for the rest of that
|
||||
dnl program.
|
||||
|
||||
# PKG_PROG_PKG_CONFIG([MIN-VERSION])
|
||||
# ----------------------------------
|
||||
dnl PKG_PREREQ(MIN-VERSION)
|
||||
dnl -----------------------
|
||||
dnl Since: 0.29
|
||||
dnl
|
||||
dnl Verify that the version of the pkg-config macros is at least
|
||||
dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's
|
||||
dnl installed version of pkg-config, this checks the developer's version
|
||||
dnl of pkg.m4 when generating configure.
|
||||
dnl
|
||||
dnl To ensure that this macro is defined, also add:
|
||||
dnl m4_ifndef([PKG_PREREQ],
|
||||
dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])])
|
||||
dnl
|
||||
dnl See the "Since" comment for each macro you use to see what version
|
||||
dnl of the macros you require.
|
||||
m4_defun([PKG_PREREQ],
|
||||
[m4_define([PKG_MACROS_VERSION], [0.29])
|
||||
m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
|
||||
[m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
|
||||
])dnl PKG_PREREQ
|
||||
|
||||
dnl PKG_PROG_PKG_CONFIG([MIN-VERSION])
|
||||
dnl ----------------------------------
|
||||
dnl Since: 0.16
|
||||
dnl
|
||||
dnl Search for the pkg-config tool and set the PKG_CONFIG variable to
|
||||
dnl first found in the path. Checks that the version of pkg-config found
|
||||
dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is
|
||||
dnl used since that's the first version where most current features of
|
||||
dnl pkg-config existed.
|
||||
AC_DEFUN([PKG_PROG_PKG_CONFIG],
|
||||
[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
|
||||
m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
|
||||
|
@ -67,18 +98,19 @@ if test -n "$PKG_CONFIG"; then
|
|||
PKG_CONFIG=""
|
||||
fi
|
||||
fi[]dnl
|
||||
])# PKG_PROG_PKG_CONFIG
|
||||
])dnl PKG_PROG_PKG_CONFIG
|
||||
|
||||
# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
|
||||
#
|
||||
# Check to see whether a particular set of modules exists. Similar
|
||||
# to PKG_CHECK_MODULES(), but does not set variables or print errors.
|
||||
#
|
||||
# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
|
||||
# only at the first occurrence in configure.ac, so if the first place
|
||||
# it's called might be skipped (such as if it is within an "if"), you
|
||||
# have to call PKG_CHECK_EXISTS manually
|
||||
# --------------------------------------------------------------
|
||||
dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
|
||||
dnl -------------------------------------------------------------------
|
||||
dnl Since: 0.18
|
||||
dnl
|
||||
dnl Check to see whether a particular set of modules exists. Similar to
|
||||
dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
|
||||
dnl
|
||||
dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
|
||||
dnl only at the first occurrence in configure.ac, so if the first place
|
||||
dnl it's called might be skipped (such as if it is within an "if"), you
|
||||
dnl have to call PKG_CHECK_EXISTS manually
|
||||
AC_DEFUN([PKG_CHECK_EXISTS],
|
||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
|
||||
if test -n "$PKG_CONFIG" && \
|
||||
|
@ -88,8 +120,10 @@ m4_ifvaln([$3], [else
|
|||
$3])dnl
|
||||
fi])
|
||||
|
||||
# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
|
||||
# ---------------------------------------------
|
||||
dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
|
||||
dnl ---------------------------------------------
|
||||
dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting
|
||||
dnl pkg_failed based on the result.
|
||||
m4_define([_PKG_CONFIG],
|
||||
[if test -n "$$1"; then
|
||||
pkg_cv_[]$1="$$1"
|
||||
|
@ -101,10 +135,11 @@ m4_define([_PKG_CONFIG],
|
|||
else
|
||||
pkg_failed=untried
|
||||
fi[]dnl
|
||||
])# _PKG_CONFIG
|
||||
])dnl _PKG_CONFIG
|
||||
|
||||
# _PKG_SHORT_ERRORS_SUPPORTED
|
||||
# -----------------------------
|
||||
dnl _PKG_SHORT_ERRORS_SUPPORTED
|
||||
dnl ---------------------------
|
||||
dnl Internal check to see if pkg-config supports short errors.
|
||||
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
|
||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
|
||||
if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
|
||||
|
@ -112,19 +147,17 @@ if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
|
|||
else
|
||||
_pkg_short_errors_supported=no
|
||||
fi[]dnl
|
||||
])# _PKG_SHORT_ERRORS_SUPPORTED
|
||||
])dnl _PKG_SHORT_ERRORS_SUPPORTED
|
||||
|
||||
|
||||
# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
|
||||
# [ACTION-IF-NOT-FOUND])
|
||||
#
|
||||
#
|
||||
# Note that if there is a possibility the first call to
|
||||
# PKG_CHECK_MODULES might not happen, you should be sure to include an
|
||||
# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
|
||||
#
|
||||
#
|
||||
# --------------------------------------------------------------
|
||||
dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
|
||||
dnl [ACTION-IF-NOT-FOUND])
|
||||
dnl --------------------------------------------------------------
|
||||
dnl Since: 0.4.0
|
||||
dnl
|
||||
dnl Note that if there is a possibility the first call to
|
||||
dnl PKG_CHECK_MODULES might not happen, you should be sure to include an
|
||||
dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
|
||||
AC_DEFUN([PKG_CHECK_MODULES],
|
||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
|
||||
AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
|
||||
|
@ -178,16 +211,40 @@ else
|
|||
AC_MSG_RESULT([yes])
|
||||
$3
|
||||
fi[]dnl
|
||||
])# PKG_CHECK_MODULES
|
||||
])dnl PKG_CHECK_MODULES
|
||||
|
||||
|
||||
# PKG_INSTALLDIR(DIRECTORY)
|
||||
# -------------------------
|
||||
# Substitutes the variable pkgconfigdir as the location where a module
|
||||
# should install pkg-config .pc files. By default the directory is
|
||||
# $libdir/pkgconfig, but the default can be changed by passing
|
||||
# DIRECTORY. The user can override through the --with-pkgconfigdir
|
||||
# parameter.
|
||||
dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
|
||||
dnl [ACTION-IF-NOT-FOUND])
|
||||
dnl ---------------------------------------------------------------------
|
||||
dnl Since: 0.29
|
||||
dnl
|
||||
dnl Checks for existence of MODULES and gathers its build flags with
|
||||
dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
|
||||
dnl and VARIABLE-PREFIX_LIBS from --libs.
|
||||
dnl
|
||||
dnl Note that if there is a possibility the first call to
|
||||
dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to
|
||||
dnl include an explicit call to PKG_PROG_PKG_CONFIG in your
|
||||
dnl configure.ac.
|
||||
AC_DEFUN([PKG_CHECK_MODULES_STATIC],
|
||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
|
||||
dnl Remember the current PKG_CONFIG value so it can be put back afterwards.
_save_PKG_CONFIG=$PKG_CONFIG
|
||||
dnl Append --static to PKG_CONFIG for the duration of the check below.
PKG_CONFIG="$PKG_CONFIG --static"
|
||||
dnl Forward every argument unchanged to the regular module check.
PKG_CHECK_MODULES($@)
|
||||
dnl Restore the saved PKG_CONFIG value so later checks are unaffected.
PKG_CONFIG=$_save_PKG_CONFIG[]dnl
|
||||
])dnl PKG_CHECK_MODULES_STATIC
|
||||
|
||||
|
||||
dnl PKG_INSTALLDIR([DIRECTORY])
|
||||
dnl -------------------------
|
||||
dnl Since: 0.27
|
||||
dnl
|
||||
dnl Substitutes the variable pkgconfigdir as the location where a module
|
||||
dnl should install pkg-config .pc files. By default the directory is
|
||||
dnl $libdir/pkgconfig, but the default can be changed by passing
|
||||
dnl DIRECTORY. The user can override through the --with-pkgconfigdir
|
||||
dnl parameter.
|
||||
AC_DEFUN([PKG_INSTALLDIR],
|
||||
[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
|
||||
m4_pushdef([pkg_description],
|
||||
|
@ -198,16 +255,18 @@ AC_ARG_WITH([pkgconfigdir],
|
|||
AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
|
||||
m4_popdef([pkg_default])
|
||||
m4_popdef([pkg_description])
|
||||
]) dnl PKG_INSTALLDIR
|
||||
])dnl PKG_INSTALLDIR
|
||||
|
||||
|
||||
# PKG_NOARCH_INSTALLDIR(DIRECTORY)
|
||||
# -------------------------
|
||||
# Substitutes the variable noarch_pkgconfigdir as the location where a
|
||||
# module should install arch-independent pkg-config .pc files. By
|
||||
# default the directory is $datadir/pkgconfig, but the default can be
|
||||
# changed by passing DIRECTORY. The user can override through the
|
||||
# --with-noarch-pkgconfigdir parameter.
|
||||
dnl PKG_NOARCH_INSTALLDIR([DIRECTORY])
|
||||
dnl --------------------------------
|
||||
dnl Since: 0.27
|
||||
dnl
|
||||
dnl Substitutes the variable noarch_pkgconfigdir as the location where a
|
||||
dnl module should install arch-independent pkg-config .pc files. By
|
||||
dnl default the directory is $datadir/pkgconfig, but the default can be
|
||||
dnl changed by passing DIRECTORY. The user can override through the
|
||||
dnl --with-noarch-pkgconfigdir parameter.
|
||||
AC_DEFUN([PKG_NOARCH_INSTALLDIR],
|
||||
[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
|
||||
m4_pushdef([pkg_description],
|
||||
|
@ -218,13 +277,15 @@ AC_ARG_WITH([noarch-pkgconfigdir],
|
|||
AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
|
||||
m4_popdef([pkg_default])
|
||||
m4_popdef([pkg_description])
|
||||
]) dnl PKG_NOARCH_INSTALLDIR
|
||||
])dnl PKG_NOARCH_INSTALLDIR
|
||||
|
||||
|
||||
# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
|
||||
# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
|
||||
# -------------------------------------------
|
||||
# Retrieves the value of the pkg-config variable for the given module.
|
||||
dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
|
||||
dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
|
||||
dnl -------------------------------------------
|
||||
dnl Since: 0.28
|
||||
dnl
|
||||
dnl Retrieves the value of the pkg-config variable for the given module.
|
||||
AC_DEFUN([PKG_CHECK_VAR],
|
||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
|
||||
AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
|
||||
|
@ -233,7 +294,7 @@ _PKG_CONFIG([$1], [variable="][$3]["], [$2])
|
|||
AS_VAR_COPY([$1], [pkg_cv_][$1])
|
||||
|
||||
AS_VAR_IF([$1], [""], [$5], [$4])dnl
|
||||
])# PKG_CHECK_VAR
|
||||
])dnl PKG_CHECK_VAR
|
||||
|
||||
# Copyright (C) 2002-2014 Free Software Foundation, Inc.
|
||||
#
|
|
@ -33,6 +33,7 @@
|
|||
#cmakedefine EBCDIC 1
|
||||
#cmakedefine EBCDIC_NL25 1
|
||||
#cmakedefine HEAP_MATCH_RECURSE 1
|
||||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
58
pcre2-10.20/configure → pcre2-10.21/configure
vendored
58
pcre2-10.20/configure → pcre2-10.21/configure
vendored
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for PCRE2 10.20.
|
||||
# Generated by GNU Autoconf 2.69 for PCRE2 10.21.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
|
@ -587,8 +587,8 @@ MAKEFLAGS=
|
|||
# Identity of this package.
|
||||
PACKAGE_NAME='PCRE2'
|
||||
PACKAGE_TARNAME='pcre2'
|
||||
PACKAGE_VERSION='10.20'
|
||||
PACKAGE_STRING='PCRE2 10.20'
|
||||
PACKAGE_VERSION='10.21'
|
||||
PACKAGE_STRING='PCRE2 10.21'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
|
@ -833,6 +833,7 @@ enable_newline_is_crlf
|
|||
enable_newline_is_anycrlf
|
||||
enable_newline_is_any
|
||||
enable_bsr_anycrlf
|
||||
enable_never_backslash_C
|
||||
enable_ebcdic
|
||||
enable_ebcdic_nl25
|
||||
enable_stack_for_recursion
|
||||
|
@ -1405,7 +1406,7 @@ if test "$ac_init_help" = "long"; then
|
|||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures PCRE2 10.20 to adapt to many kinds of systems.
|
||||
\`configure' configures PCRE2 10.21 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
|
@ -1475,7 +1476,7 @@ fi
|
|||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of PCRE2 10.20:";;
|
||||
short | recursive ) echo "Configuration of PCRE2 10.21:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
|
@ -1515,6 +1516,8 @@ Optional Features:
|
|||
use CR, LF, or CRLF as newline sequence
|
||||
--enable-newline-is-any use any valid Unicode newline sequence
|
||||
--enable-bsr-anycrlf \R matches only CR, LF, CRLF by default
|
||||
--enable-never-backslash-C
|
||||
use of \C causes an error
|
||||
--enable-ebcdic assume EBCDIC coding rather than ASCII; incompatible
|
||||
with --enable-utf; use only in (uncommon) EBCDIC
|
||||
environments; it implies --enable-rebuild-chartables
|
||||
|
@ -1642,7 +1645,7 @@ fi
|
|||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
PCRE2 configure 10.20
|
||||
PCRE2 configure 10.21
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
@ -2137,7 +2140,7 @@ cat >config.log <<_ACEOF
|
|||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by PCRE2 $as_me 10.20, which was
|
||||
It was created by PCRE2 $as_me 10.21, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
|
@ -3001,7 +3004,7 @@ fi
|
|||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='pcre2'
|
||||
VERSION='10.20'
|
||||
VERSION='10.21'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
|
@ -3162,6 +3165,7 @@ fi
|
|||
MAINT=$MAINTAINER_MODE_TRUE
|
||||
|
||||
|
||||
# END FISH PATCH
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
DEPDIR="${am__leading_dot}deps"
|
||||
|
@ -13624,9 +13628,9 @@ _ACEOF
|
|||
# Versioning
|
||||
|
||||
PCRE2_MAJOR="10"
|
||||
PCRE2_MINOR="20"
|
||||
PCRE2_MINOR="21"
|
||||
PCRE2_PRERELEASE=""
|
||||
PCRE2_DATE="2015-06-30"
|
||||
PCRE2_DATE="2016-01-12"
|
||||
|
||||
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
|
||||
then
|
||||
|
@ -13790,6 +13794,15 @@ else
|
|||
fi
|
||||
|
||||
|
||||
# Handle --enable-never-backslash-C
|
||||
# Check whether --enable-never-backslash-C was given.
|
||||
if test "${enable_never_backslash_C+set}" = set; then :
|
||||
enableval=$enable_never_backslash_C;
|
||||
else
|
||||
enable_never_backslash_C=no
|
||||
fi
|
||||
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
# Check whether --enable-ebcdic was given.
|
||||
if test "${enable_ebcdic+set}" = set; then :
|
||||
|
@ -13980,13 +13993,17 @@ fi
|
|||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode.
|
||||
# character constants depending on the mode. Also, EBCDIC cannot be used with
|
||||
# 16-bit and 32-bit libraries.
|
||||
#
|
||||
if test "x$enable_ebcdic" = "xyes"; then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_unicode" = "xyes"; then
|
||||
as_fn_error $? "support for EBCDIC and Unicode cannot be enabled at the same time" "$LINENO" 5
|
||||
fi
|
||||
if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then
|
||||
as_fn_error $? "EBCDIC support is available only for the 8-bit library" "$LINENO" 5
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check argument to --with-link-size
|
||||
|
@ -15321,6 +15338,12 @@ $as_echo "#define BSR_ANYCRLF /**/" >>confdefs.h
|
|||
|
||||
fi
|
||||
|
||||
if test "$enable_never_backslash_C" = "yes"; then
|
||||
|
||||
$as_echo "#define NEVER_BACKSLASH_C /**/" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define LINK_SIZE $with_link_size
|
||||
|
@ -15395,16 +15418,16 @@ esac
|
|||
# are m4 variables, assigned above.
|
||||
|
||||
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info 2:0:2"
|
||||
$NO_UNDEFINED -version-info 3:0:3"
|
||||
|
||||
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info 2:0:2"
|
||||
$NO_UNDEFINED -version-info 3:0:3"
|
||||
|
||||
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info 2:0:2"
|
||||
$NO_UNDEFINED -version-info 3:0:3"
|
||||
|
||||
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
|
||||
$NO_UNDEFINED -version-info 0:0:0"
|
||||
$NO_UNDEFINED -version-info 0:1:0"
|
||||
|
||||
|
||||
|
||||
|
@ -16451,7 +16474,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by PCRE2 $as_me 10.20, which was
|
||||
This file was extended by PCRE2 $as_me 10.21, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
|
@ -16517,7 +16540,7 @@ _ACEOF
|
|||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
PCRE2 config.status 10.20
|
||||
PCRE2 config.status 10.21
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@ -18249,6 +18272,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Enable Unicode support .......... : ${enable_unicode}
|
||||
Newline char/sequence ........... : ${enable_newline}
|
||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||
\C is disabled .................. : ${enable_never_backslash_C}
|
||||
EBCDIC coding ................... : ${enable_ebcdic}
|
||||
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
|
@ -9,18 +9,18 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
|||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [20])
|
||||
m4_define(pcre2_minor, [21])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2015-06-30])
|
||||
m4_define(pcre2_date, [2016-01-12])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [2:0:2])
|
||||
m4_define(libpcre2_16_version, [2:0:2])
|
||||
m4_define(libpcre2_32_version, [2:0:2])
|
||||
m4_define(libpcre2_posix_version, [0:0:0])
|
||||
m4_define(libpcre2_8_version, [3:0:3])
|
||||
m4_define(libpcre2_16_version, [3:0:3])
|
||||
m4_define(libpcre2_32_version, [3:0:3])
|
||||
m4_define(libpcre2_posix_version, [0:1:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||
|
@ -33,6 +33,7 @@ AC_CONFIG_HEADERS(src/config.h)
|
|||
# Enable maintainer mode to avoid spurious rebuilds due to timestamps in git
|
||||
# not being stored. Discussion in https://github.com/fish-shell/fish-shell/issues/2469
|
||||
AM_MAINTAINER_MODE
|
||||
# END FISH PATCH
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
@ -195,6 +196,12 @@ AC_ARG_ENABLE(bsr-anycrlf,
|
|||
[\R matches only CR, LF, CRLF by default]),
|
||||
, enable_bsr_anycrlf=no)
|
||||
|
||||
# Handle --enable-never-backslash-C
|
||||
AC_ARG_ENABLE(never-backslash-C,
|
||||
AS_HELP_STRING([--enable-never-backslash-C],
|
||||
[use of \C causes an error]),
|
||||
, enable_never_backslash_C=no)
|
||||
|
||||
# Handle --enable-ebcdic
|
||||
AC_ARG_ENABLE(ebcdic,
|
||||
AS_HELP_STRING([--enable-ebcdic],
|
||||
|
@ -338,13 +345,17 @@ fi
|
|||
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
|
||||
# Also check that UTF support is not requested, because PCRE2 cannot handle
|
||||
# EBCDIC and UTF in the same build. To do so it would need to use different
|
||||
# character constants depending on the mode.
|
||||
# character constants depending on the mode. Also, EBCDIC cannot be used with
|
||||
# 16-bit and 32-bit libraries.
|
||||
#
|
||||
if test "x$enable_ebcdic" = "xyes"; then
|
||||
enable_rebuild_chartables=yes
|
||||
if test "x$enable_unicode" = "xyes"; then
|
||||
AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time])
|
||||
fi
|
||||
if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then
|
||||
AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library])
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check argument to --with-link-size
|
||||
|
@ -609,6 +620,11 @@ if test "$enable_bsr_anycrlf" = "yes"; then
|
|||
The build-time default can be overridden by the user of PCRE2 at runtime.])
|
||||
fi
|
||||
|
||||
if test "$enable_never_backslash_C" = "yes"; then
|
||||
AC_DEFINE([NEVER_BACKSLASH_C], [], [
|
||||
Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
|
||||
The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which
|
||||
|
@ -886,6 +902,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Enable Unicode support .......... : ${enable_unicode}
|
||||
Newline char/sequence ........... : ${enable_newline}
|
||||
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
|
||||
\C is disabled .................. : ${enable_never_backslash_C}
|
||||
EBCDIC coding ................... : ${enable_ebcdic}
|
||||
EBCDIC code for NL .............. : ${ebcdic_nl_code}
|
||||
Rebuild char tables ............. : ${enable_rebuild_chartables}
|
|
@ -204,12 +204,14 @@ for (;;)
|
|||
printf "data> " if $interact;
|
||||
last NEXT_RE if ! ($_ = <$infile>);
|
||||
chomp;
|
||||
printf $outfile "$_\n" if ! $interact;
|
||||
printf $outfile "%s", "$_\n" if ! $interact;
|
||||
|
||||
s/\s+$//; # Remove trailing space
|
||||
s/^\s+//; # Remove leading space
|
||||
|
||||
last if ($_ eq "");
|
||||
next if $_ =~ /^\\=(?:\s|$)/; # Comment line
|
||||
|
||||
$x = eval "\"$_\""; # To get escapes processed
|
||||
|
||||
# Empty array for holding results, ensure $REGERROR and $REGMARK are
|
|
@ -182,6 +182,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
/* #undef NEVER_BACKSLASH_C */
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
|
@ -200,7 +203,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.20"
|
||||
#define PACKAGE_STRING "PCRE2 10.21"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -209,7 +212,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.20"
|
||||
#define PACKAGE_VERSION "10.21"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -290,7 +293,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.20"
|
||||
#define VERSION "10.21"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
|
@ -169,6 +169,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
overflow caused by enormously large patterns. */
|
||||
#undef MAX_NAME_SIZE
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
#undef NEVER_BACKSLASH_C
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -102,7 +102,7 @@ fprintf(f,
|
|||
"/* This file was automatically written by the dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256.\n\n");
|
||||
"are used only for characters whose code values are less than 256. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
|
@ -115,7 +115,7 @@ fprintf(f,
|
|||
#endif
|
||||
|
||||
fprintf(f,
|
||||
"The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"/* The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE2 is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
|
@ -5,7 +5,7 @@
|
|||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2015 University of Cambridge
|
||||
Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 20
|
||||
#define PCRE2_MINOR 21
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2015-06-30
|
||||
#define PCRE2_DATE 2016-01-12
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -120,6 +120,8 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
|
@ -144,9 +146,13 @@ sanity checks). */
|
|||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* This is an additional option for pcre2_substitute(). */
|
||||
/* These are additional options for pcre2_substitute(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
|
@ -233,6 +239,12 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -259,6 +271,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -291,6 +304,7 @@ define special values to indicate zero-terminated strings and unset offsets in
|
|||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
|
@ -388,6 +402,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -405,6 +421,8 @@ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
|
@ -606,8 +624,10 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
|
@ -5,7 +5,7 @@
|
|||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2015 University of Cambridge
|
||||
Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -120,6 +120,8 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
|
@ -144,9 +146,13 @@ sanity checks). */
|
|||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* This is an additional option for pcre2_substitute(). */
|
||||
/* These are additional options for pcre2_substitute(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
|
@ -233,6 +239,12 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -259,6 +271,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -291,6 +304,7 @@ define special values to indicate zero-terminated strings and unset offsets in
|
|||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
|
@ -388,6 +402,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -405,6 +421,8 @@ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
|
@ -606,8 +624,10 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -171,64 +171,6 @@ static const uint8_t posspropstab[3][4] = {
|
|||
{ ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
|
||||
};
|
||||
|
||||
/* This table is used when converting repeating opcodes into possessified
|
||||
versions as a result of an explicit possessive quantifier such as ++. A zero
|
||||
value means there is no possessified version - in those cases the item in
|
||||
question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
|
||||
because all relevant opcodes are less than that. */
|
||||
|
||||
static const uint8_t opcode_possessify[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 15 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 - 31 */
|
||||
|
||||
0, /* NOTI */
|
||||
OP_POSSTAR, 0, /* STAR, MINSTAR */
|
||||
OP_POSPLUS, 0, /* PLUS, MINPLUS */
|
||||
OP_POSQUERY, 0, /* QUERY, MINQUERY */
|
||||
OP_POSUPTO, 0, /* UPTO, MINUPTO */
|
||||
0, /* EXACT */
|
||||
0, 0, 0, 0, /* POS{STAR,PLUS,QUERY,UPTO} */
|
||||
|
||||
OP_POSSTARI, 0, /* STARI, MINSTARI */
|
||||
OP_POSPLUSI, 0, /* PLUSI, MINPLUSI */
|
||||
OP_POSQUERYI, 0, /* QUERYI, MINQUERYI */
|
||||
OP_POSUPTOI, 0, /* UPTOI, MINUPTOI */
|
||||
0, /* EXACTI */
|
||||
0, 0, 0, 0, /* POS{STARI,PLUSI,QUERYI,UPTOI} */
|
||||
|
||||
OP_NOTPOSSTAR, 0, /* NOTSTAR, NOTMINSTAR */
|
||||
OP_NOTPOSPLUS, 0, /* NOTPLUS, NOTMINPLUS */
|
||||
OP_NOTPOSQUERY, 0, /* NOTQUERY, NOTMINQUERY */
|
||||
OP_NOTPOSUPTO, 0, /* NOTUPTO, NOTMINUPTO */
|
||||
0, /* NOTEXACT */
|
||||
0, 0, 0, 0, /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
|
||||
|
||||
OP_NOTPOSSTARI, 0, /* NOTSTARI, NOTMINSTARI */
|
||||
OP_NOTPOSPLUSI, 0, /* NOTPLUSI, NOTMINPLUSI */
|
||||
OP_NOTPOSQUERYI, 0, /* NOTQUERYI, NOTMINQUERYI */
|
||||
OP_NOTPOSUPTOI, 0, /* NOTUPTOI, NOTMINUPTOI */
|
||||
0, /* NOTEXACTI */
|
||||
0, 0, 0, 0, /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
|
||||
|
||||
OP_TYPEPOSSTAR, 0, /* TYPESTAR, TYPEMINSTAR */
|
||||
OP_TYPEPOSPLUS, 0, /* TYPEPLUS, TYPEMINPLUS */
|
||||
OP_TYPEPOSQUERY, 0, /* TYPEQUERY, TYPEMINQUERY */
|
||||
OP_TYPEPOSUPTO, 0, /* TYPEUPTO, TYPEMINUPTO */
|
||||
0, /* TYPEEXACT */
|
||||
0, 0, 0, 0, /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
|
||||
|
||||
OP_CRPOSSTAR, 0, /* CRSTAR, CRMINSTAR */
|
||||
OP_CRPOSPLUS, 0, /* CRPLUS, CRMINPLUS */
|
||||
OP_CRPOSQUERY, 0, /* CRQUERY, CRMINQUERY */
|
||||
OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */
|
||||
0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */
|
||||
|
||||
0, 0, 0, /* CLASS, NCLASS, XCLASS */
|
||||
0, 0, /* REF, REFI */
|
||||
0, 0, /* DNREF, DNREFI */
|
||||
0, 0 /* RECURSE, CALLOUT */
|
||||
};
|
||||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
File diff suppressed because it is too large
Load diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -131,13 +131,14 @@ return gcontext;
|
|||
when no context is supplied to the compile function. */
|
||||
|
||||
const pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
NULL,
|
||||
NULL,
|
||||
PRIV(default_tables),
|
||||
BSR_DEFAULT,
|
||||
NEWLINE_DEFAULT,
|
||||
PARENS_NEST_LIMIT };
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT }; /* As it says */
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
@ -169,6 +170,7 @@ const pcre2_match_context PRIV(default_match_context) = {
|
|||
#endif
|
||||
NULL,
|
||||
NULL,
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_RECURSION };
|
||||
|
||||
|
@ -294,6 +296,13 @@ switch(value)
|
|||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
|
@ -347,6 +356,13 @@ mcontext->match_limit = limit;
|
|||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||
{
|
||||
mcontext->offset_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -433,13 +433,13 @@ move back, and set up each alternative appropriately. */
|
|||
|
||||
if (*first_op == OP_REVERSE)
|
||||
{
|
||||
int max_back = 0;
|
||||
int gone_back;
|
||||
size_t max_back = 0;
|
||||
size_t gone_back;
|
||||
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
int back = GET(end_code, 2+LINK_SIZE);
|
||||
size_t back = GET(end_code, 2+LINK_SIZE);
|
||||
if (back > max_back) max_back = back;
|
||||
end_code += GET(end_code, 1);
|
||||
}
|
||||
|
@ -466,8 +466,8 @@ if (*first_op == OP_REVERSE)
|
|||
/* In byte-mode we can do this quickly. */
|
||||
|
||||
{
|
||||
gone_back = (current_subject - max_back < start_subject)?
|
||||
(int)(current_subject - start_subject) : max_back;
|
||||
size_t current_offset = (size_t)(current_subject - start_subject);
|
||||
gone_back = (current_offset < max_back)? current_offset : max_back;
|
||||
current_subject -= gone_back;
|
||||
}
|
||||
|
||||
|
@ -481,7 +481,7 @@ if (*first_op == OP_REVERSE)
|
|||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
int back = GET(end_code, 2+LINK_SIZE);
|
||||
size_t back = GET(end_code, 2+LINK_SIZE);
|
||||
if (back <= gone_back)
|
||||
{
|
||||
int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
|
||||
|
@ -2774,7 +2774,7 @@ for (;;)
|
|||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||
PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
|
||||
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
||||
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
}
|
||||
#endif
|
||||
if (charcount > 0)
|
||||
|
@ -2874,7 +2874,7 @@ for (;;)
|
|||
PCRE2_SPTR pp = local_ptr;
|
||||
charcount = (int)(pp - p);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
||||
if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
||||
}
|
||||
|
@ -2960,7 +2960,7 @@ for (;;)
|
|||
{
|
||||
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||
PCRE2_SPTR pp = start_subject + local_offsets[1];
|
||||
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
||||
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||
}
|
||||
#endif
|
||||
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
||||
|
@ -3116,6 +3116,7 @@ const pcre2_real_code *re = (const pcre2_real_code *)code;
|
|||
|
||||
PCRE2_SPTR start_match;
|
||||
PCRE2_SPTR end_subject;
|
||||
PCRE2_SPTR bumpalong_limit;
|
||||
PCRE2_SPTR req_cu_ptr;
|
||||
|
||||
BOOL utf, anchored, startline, firstline;
|
||||
|
@ -3172,15 +3173,10 @@ occur. */
|
|||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
/* A NULL match context means "use a default context" */
|
||||
|
||||
if (mcontext == NULL)
|
||||
mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
|
||||
|
||||
/* If restarting after a partial match, do some sanity checks on the contents
|
||||
of the workspace. */
|
||||
|
||||
|
@ -3205,8 +3201,11 @@ where to start. */
|
|||
|
||||
startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||
bumpalong_limit = end_subject;
|
||||
|
||||
/* Fill in the fields in the match block. */
|
||||
/* Get data from the match context, if present, and fill in the fields in the
|
||||
match block. It is an error to set an offset limit without setting the flag at
|
||||
compile time. */
|
||||
|
||||
if (mcontext == NULL)
|
||||
{
|
||||
|
@ -3215,6 +3214,12 @@ if (mcontext == NULL)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (mcontext->offset_limit != PCRE2_UNSET)
|
||||
{
|
||||
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
|
||||
return PCRE2_ERROR_BADOFFSETLIMIT;
|
||||
bumpalong_limit = subject + mcontext->offset_limit;
|
||||
}
|
||||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
mb->memctl = mcontext->memctl;
|
||||
|
@ -3264,18 +3269,50 @@ switch(re->newline_convention)
|
|||
|
||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. */
|
||||
multiunit character. We check only the portion of the subject that is going to
|
||||
be inspected during matching - from the offset minus the maximum lookbehind
|
||||
to the given length. This saves time when a small part of a large subject is
|
||||
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||
is a number of characters, not code units. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar));
|
||||
if (match_data->rc != 0) return match_data->rc;
|
||||
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||
|
||||
if (start_offset > 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (start_offset > 0 && start_offset < length &&
|
||||
NOT_FIRSTCHAR(subject[start_offset]))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
unsigned int i;
|
||||
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||
{
|
||||
check_subject--;
|
||||
while (check_subject > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*check_subject & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*check_subject & 0xfc00) == 0xdc00)
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
check_subject--;
|
||||
}
|
||||
#else /* In the 32-bit library, one code unit equals one character. */
|
||||
check_subject -= re->max_lookbehind;
|
||||
if (check_subject < subject) check_subject = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
|
||||
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||
offset to be an absolute offset in the whole string. */
|
||||
|
||||
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||
length - (check_subject - subject), &(match_data->startchar));
|
||||
if (match_data->rc != 0)
|
||||
{
|
||||
match_data->startchar += check_subject - subject;
|
||||
return match_data->rc;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
@ -3507,6 +3544,10 @@ for (;;)
|
|||
|
||||
/* ------------ End of start of match optimizations ------------ */
|
||||
|
||||
/* Give no match if we have passed the bumpalong limit. */
|
||||
|
||||
if (start_match > bumpalong_limit) break;
|
||||
|
||||
/* OK, now we can do the business */
|
||||
|
||||
mb->start_used_ptr = start_match;
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2015 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -51,11 +51,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
Do not ever re-use any error number, because they are documented. Always add a
|
||||
new error instead. This used to be a table of strings, but in order to reduce
|
||||
the number of relocations needed when a shared library is loaded dynamically,
|
||||
it is now one long string. We cannot use a table of offsets, because the
|
||||
lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
This used to be a table of strings, but in order to reduce the number of
|
||||
relocations needed when a shared library is loaded dynamically, it is now one
|
||||
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||
performance issue because these strings are used only when there is an error.
|
||||
|
||||
|
@ -92,7 +91,7 @@ static const char compile_error_texts[] =
|
|||
"failed to allocate heap memory\0"
|
||||
"unmatched closing parenthesis\0"
|
||||
"internal error: code overflow\0"
|
||||
"unrecognized character after (?<\0"
|
||||
"letter or underscore expected after (?< or (?'\0"
|
||||
/* 25 */
|
||||
"lookbehind assertion is not fixed length\0"
|
||||
"malformed number or name after (?(\0"
|
||||
|
@ -112,7 +111,7 @@ static const char compile_error_texts[] =
|
|||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
"recursion could loop indefinitely\0"
|
||||
"invalid escape sequence in (*VERB) name\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator)\0"
|
||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||
|
@ -154,7 +153,7 @@ static const char compile_error_texts[] =
|
|||
/* 70 */
|
||||
"internal error: unknown opcode in find_fixedlength()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"too many forward references\0"
|
||||
"SPARE ERROR\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
|
@ -169,6 +168,11 @@ static const char compile_error_texts[] =
|
|||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
@ -200,7 +204,7 @@ static const char match_error_texts[] =
|
|||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated 0x80 byte\0"
|
||||
"UTF-8 error: isolated byte with 0x80 bit set\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
|
@ -239,7 +243,15 @@ static const char match_error_texts[] =
|
|||
"nested recursion at the same subject position\0"
|
||||
"recursion limit exceeded\0"
|
||||
"requested value is not available\0"
|
||||
/* 55 */
|
||||
"requested value is not set\0"
|
||||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||
"bad escape sequence in replacement string\0"
|
||||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
;
|
||||
|
||||
|
218
pcre2-10.21/src/pcre2_find_bracket.c
Normal file
218
pcre2-10.21/src/pcre2_find_bracket.c
Normal file
|
@ -0,0 +1,218 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE for a lookbehind. The function is called
|
||||
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
|
||||
matching length. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
register PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle lookbehind */
|
||||
|
||||
else if (c == OP_REVERSE)
|
||||
{
|
||||
if (number < 0) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2015 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -39,7 +39,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
/* We do not support both EBCDIC and Unicode at the same time. The "configure"
|
||||
script prevents both being selected, but not everybody uses "configure". */
|
||||
script prevents both being selected, but not everybody uses "configure". EBCDIC
|
||||
is only supported for the 8-bit library, but the check for this has to be later
|
||||
in this file, because the first part is not width-dependent, and is included by
|
||||
pcre2test.c with CODE_UNIT_WIDTH == 0. */
|
||||
|
||||
#if defined EBCDIC && defined SUPPORT_UNICODE
|
||||
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
|
||||
|
@ -524,9 +527,11 @@ bytes in a code unit in that mode. */
|
|||
#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */
|
||||
#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */
|
||||
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
|
||||
#define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. */
|
||||
#define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */
|
||||
#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */
|
||||
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
|
||||
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
|
||||
#define PCRE2_HASBKC 0x00400000 /* contains \C */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
|
@ -917,6 +922,7 @@ a positive value. */
|
|||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
#define STRING_MARK "MARK"
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
|
@ -1189,6 +1195,7 @@ only. */
|
|||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
#define STRING_MARK STR_M STR_A STR_R STR_K
|
||||
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
@ -1800,11 +1807,16 @@ typedef struct pcre2_serialized_data {
|
|||
|
||||
#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0
|
||||
|
||||
/* EBCDIC is supported only for the 8-bit library. */
|
||||
|
||||
#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
#error EBCDIC is not supported for the 16-bit or 32-bit libraries
|
||||
#endif
|
||||
|
||||
/* This is the largest non-UTF code point. */
|
||||
|
||||
#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH))
|
||||
|
||||
|
||||
/* Internal shared data tables and variables. These are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE2 public API. Although the data for some of them is
|
||||
|
@ -1883,6 +1895,7 @@ not referenced from pcre2test, and must not be defined when no code unit width
|
|||
is available. */
|
||||
|
||||
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
||||
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
||||
|
@ -1904,6 +1917,8 @@ is available. */
|
|||
|
||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||
const compile_block *);
|
||||
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||
int *, uint32_t, BOOL, compile_block *);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -72,7 +72,7 @@ just to undefine them all. */
|
|||
#undef MAX_MARK
|
||||
#undef MAX_PATTERN_SIZE
|
||||
#undef MAX_UTF_SINGLE_CU
|
||||
#undef NOT_FIRSTCHAR
|
||||
#undef NOT_FIRSTCU
|
||||
#undef PUT
|
||||
#undef PUT2
|
||||
#undef PUT2INC
|
||||
|
@ -252,7 +252,7 @@ UTF support is omitted, we don't even define them. */
|
|||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCHAR(c) */
|
||||
/* #define NOT_FIRSTCU(c) */
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
|
@ -285,10 +285,10 @@ Otherwise it has an undefined behaviour. */
|
|||
|
||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
|
||||
|
||||
/* Returns TRUE, if the given character is not the first character
|
||||
of a UTF sequence. */
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xc0) == 0x80)
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
@ -371,10 +371,10 @@ Otherwise it has an undefined behaviour. */
|
|||
|
||||
#define GET_EXTRALEN(c) 1
|
||||
|
||||
/* Returns TRUE, if the given character is not the first character
|
||||
of a UTF sequence. */
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xfc00) == 0xdc00)
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer. */
|
||||
|
@ -469,7 +469,7 @@ into one PCRE2_UCHAR unit. */
|
|||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||
#define HAS_EXTRALEN(c) (0)
|
||||
#define GET_EXTRALEN(c) (0)
|
||||
#define NOT_FIRSTCHAR(c) (0)
|
||||
#define NOT_FIRSTCU(c) (0)
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-32 mode. */
|
||||
|
@ -562,6 +562,7 @@ typedef struct pcre2_real_compile_context {
|
|||
int (*stack_guard)(uint32_t, void *);
|
||||
void *stack_guard_data;
|
||||
const uint8_t *tables;
|
||||
PCRE2_SIZE max_pattern_length;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t parens_nest_limit;
|
||||
|
@ -580,6 +581,7 @@ typedef struct pcre2_real_match_context {
|
|||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);
|
||||
void *callout_data;
|
||||
PCRE2_SIZE offset_limit;
|
||||
uint32_t match_limit;
|
||||
uint32_t recursion_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
@ -588,11 +590,17 @@ typedef struct pcre2_real_match_context {
|
|||
defined specially because it is required in pcre2_serialize_decode() when
|
||||
copying the size from possibly unaligned memory into a variable of the same
|
||||
type. Use a macro rather than a typedef to avoid compiler warnings when this
|
||||
file is included multiple times by pcre2test. */
|
||||
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
|
||||
largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
|
||||
argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
|
||||
here.) */
|
||||
|
||||
#undef CODE_BLOCKSIZE_TYPE
|
||||
#define CODE_BLOCKSIZE_TYPE size_t
|
||||
|
||||
#undef LOOKBEHIND_MAX
|
||||
#define LOOKBEHIND_MAX UINT16_MAX
|
||||
|
||||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
|
@ -647,6 +655,13 @@ typedef struct recurse_check {
|
|||
PCRE2_SPTR group;
|
||||
} recurse_check;
|
||||
|
||||
/* Structure for building a cache when filling in recursion offsets. */
|
||||
|
||||
typedef struct recurse_cache {
|
||||
PCRE2_SPTR group;   /* presumably points at the cached group within the compiled code - TODO confirm against the compile code that fills it */
|
||||
int recno;          /* presumably the group (recursion) number this entry was looked up by - TODO confirm */
|
||||
} recurse_cache;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
|
@ -678,7 +693,7 @@ typedef struct compile_block {
|
|||
PCRE2_SPTR start_code; /* The start of the compiled code */
|
||||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_UCHAR *hwm; /* High watermark of workspace */
|
||||
PCRE2_SPTR nestptr[2]; /* Pointer(s) saved for string substitution */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
size_t workspace_size; /* Size of workspace */
|
||||
uint16_t names_found; /* Number of entries so far */
|
||||
|
@ -690,6 +705,7 @@ typedef struct compile_block {
|
|||
uint32_t external_flags; /* External flag bits to be set */
|
||||
uint32_t bracount; /* Count of capturing parens as we compile */
|
||||
uint32_t final_bracount; /* Saved value after first pass */
|
||||
uint32_t *groupinfo; /* Group info vector */
|
||||
uint32_t top_backref; /* Maximum back reference */
|
||||
uint32_t backref_map; /* Bitmap of low back refs */
|
||||
uint32_t nltype; /* Newline type */
|
||||
|
@ -701,6 +717,7 @@ typedef struct compile_block {
|
|||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL had_recurse; /* Had a recursion or subroutine call */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
BOOL iscondassert; /* Next assert is a condition */
|
File diff suppressed because it is too large
Load diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -129,10 +129,12 @@ arguments.match_data = match_data;
|
|||
arguments.startchar_ptr = subject;
|
||||
arguments.mark_ptr = NULL;
|
||||
arguments.options = options;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
arguments.callout = mcontext->callout;
|
||||
arguments.callout_data = mcontext->callout_data;
|
||||
arguments.offset_limit = mcontext->offset_limit;
|
||||
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
if (mcontext->jit_callback != NULL)
|
||||
|
@ -144,6 +146,7 @@ else
|
|||
{
|
||||
arguments.callout = NULL;
|
||||
arguments.callout_data = NULL;
|
||||
arguments.offset_limit = PCRE2_UNSET;
|
||||
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
|
||||
MATCH_LIMIT : re->limit_match;
|
||||
jit_stack = NULL;
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -188,6 +188,7 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
||||
{ M, A, 0, 0, "[3-57-9]", "5" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
|
||||
|
@ -247,13 +248,17 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ M, A, 0, 0, "a\\z", "aaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
|
||||
|
||||
/* Brackets. */
|
||||
/* Brackets and alternatives. */
|
||||
{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
|
||||
{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
|
||||
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
|
||||
{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
|
||||
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
|
||||
{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
|
||||
{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
||||
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
|
||||
|
||||
/* Greedy and non-greedy ? operators. */
|
||||
{ MU, A, 0, 0, "(?:a)?a", "laab" },
|
||||
|
@ -323,6 +328,14 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||
{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
||||
{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
|
||||
{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
|
||||
{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
|
||||
{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
|
||||
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
|
||||
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
|
||||
{ MU, A, 0, 0, ".[ab]*.", "xx" },
|
||||
{ MU, A, 0, 0, ".[ab]*a", "xxa" },
|
||||
{ MU, A, 0, 0, ".[ab]?.", "xx" },
|
||||
|
||||
/* Bracket repeats with limit. */
|
||||
{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
|
||||
|
@ -679,6 +692,7 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
|
||||
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
|
||||
{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
|
||||
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
|
||||
|
||||
/* Recurse. */
|
||||
{ MU, A, 0, 0, "(a)(?1)", "aa" },
|
||||
|
@ -813,6 +827,9 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
|
||||
/* (*SKIP) verb. */
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
|
||||
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
|
||||
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
|
||||
|
||||
/* (*THEN) verb. */
|
||||
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -194,7 +194,7 @@ if (caseless)
|
|||
GETCHARINC(c, eptr);
|
||||
GETCHARINC(d, p);
|
||||
ur = GET_UCD(d);
|
||||
if (c != d && c != d + ur->other_case)
|
||||
if (c != d && c != (uint32_t)((int)d + ur->other_case))
|
||||
{
|
||||
const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
|
||||
for (;;)
|
||||
|
@ -211,7 +211,7 @@ if (caseless)
|
|||
/* Not in UTF mode */
|
||||
|
||||
{
|
||||
while (length-- > 0)
|
||||
for (; length > 0; length--)
|
||||
{
|
||||
uint32_t cc, cp;
|
||||
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
||||
|
@ -226,11 +226,11 @@ if (caseless)
|
|||
}
|
||||
|
||||
/* In the caseful case, we can just compare the code units, whether or not we
|
||||
are in UT mode. */
|
||||
are in UTF mode. */
|
||||
|
||||
else
|
||||
{
|
||||
while (length-- > 0)
|
||||
for (; length > 0; length--)
|
||||
{
|
||||
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
||||
if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /*No match */
|
||||
|
@ -1704,14 +1704,14 @@ for (;;)
|
|||
back a number of characters, not bytes. */
|
||||
|
||||
case OP_REVERSE:
|
||||
i = GET(ecode, 1);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
i = GET(ecode, 1);
|
||||
while (i-- > 0)
|
||||
{
|
||||
if (eptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
eptr--;
|
||||
if (eptr < mb->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
BACKCHAR(eptr);
|
||||
}
|
||||
}
|
||||
|
@ -1721,8 +1721,8 @@ for (;;)
|
|||
/* No UTF support, or not in UTF mode: count is a code unit count */
|
||||
|
||||
{
|
||||
eptr -= GET(ecode, 1);
|
||||
if (eptr < mb->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
if (i > eptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
eptr -= i;
|
||||
}
|
||||
|
||||
/* Save the earliest consulted character, then skip to next op code */
|
||||
|
@ -2408,8 +2408,9 @@ for (;;)
|
|||
ecode++;
|
||||
break;
|
||||
|
||||
/* Match a single byte, even in UTF-8 mode. This opcode really does match
|
||||
any byte, even newline, independent of the setting of PCRE2_DOTALL. */
|
||||
/* Match a single code unit, even in UTF-8 mode. This opcode really does
|
||||
match any code unit, even newline. (It really should be called ANYCODEUNIT,
|
||||
of course - the byte name is from pre-16 bit days.) */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
if (eptr >= mb->end_subject) /* DO NOT merge the eptr++ here; it must */
|
||||
|
@ -3342,7 +3343,10 @@ for (;;)
|
|||
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
|
||||
for (; length > 0; length--)
|
||||
{
|
||||
if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
@ -6459,6 +6463,7 @@ PCRE2_UCHAR first_cu2 = 0;
|
|||
PCRE2_UCHAR req_cu = 0;
|
||||
PCRE2_UCHAR req_cu2 = 0;
|
||||
|
||||
PCRE2_SPTR bumpalong_limit;
|
||||
PCRE2_SPTR end_subject;
|
||||
PCRE2_SPTR start_match = subject + start_offset;
|
||||
PCRE2_SPTR req_cu_ptr = start_match - 1;
|
||||
|
@ -6482,6 +6487,7 @@ mb->match_frames_base = &frame_zero;
|
|||
subject string. */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
end_subject = subject + length;
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
|
@ -6513,7 +6519,7 @@ occur. */
|
|||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
|
@ -6533,21 +6539,66 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
|
|||
|
||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. */
|
||||
multiunit character. We check only the portion of the subject that is going to
|
||||
be inspected during matching - from the offset minus the maximum lookbehind
|
||||
to the given length. This saves time when a small part of a large subject is
|
||||
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||
is a number of characters, not code units. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar));
|
||||
if (match_data->rc != 0) return match_data->rc;
|
||||
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||
|
||||
if (start_offset > 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (start_offset > 0 && start_offset < length &&
|
||||
NOT_FIRSTCHAR(subject[start_offset]))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
unsigned int i;
|
||||
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||
{
|
||||
check_subject--;
|
||||
while (check_subject > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*check_subject & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*check_subject & 0xfc00) == 0xdc00)
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
check_subject--;
|
||||
}
|
||||
#else
|
||||
/* In the 32-bit library, one code unit equals one character. However,
|
||||
we cannot just subtract the lookbehind and then compare pointers, because
|
||||
a very large lookbehind could create an invalid pointer. */
|
||||
|
||||
if (start_offset >= re->max_lookbehind)
|
||||
check_subject -= re->max_lookbehind;
|
||||
else
|
||||
check_subject = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
|
||||
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||
offset to be an absolute offset in the whole string. */
|
||||
|
||||
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||
length - (check_subject - subject), &(match_data->startchar));
|
||||
if (match_data->rc != 0)
|
||||
{
|
||||
match_data->startchar += check_subject - subject;
|
||||
return match_data->rc;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* It is an error to set an offset limit without setting the flag at compile
|
||||
time. */
|
||||
|
||||
if (mcontext->offset_limit != PCRE2_UNSET &&
|
||||
(re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
|
||||
return PCRE2_ERROR_BADOFFSETLIMIT;
|
||||
|
||||
/* If the pattern was successfully studied with JIT support, run the JIT
|
||||
executable instead of the rest of this function. Most options must be set at
|
||||
compile time for the JIT code to be usable. Fall back to the normal code path if
|
||||
|
@ -6568,30 +6619,21 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
|
|||
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
|
||||
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||
startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
|
||||
end_subject : subject + mcontext->offset_limit;
|
||||
|
||||
/* Fill in the fields in the match block. */
|
||||
|
||||
if (mcontext == NULL)
|
||||
{
|
||||
mb->callout = NULL;
|
||||
mb->memctl = re->memctl;
|
||||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
mb->memctl = mcontext->memctl;
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mb->stack_memctl = re->memctl;
|
||||
mb->stack_memctl = mcontext->stack_memctl;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
mb->memctl = mcontext->memctl;
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mb->stack_memctl = mcontext->stack_memctl;
|
||||
#endif
|
||||
}
|
||||
|
||||
mb->start_subject = subject;
|
||||
mb->start_offset = start_offset;
|
||||
mb->end_subject = end_subject = mb->start_subject + length;
|
||||
mb->end_subject = end_subject;
|
||||
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
|
||||
|
||||
mb->moptions = options; /* Match options */
|
||||
|
@ -6783,7 +6825,8 @@ for(;;)
|
|||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Advance to a unique first code unit if there is one. */
|
||||
/* Advance to a unique first code unit if there is one. In 8-bit mode, the
|
||||
use of memchr() gives a big speed up. */
|
||||
|
||||
if (has_first_cu)
|
||||
{
|
||||
|
@ -6793,8 +6836,15 @@ for(;;)
|
|||
(smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
|
||||
start_match++;
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
|
||||
start_match++;
|
||||
#else
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* Or to just after a linebreak for a multiline match */
|
||||
|
@ -6926,6 +6976,14 @@ for(;;)
|
|||
|
||||
/* ------------ End of start of match optimizations ------------ */
|
||||
|
||||
/* Give no match if we have passed the bumpalong limit. */
|
||||
|
||||
if (start_match > bumpalong_limit)
|
||||
{
|
||||
rc = MATCH_NOMATCH;
|
||||
break;
|
||||
}
|
||||
|
||||
/* OK, we can now run the match. If "hitend" is set afterwards, remember the
|
||||
first starting point for which a partial match was found. */
|
||||
|
||||
|
@ -7044,7 +7102,7 @@ for(;;)
|
|||
|
||||
(2) The pattern is anchored or the match was failed by (*COMMIT);
|
||||
|
||||
(3) We are past the end of the subject;
|
||||
(3) We are past the end of the subject or the bumpalong limit;
|
||||
|
||||
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
|
||||
this option requests that a match occur at or before the first newline in
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -77,6 +77,7 @@ if (where == NULL) /* Requests field length */
|
|||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
|
@ -151,6 +152,10 @@ switch(what)
|
|||
&(re->start_bitmap[0]) : NULL;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -58,12 +58,13 @@ static const char *OP_names[] = { OP_NAME_LIST };
|
|||
|
||||
/* The functions and tables herein must all have mode-dependent names. */
|
||||
|
||||
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
|
@ -188,12 +189,14 @@ return 0;
|
|||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* This takes no account of UTF as it always prints each individual code unit.
|
||||
The string is zero-terminated.
|
||||
/* These take no account of UTF as they always print each individual code unit.
|
||||
The string is zero-terminated for print_custring(); the length is given for
|
||||
print_custring_bylen().
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr point to the string
|
||||
len length for print_custring_bylen()
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
@ -208,6 +211,16 @@ while (*ptr != '\0')
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||
{
|
||||
while (len-- > 0)
|
||||
{
|
||||
register uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -603,7 +616,7 @@ for(;;)
|
|||
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring(f, code + 2 + 4*LINK_SIZE);
|
||||
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
|
@ -791,7 +804,7 @@ for(;;)
|
|||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
fprintf(f, " %s ", OP_names[*code]);
|
||||
print_custring(f, code + 2);
|
||||
print_custring_bylen(f, code + 2, code[1]);
|
||||
extra += code[1];
|
||||
break;
|
||||
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2015 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -104,7 +104,7 @@ for (i = 0; i < number_of_codes; i++)
|
|||
return PCRE2_ERROR_MIXEDTABLES;
|
||||
total_size += re->blocksize;
|
||||
}
|
||||
|
||||
|
||||
/* Initialize the byte stream. */
|
||||
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
|
||||
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
@ -167,7 +167,7 @@ if (number_of_codes > data->number_of_codes)
|
|||
|
||||
src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
|
@ -179,8 +179,8 @@ src_bytes += tables_length;
|
|||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
as in the block. */
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
|
@ -190,8 +190,8 @@ for (i = 0; i < number_of_codes; i++)
|
|||
sizeof(CODE_BLOCKSIZE_TYPE));
|
||||
|
||||
/* The allocator provided by gcontext replaces the original one. */
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (dst_re == NULL)
|
||||
{
|
||||
|
@ -205,12 +205,12 @@ for (i = 0; i < number_of_codes; i++)
|
|||
}
|
||||
|
||||
/* The new allocator must be preserved. */
|
||||
|
||||
|
||||
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
|
||||
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
|
||||
|
||||
/* At the moment only one table is supported. */
|
||||
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
@ -252,7 +252,7 @@ if (bytes != NULL)
|
|||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_serialize.c */
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -121,7 +121,7 @@ int
|
|||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (len-- > 0)
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
|
@ -150,7 +150,7 @@ int
|
|||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (len-- > 0)
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -59,15 +59,17 @@ collecting data (e.g. minimum matching length). */
|
|||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the minimum subject length for a group *
|
||||
*************************************************/
|
||||
|
||||
/* Scan a parenthesized group and compute the minimum length of subject that
|
||||
is needed to match it. This is a lower bound; it does not mean there is a
|
||||
string of that length that matches. In UTF8 mode, the result is in characters
|
||||
rather than bytes.
|
||||
string of that length that matches. In UTF mode, the result is in characters
|
||||
rather than code units. The field in a compiled pattern for storing the minimum
|
||||
length is 16-bits long (on the grounds that anything longer than that is
|
||||
pathological), so we give up when we reach that amount. This also means that
|
||||
integer overflow for really crazy patterns cannot happen.
|
||||
|
||||
Arguments:
|
||||
re compiled pattern block
|
||||
|
@ -75,36 +77,57 @@ Arguments:
|
|||
startcode pointer to start of the whole pattern's code
|
||||
utf UTF flag
|
||||
recurses chain of recurse_check to catch mutual recursion
|
||||
countptr pointer to call count (to catch over complexity)
|
||||
|
||||
Returns: the minimum length
|
||||
-1 \C in UTF-8 mode
|
||||
or (*ACCEPT)
|
||||
or pattern too complicated
|
||||
or back reference to duplicate name/number
|
||||
-2 internal error (missing capturing bracket)
|
||||
-3 internal error (opcode not listed)
|
||||
*/
|
||||
|
||||
static int
|
||||
find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
|
||||
PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses)
|
||||
PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr)
|
||||
{
|
||||
int length = -1;
|
||||
int prev_cap_recno = -1;
|
||||
int prev_cap_d = 0;
|
||||
int prev_recurse_recno = -1;
|
||||
int prev_recurse_d = 0;
|
||||
uint32_t once_fudge = 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
|
||||
recurse_check this_recurse;
|
||||
register int branchlength = 0;
|
||||
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
|
||||
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the
|
||||
branch, check the length against that of the other branches. */
|
||||
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
|
||||
|
||||
/* Skip over capturing bracket number */
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
|
||||
|
||||
/* A large and/or complex regex can take too long to process. */
|
||||
|
||||
if ((*countptr)++ > 1000) return -1;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||
check the length against that of the other branches. If the accumulated length
|
||||
passes 16-bits, stop. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int d, min;
|
||||
int d, min, recno;
|
||||
PCRE2_UCHAR *cs, *ce;
|
||||
register PCRE2_UCHAR op = *cc;
|
||||
|
||||
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case OP_COND:
|
||||
|
@ -112,7 +135,8 @@ for (;;)
|
|||
|
||||
/* If there is only one branch in a condition, the implied branch has zero
|
||||
length, so we don't add anything. This covers the DEFINE "condition"
|
||||
automatically. */
|
||||
automatically. If there are two branches we can treat it the same as any
|
||||
other non-capturing subpattern. */
|
||||
|
||||
cs = cc + GET(cc, 1);
|
||||
if (*cs != OP_ALT)
|
||||
|
@ -120,23 +144,52 @@ for (;;)
|
|||
cc = cs + 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
goto PROCESS_NON_CAPTURE;
|
||||
|
||||
/* Otherwise we can fall through and treat it the same as any other
|
||||
subpattern. */
|
||||
/* There's a special case of OP_ONCE, when it is wrapped round an
|
||||
OP_RECURSE. We'd like to process the latter at this level so that
|
||||
remembering the value works for repeated cases. So we do nothing, but
|
||||
set a fudge value to skip over the OP_KET after the recurse. */
|
||||
|
||||
case OP_ONCE:
|
||||
if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET)
|
||||
{
|
||||
once_fudge = 1 + LINK_SIZE;
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_ONCE_NC:
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
PROCESS_NON_CAPTURE:
|
||||
d = find_minlength(re, cc, startcode, utf, recurses, countptr);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* To save time for repeated capturing subpatterns, we remember the
|
||||
length of the previous one. Unfortunately we can't do the same for
|
||||
the unnumbered ones above. Nor can we do this if (?| is present in the
|
||||
pattern because captures with the same number are not then identical. */
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
d = find_minlength(re, cc, startcode, utf, recurses);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
recno = dupcapused? prev_cap_recno - 1 : (int)GET2(cc, 1+LINK_SIZE);
|
||||
if (recno != prev_cap_recno)
|
||||
{
|
||||
prev_cap_recno = recno;
|
||||
prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr);
|
||||
if (prev_cap_d < 0) return prev_cap_d;
|
||||
}
|
||||
branchlength += prev_cap_d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
@ -388,8 +441,12 @@ for (;;)
|
|||
matches an empty string (by default it causes a matching failure), so in
|
||||
that case we must set the minimum length to zero. */
|
||||
|
||||
case OP_DNREF: /* Duplicate named pattern back reference */
|
||||
/* Duplicate named pattern back reference. We cannot reliably find a length
|
||||
for this if duplicate numbers are present in the pattern. */
|
||||
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
if (dupcapused) return -1;
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
int count = GET2(cc, 1+IMM2_SIZE);
|
||||
|
@ -427,7 +484,7 @@ for (;;)
|
|||
int dd;
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse);
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
|
||||
if (dd < d) d = dd;
|
||||
}
|
||||
}
|
||||
|
@ -438,8 +495,12 @@ for (;;)
|
|||
cc += 1 + 2*IMM2_SIZE;
|
||||
goto REPEAT_BACK_REFERENCE;
|
||||
|
||||
case OP_REF: /* Single back reference */
|
||||
/* Single back reference. We cannot find a length for this if duplicate
|
||||
numbers are present in the pattern. */
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
if (dupcapused) return -1;
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
|
||||
|
@ -463,7 +524,7 @@ for (;;)
|
|||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse);
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -504,28 +565,51 @@ for (;;)
|
|||
break;
|
||||
}
|
||||
|
||||
branchlength += min * d;
|
||||
/* Take care not to overflow: (1) min and d are ints, so check that their
|
||||
product is not greater than INT_MAX. (2) branchlength is limited to
|
||||
UINT16_MAX (checked at the top of the loop). */
|
||||
|
||||
if ((d > 0 && (INT_MAX/d) < min) || UINT16_MAX - branchlength < min*d)
|
||||
branchlength = UINT16_MAX;
|
||||
else branchlength += min * d;
|
||||
break;
|
||||
|
||||
/* Recursion always refers to the first occurrence of a subpattern with a
|
||||
given number. Therefore, we can always make use of caching, even when the
|
||||
pattern contains multiple subpatterns with the same number. */
|
||||
|
||||
case OP_RECURSE:
|
||||
cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1);
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
had_recurse = TRUE;
|
||||
recno = GET2(cs, 1+LINK_SIZE);
|
||||
if (recno == prev_recurse_recno)
|
||||
{
|
||||
branchlength += prev_recurse_d;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
branchlength += find_minlength(re, cs, startcode, utf, &this_recurse);
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr);
|
||||
if (prev_recurse_d < 0) return prev_recurse_d;
|
||||
prev_recurse_recno = recno;
|
||||
branchlength += prev_recurse_d;
|
||||
}
|
||||
}
|
||||
}
|
||||
cc += 1 + LINK_SIZE;
|
||||
cc += 1 + LINK_SIZE + once_fudge;
|
||||
once_fudge = 0;
|
||||
break;
|
||||
|
||||
/* Anything else does not or need not match a character. We can get the
|
||||
|
@ -1441,6 +1525,7 @@ int
|
|||
PRIV(study)(pcre2_real_code *re)
|
||||
{
|
||||
int min;
|
||||
int count = 0;
|
||||
PCRE2_UCHAR *code;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
|
@ -1461,22 +1546,27 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
|||
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
|
||||
}
|
||||
|
||||
/* Find the minimum length of subject string. */
|
||||
/* Find the minimum length of subject string. If it can match an empty string,
|
||||
the minimum length is already known. */
|
||||
|
||||
switch(min = find_minlength(re, code, code, utf, NULL))
|
||||
if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
switch(min = find_minlength(re, code, code, utf, NULL, &count))
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
|
||||
case -2:
|
||||
return 2; /* missing capturing bracket */
|
||||
case -2:
|
||||
return 2; /* missing capturing bracket */
|
||||
|
||||
case -3:
|
||||
return 3; /* unrecognized opcode */
|
||||
case -3:
|
||||
return 3; /* unrecognized opcode */
|
||||
|
||||
default:
|
||||
re->minlength = min;
|
||||
break;
|
||||
default:
|
||||
if (min > UINT16_MAX) min = UINT16_MAX;
|
||||
re->minlength = min;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
850
pcre2-10.21/src/pcre2_substitute.c
Normal file
850
pcre2-10.21/src/pcre2_substitute.c
Normal file
|
@ -0,0 +1,850 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PTR_STACK_SIZE 20
|
||||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find end of substitute text *
|
||||
*************************************************/
|
||||
|
||||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||
constructions. This requires the identification of unescaped : and }
|
||||
characters. This function scans for such. It must deal with nested ${
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
character, or to where an error was detected.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression (for options)
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
last TRUE if the last expected string (only } recognized)
|
||||
|
||||
Returns: 0 on success
|
||||
negative error code on failure
|
||||
*/
|
||||
|
||||
static int
|
||||
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
|
||||
BOOL last)
|
||||
{
|
||||
int rc = 0;
|
||||
uint32_t nestlevel = 0;
|
||||
BOOL literal = FALSE;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
|
||||
for (; ptr < ptrend; ptr++)
|
||||
{
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
if (nestlevel == 0) goto EXIT;
|
||||
nestlevel--;
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
|
||||
|
||||
else if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
nestlevel++;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int erc;
|
||||
int errorcode = 0;
|
||||
uint32_t ch;
|
||||
|
||||
if (ptr < ptrend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
case CHAR_l:
|
||||
case CHAR_U:
|
||||
case CHAR_u:
|
||||
ptr += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||||
code->overall_options, FALSE, NULL);
|
||||
if (errorcode != 0)
|
||||
{
|
||||
rc = errorcode;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
switch(erc)
|
||||
{
|
||||
case 0: /* Data character */
|
||||
case ESC_E: /* Isolated \E is ignored */
|
||||
break;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
|
||||
|
||||
EXIT:
|
||||
*ptrptr = ptr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match and substitute *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and creates a new
|
||||
string with substitutions. The first 7 arguments are the same as for
|
||||
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block, or is NULL
|
||||
mcontext points to a PCRE2 match context
|
||||
replacement points to the replacement string
|
||||
rlength length of replacement string
|
||||
buffer where to put the substituted string
|
||||
blength points to length of buffer; updated to length of string
|
||||
|
||||
Returns: >= 0 number of substitutions made
|
||||
< 0 an error code
|
||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||
*/
|
||||
|
||||
/* This macro appends "length" code units, starting at "from", to the output
buffer, first checking that there is room. It relies on these names being in
scope at the point of use: buffer, buff_offset, lengthleft, overflowed,
extra_needed, suboptions, and a NOROOM label to jump to.

If there is not enough room and PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is not set,
control goes to NOROOM immediately. If the option is set, nothing more is
copied; "overflowed" is set and extra_needed accumulates the number of code
units that would not fit, starting with the shortfall of the first item that
overflowed and then the full length of each later item.

The expansion is wrapped in do { } while (0) so that it is a single statement
and is safe in an unbraced if/else; every use must end with a semicolon. The
arguments are parenthesized, but "length" is evaluated more than once, so it
must not have side effects. */

#define CHECKMEMCPY(from,length) \
  do \
    { \
    if (!overflowed && lengthleft < (length)) \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = (length) - lengthleft; \
      } \
    else if (overflowed) \
      { \
      extra_needed += (length); \
      } \
    else \
      { \
      memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
      buff_offset += (length); \
      lengthleft -= (length); \
      } \
    } \
  while (0)
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||
{
|
||||
int rc;
|
||||
int subs;
|
||||
int forcecase = 0;
|
||||
int forcecasereset = 0;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR repend;
|
||||
PCRE2_SIZE extra_needed = 0;
|
||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
|
||||
/* Partial matching is not valid. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* If no match data block is provided, create one. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context *gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
match_data_created = TRUE;
|
||||
}
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
/* Find lengths of zero-terminated strings and the end of the replacement. */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||
repend = replacement + rlength;
|
||||
|
||||
/* Check UTF replacement string if necessary. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Save the substitute options and remove them from the match options. */
|
||||
|
||||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Copy up to the start offset */
|
||||
|
||||
CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
{
|
||||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||
#endif
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
subject, breaks the global loop. Otherwise, advance the starting point by one
|
||||
character, copying it to the output, and try again. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
PCRE2_SIZE save_start;
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
|
||||
if (goptions == 0 || start_offset >= length) break;
|
||||
|
||||
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
|
||||
we have advanced into the middle of it, advance one more code point. In
|
||||
other words, do not start in the middle of CRLF, even if CR and LF on their
|
||||
own are valid newlines. */
|
||||
|
||||
save_start = start_offset++;
|
||||
if (subject[start_offset-1] == CHAR_CR &&
|
||||
code->newline_convention != PCRE2_NEWLINE_CR &&
|
||||
code->newline_convention != PCRE2_NEWLINE_LF &&
|
||||
start_offset < length &&
|
||||
subject[start_offset] == CHAR_LF)
|
||||
start_offset++;
|
||||
|
||||
/* Otherwise, in UTF mode, advance past any secondary code points. */
|
||||
|
||||
else if ((code->overall_options & PCRE2_UTF) != 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
|
||||
start_offset++;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
while (start_offset < length &&
|
||||
(subject[start_offset] & 0xfc00) == 0xdc00)
|
||||
start_offset++;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past, reset the special global options, and
|
||||
continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0])
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
if (subs == INT_MAX)
|
||||
{
|
||||
rc = PCRE2_ERROR_TOOMANYREPLACE;
|
||||
goto EXIT;
|
||||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
must handle nested substrings that are to be reprocessed. */
|
||||
|
||||
ptr = replacement;
|
||||
for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
||||
/* If at the end of a nested substring, pop the stack. */
|
||||
|
||||
if (ptr >= repend)
|
||||
{
|
||||
if (ptrstackptr <= 0) break; /* End of replacement string */
|
||||
repend = ptrstack[--ptrstackptr];
|
||||
ptr = ptrstack[--ptrstackptr];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle the next character */
|
||||
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
goto LOADLITERAL;
|
||||
}
|
||||
|
||||
/* Not in literal mode. */
|
||||
|
||||
if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
int group, n;
|
||||
uint32_t special = 0;
|
||||
BOOL inparens;
|
||||
BOOL star;
|
||||
PCRE2_SIZE sublength;
|
||||
PCRE2_SPTR text1_start = NULL;
|
||||
PCRE2_SPTR text1_end = NULL;
|
||||
PCRE2_SPTR text2_start = NULL;
|
||||
PCRE2_SPTR text2_end = NULL;
|
||||
PCRE2_UCHAR next;
|
||||
PCRE2_UCHAR name[33];
|
||||
|
||||
if (++ptr >= repend) goto BAD;
|
||||
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
|
||||
|
||||
group = -1;
|
||||
n = 0;
|
||||
inparens = FALSE;
|
||||
star = FALSE;
|
||||
|
||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inparens = TRUE;
|
||||
}
|
||||
|
||||
if (next == CHAR_ASTERISK)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
star = TRUE;
|
||||
}
|
||||
|
||||
if (!star && next >= CHAR_0 && next <= CHAR_9)
|
||||
{
|
||||
group = next - CHAR_0;
|
||||
while (++ptr < repend)
|
||||
{
|
||||
next = *ptr;
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + next - CHAR_0;
|
||||
|
||||
/* A check for a number greater than the highest captured group
|
||||
is sufficient here; no need for a separate overflow check. If unknown
|
||||
groups are to be treated as unset, just skip over any remaining
|
||||
digits and carry on. */
|
||||
|
||||
if (group > code->top_bracket)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint8_t *ctypes = code->tables + ctypes_offset;
|
||||
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
|
||||
{
|
||||
name[n++] = next;
|
||||
if (n > 32) goto BAD;
|
||||
if (++ptr >= repend) break;
|
||||
next = *ptr;
|
||||
}
|
||||
if (n == 0) goto BAD;
|
||||
name[n] = 0;
|
||||
}
|
||||
|
||||
/* In extended mode we recognize ${name:+set text:unset text} and
|
||||
${name:-default text}. */
|
||||
|
||||
if (inparens)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
!star && ptr < repend - 2 && next == CHAR_COLON)
|
||||
{
|
||||
special = *(++ptr);
|
||||
if (special != CHAR_PLUS && special != CHAR_MINUS)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSTITUTION;
|
||||
goto PTREXIT;
|
||||
}
|
||||
|
||||
text1_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text1_end = ptr;
|
||||
|
||||
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
|
||||
{
|
||||
text2_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, TRUE);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text2_end = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Have found a syntactically correct group number or name, or *name.
|
||||
Only *MARK is currently recognized. */
|
||||
|
||||
if (star)
|
||||
{
|
||||
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
|
||||
{
|
||||
PCRE2_SPTR mark = pcre2_get_mark(match_data);
|
||||
if (mark != NULL)
|
||||
{
|
||||
PCRE2_SPTR mark_start = mark;
|
||||
while (*mark != 0) mark++;
|
||||
fraglength = mark - mark_start;
|
||||
CHECKMEMCPY(mark_start, fraglength);
|
||||
}
|
||||
}
|
||||
else goto BAD;
|
||||
}
|
||||
|
||||
/* Substitute the contents of a group. We don't use substring_copy
|
||||
functions any more, in order to support case forcing. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR subptr, subptrend;
|
||||
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
search for the first one that is set. If the name is not found when
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
|
||||
non-existent group. */
|
||||
|
||||
if (group < 0)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
group = code->top_bracket + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc < 0) goto PTREXIT;
|
||||
for (entry = first; entry <= last; entry += rc)
|
||||
{
|
||||
uint32_t ng = GET2(entry, 0);
|
||||
if (ng < ovector_count)
|
||||
{
|
||||
if (group < 0) group = ng; /* First in ovector */
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
{
|
||||
group = ng; /* First that is set */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If group is still negative, it means we did not find a group
|
||||
that is in the ovector. Just set the first group. */
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have a group that is identified by number. Find the length of
|
||||
the captured string. If a group in a non-special substitution is unset
|
||||
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
|
||||
|
||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
rc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
|
||||
if (special == 0) /* Plain substitution */
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
|
||||
goto PTREXIT; /* Else error */
|
||||
}
|
||||
}
|
||||
|
||||
/* If special is '+' we have a 'set' and possibly an 'unset' text,
|
||||
both of which are reprocessed when used. If special is '-' we have a
|
||||
default text for when the group is unset; it must be reprocessed. */
|
||||
|
||||
if (special != 0)
|
||||
{
|
||||
if (special == CHAR_MINUS)
|
||||
{
|
||||
if (rc == 0) goto LITERAL_SUBSTITUTE;
|
||||
text2_start = text1_start;
|
||||
text2_end = text1_end;
|
||||
}
|
||||
|
||||
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
|
||||
ptrstack[ptrstackptr++] = ptr;
|
||||
ptrstack[ptrstackptr++] = repend;
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
ptr = text1_start;
|
||||
repend = text1_end;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = text2_start;
|
||||
repend = text2_end;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise we have a literal substitution of a group's contents. */
|
||||
|
||||
LITERAL_SUBSTITUTE:
|
||||
subptr = subject + ovector[group*2];
|
||||
subptrend = subject + ovector[group*2 + 1];
|
||||
|
||||
/* Substitute a literal string, possibly forcing alphabetic case. */
|
||||
|
||||
while (subptr < subptrend)
|
||||
{
|
||||
GETCHARINCTEST(ch, subptr);
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1 << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
forcecase = forcecasereset;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle an escape sequence in extended mode. We can use check_escape()
|
||||
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
|
||||
the case-forcing escapes are not supported in pcre2_compile() so must be
|
||||
recognized here. */
|
||||
|
||||
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int errorcode = 0;
|
||||
|
||||
if (ptr < repend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
forcecase = forcecasereset = -1;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_l:
|
||||
forcecase = -1;
|
||||
forcecasereset = 0;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_U:
|
||||
forcecase = forcecasereset = 1;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_u:
|
||||
forcecase = 1;
|
||||
forcecasereset = 0;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||||
code->overall_options, FALSE, NULL);
|
||||
if (errorcode != 0) goto BADESCAPE;
|
||||
ptr++;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
case ESC_E:
|
||||
forcecase = forcecasereset = 0;
|
||||
continue;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
goto LITERAL;
|
||||
|
||||
default:
|
||||
goto BADESCAPE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle a literal code unit */
|
||||
|
||||
else
|
||||
{
|
||||
LOADLITERAL:
|
||||
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
|
||||
|
||||
LITERAL:
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1 << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
forcecase = forcecasereset;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output. Update the start offset to
|
||||
point to the rest of the subject string. If we matched an empty string,
|
||||
do the magic for global matches. */
|
||||
|
||||
start_offset = ovector[1];
|
||||
goptions = (ovector[0] != ovector[1])? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject. */
|
||||
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp , 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
needed. Otherwise, an overflow generates an immediate error return. */
|
||||
|
||||
if (overflowed)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
*blength = buff_length + extra_needed;
|
||||
}
|
||||
|
||||
/* After a successful execution, return the number of substitutions and set the
|
||||
length of buffer used, excluding the trailing zero. */
|
||||
|
||||
else
|
||||
{
|
||||
rc = subs;
|
||||
*blength = buff_offset - 1;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (match_data_created) pcre2_match_data_free(match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
NOROOM:
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
goto EXIT;
|
||||
|
||||
BAD:
|
||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||
goto PTREXIT;
|
||||
|
||||
BADESCAPE:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
|
||||
PTREXIT:
|
||||
*blength = (PCRE2_SIZE)(ptr - replacement);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2_substitute.c */
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -227,6 +227,8 @@ version. Like all other character and string literals that are compared against
|
|||
the regular expression pattern, we must use STR_ macros instead of literal
|
||||
strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
|
||||
#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
|
||||
#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Any0 STR_A STR_n STR_y "\0"
|
||||
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
|
||||
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||
|
@ -274,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Han0 STR_H STR_a STR_n "\0"
|
||||
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
|
||||
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
|
||||
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||
|
@ -321,6 +324,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||
#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
|
@ -331,6 +335,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_No0 STR_N STR_o "\0"
|
||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
|
@ -362,6 +367,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||
#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0"
|
||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
|
@ -398,6 +404,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Zs0 STR_Z STR_s "\0"
|
||||
|
||||
const char PRIV(utt_names)[] =
|
||||
STRING_Ahom0
|
||||
STRING_Anatolian_Hieroglyphs0
|
||||
STRING_Any0
|
||||
STRING_Arabic0
|
||||
STRING_Armenian0
|
||||
|
@ -445,6 +453,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Han0
|
||||
STRING_Hangul0
|
||||
STRING_Hanunoo0
|
||||
STRING_Hatran0
|
||||
STRING_Hebrew0
|
||||
STRING_Hiragana0
|
||||
STRING_Imperial_Aramaic0
|
||||
|
@ -492,6 +501,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Modi0
|
||||
STRING_Mongolian0
|
||||
STRING_Mro0
|
||||
STRING_Multani0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nabataean0
|
||||
|
@ -502,6 +512,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_No0
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Hungarian0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_North_Arabian0
|
||||
STRING_Old_Permic0
|
||||
|
@ -533,6 +544,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Siddham0
|
||||
STRING_SignWriting0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
|
@ -569,175 +581,181 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Zs0;
|
||||
|
||||
const ucp_type_table PRIV(utt)[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Bassa_Vah },
|
||||
{ 53, PT_SC, ucp_Batak },
|
||||
{ 59, PT_SC, ucp_Bengali },
|
||||
{ 67, PT_SC, ucp_Bopomofo },
|
||||
{ 76, PT_SC, ucp_Brahmi },
|
||||
{ 83, PT_SC, ucp_Braille },
|
||||
{ 91, PT_SC, ucp_Buginese },
|
||||
{ 100, PT_SC, ucp_Buhid },
|
||||
{ 106, PT_GC, ucp_C },
|
||||
{ 108, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 128, PT_SC, ucp_Carian },
|
||||
{ 135, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 154, PT_PC, ucp_Cc },
|
||||
{ 157, PT_PC, ucp_Cf },
|
||||
{ 160, PT_SC, ucp_Chakma },
|
||||
{ 167, PT_SC, ucp_Cham },
|
||||
{ 172, PT_SC, ucp_Cherokee },
|
||||
{ 181, PT_PC, ucp_Cn },
|
||||
{ 184, PT_PC, ucp_Co },
|
||||
{ 187, PT_SC, ucp_Common },
|
||||
{ 194, PT_SC, ucp_Coptic },
|
||||
{ 201, PT_PC, ucp_Cs },
|
||||
{ 204, PT_SC, ucp_Cuneiform },
|
||||
{ 214, PT_SC, ucp_Cypriot },
|
||||
{ 222, PT_SC, ucp_Cyrillic },
|
||||
{ 231, PT_SC, ucp_Deseret },
|
||||
{ 239, PT_SC, ucp_Devanagari },
|
||||
{ 250, PT_SC, ucp_Duployan },
|
||||
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 280, PT_SC, ucp_Elbasan },
|
||||
{ 288, PT_SC, ucp_Ethiopic },
|
||||
{ 297, PT_SC, ucp_Georgian },
|
||||
{ 306, PT_SC, ucp_Glagolitic },
|
||||
{ 317, PT_SC, ucp_Gothic },
|
||||
{ 324, PT_SC, ucp_Grantha },
|
||||
{ 332, PT_SC, ucp_Greek },
|
||||
{ 338, PT_SC, ucp_Gujarati },
|
||||
{ 347, PT_SC, ucp_Gurmukhi },
|
||||
{ 356, PT_SC, ucp_Han },
|
||||
{ 360, PT_SC, ucp_Hangul },
|
||||
{ 367, PT_SC, ucp_Hanunoo },
|
||||
{ 375, PT_SC, ucp_Hebrew },
|
||||
{ 382, PT_SC, ucp_Hiragana },
|
||||
{ 391, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 408, PT_SC, ucp_Inherited },
|
||||
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 440, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 463, PT_SC, ucp_Javanese },
|
||||
{ 472, PT_SC, ucp_Kaithi },
|
||||
{ 479, PT_SC, ucp_Kannada },
|
||||
{ 487, PT_SC, ucp_Katakana },
|
||||
{ 496, PT_SC, ucp_Kayah_Li },
|
||||
{ 505, PT_SC, ucp_Kharoshthi },
|
||||
{ 516, PT_SC, ucp_Khmer },
|
||||
{ 522, PT_SC, ucp_Khojki },
|
||||
{ 529, PT_SC, ucp_Khudawadi },
|
||||
{ 539, PT_GC, ucp_L },
|
||||
{ 541, PT_LAMP, 0 },
|
||||
{ 544, PT_SC, ucp_Lao },
|
||||
{ 548, PT_SC, ucp_Latin },
|
||||
{ 554, PT_SC, ucp_Lepcha },
|
||||
{ 561, PT_SC, ucp_Limbu },
|
||||
{ 567, PT_SC, ucp_Linear_A },
|
||||
{ 576, PT_SC, ucp_Linear_B },
|
||||
{ 585, PT_SC, ucp_Lisu },
|
||||
{ 590, PT_PC, ucp_Ll },
|
||||
{ 593, PT_PC, ucp_Lm },
|
||||
{ 596, PT_PC, ucp_Lo },
|
||||
{ 599, PT_PC, ucp_Lt },
|
||||
{ 602, PT_PC, ucp_Lu },
|
||||
{ 605, PT_SC, ucp_Lycian },
|
||||
{ 612, PT_SC, ucp_Lydian },
|
||||
{ 619, PT_GC, ucp_M },
|
||||
{ 621, PT_SC, ucp_Mahajani },
|
||||
{ 630, PT_SC, ucp_Malayalam },
|
||||
{ 640, PT_SC, ucp_Mandaic },
|
||||
{ 648, PT_SC, ucp_Manichaean },
|
||||
{ 659, PT_PC, ucp_Mc },
|
||||
{ 662, PT_PC, ucp_Me },
|
||||
{ 665, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 678, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 692, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 730, PT_SC, ucp_Miao },
|
||||
{ 735, PT_PC, ucp_Mn },
|
||||
{ 738, PT_SC, ucp_Modi },
|
||||
{ 743, PT_SC, ucp_Mongolian },
|
||||
{ 753, PT_SC, ucp_Mro },
|
||||
{ 757, PT_SC, ucp_Myanmar },
|
||||
{ 765, PT_GC, ucp_N },
|
||||
{ 767, PT_SC, ucp_Nabataean },
|
||||
{ 777, PT_PC, ucp_Nd },
|
||||
{ 780, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 792, PT_SC, ucp_Nko },
|
||||
{ 796, PT_PC, ucp_Nl },
|
||||
{ 799, PT_PC, ucp_No },
|
||||
{ 802, PT_SC, ucp_Ogham },
|
||||
{ 808, PT_SC, ucp_Ol_Chiki },
|
||||
{ 817, PT_SC, ucp_Old_Italic },
|
||||
{ 828, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 846, PT_SC, ucp_Old_Permic },
|
||||
{ 857, PT_SC, ucp_Old_Persian },
|
||||
{ 869, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 887, PT_SC, ucp_Old_Turkic },
|
||||
{ 898, PT_SC, ucp_Oriya },
|
||||
{ 904, PT_SC, ucp_Osmanya },
|
||||
{ 912, PT_GC, ucp_P },
|
||||
{ 914, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 927, PT_SC, ucp_Palmyrene },
|
||||
{ 937, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 949, PT_PC, ucp_Pc },
|
||||
{ 952, PT_PC, ucp_Pd },
|
||||
{ 955, PT_PC, ucp_Pe },
|
||||
{ 958, PT_PC, ucp_Pf },
|
||||
{ 961, PT_SC, ucp_Phags_Pa },
|
||||
{ 970, PT_SC, ucp_Phoenician },
|
||||
{ 981, PT_PC, ucp_Pi },
|
||||
{ 984, PT_PC, ucp_Po },
|
||||
{ 987, PT_PC, ucp_Ps },
|
||||
{ 990, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1006, PT_SC, ucp_Rejang },
|
||||
{ 1013, PT_SC, ucp_Runic },
|
||||
{ 1019, PT_GC, ucp_S },
|
||||
{ 1021, PT_SC, ucp_Samaritan },
|
||||
{ 1031, PT_SC, ucp_Saurashtra },
|
||||
{ 1042, PT_PC, ucp_Sc },
|
||||
{ 1045, PT_SC, ucp_Sharada },
|
||||
{ 1053, PT_SC, ucp_Shavian },
|
||||
{ 1061, PT_SC, ucp_Siddham },
|
||||
{ 1069, PT_SC, ucp_Sinhala },
|
||||
{ 1077, PT_PC, ucp_Sk },
|
||||
{ 1080, PT_PC, ucp_Sm },
|
||||
{ 1083, PT_PC, ucp_So },
|
||||
{ 1086, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1099, PT_SC, ucp_Sundanese },
|
||||
{ 1109, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1122, PT_SC, ucp_Syriac },
|
||||
{ 1129, PT_SC, ucp_Tagalog },
|
||||
{ 1137, PT_SC, ucp_Tagbanwa },
|
||||
{ 1146, PT_SC, ucp_Tai_Le },
|
||||
{ 1153, PT_SC, ucp_Tai_Tham },
|
||||
{ 1162, PT_SC, ucp_Tai_Viet },
|
||||
{ 1171, PT_SC, ucp_Takri },
|
||||
{ 1177, PT_SC, ucp_Tamil },
|
||||
{ 1183, PT_SC, ucp_Telugu },
|
||||
{ 1190, PT_SC, ucp_Thaana },
|
||||
{ 1197, PT_SC, ucp_Thai },
|
||||
{ 1202, PT_SC, ucp_Tibetan },
|
||||
{ 1210, PT_SC, ucp_Tifinagh },
|
||||
{ 1219, PT_SC, ucp_Tirhuta },
|
||||
{ 1227, PT_SC, ucp_Ugaritic },
|
||||
{ 1236, PT_SC, ucp_Vai },
|
||||
{ 1240, PT_SC, ucp_Warang_Citi },
|
||||
{ 1252, PT_ALNUM, 0 },
|
||||
{ 1256, PT_PXSPACE, 0 },
|
||||
{ 1260, PT_SPACE, 0 },
|
||||
{ 1264, PT_UCNC, 0 },
|
||||
{ 1268, PT_WORD, 0 },
|
||||
{ 1272, PT_SC, ucp_Yi },
|
||||
{ 1275, PT_GC, ucp_Z },
|
||||
{ 1277, PT_PC, ucp_Zl },
|
||||
{ 1280, PT_PC, ucp_Zp },
|
||||
{ 1283, PT_PC, ucp_Zs }
|
||||
{ 0, PT_SC, ucp_Ahom },
|
||||
{ 5, PT_SC, ucp_Anatolian_Hieroglyphs },
|
||||
{ 27, PT_ANY, 0 },
|
||||
{ 31, PT_SC, ucp_Arabic },
|
||||
{ 38, PT_SC, ucp_Armenian },
|
||||
{ 47, PT_SC, ucp_Avestan },
|
||||
{ 55, PT_SC, ucp_Balinese },
|
||||
{ 64, PT_SC, ucp_Bamum },
|
||||
{ 70, PT_SC, ucp_Bassa_Vah },
|
||||
{ 80, PT_SC, ucp_Batak },
|
||||
{ 86, PT_SC, ucp_Bengali },
|
||||
{ 94, PT_SC, ucp_Bopomofo },
|
||||
{ 103, PT_SC, ucp_Brahmi },
|
||||
{ 110, PT_SC, ucp_Braille },
|
||||
{ 118, PT_SC, ucp_Buginese },
|
||||
{ 127, PT_SC, ucp_Buhid },
|
||||
{ 133, PT_GC, ucp_C },
|
||||
{ 135, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 155, PT_SC, ucp_Carian },
|
||||
{ 162, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 181, PT_PC, ucp_Cc },
|
||||
{ 184, PT_PC, ucp_Cf },
|
||||
{ 187, PT_SC, ucp_Chakma },
|
||||
{ 194, PT_SC, ucp_Cham },
|
||||
{ 199, PT_SC, ucp_Cherokee },
|
||||
{ 208, PT_PC, ucp_Cn },
|
||||
{ 211, PT_PC, ucp_Co },
|
||||
{ 214, PT_SC, ucp_Common },
|
||||
{ 221, PT_SC, ucp_Coptic },
|
||||
{ 228, PT_PC, ucp_Cs },
|
||||
{ 231, PT_SC, ucp_Cuneiform },
|
||||
{ 241, PT_SC, ucp_Cypriot },
|
||||
{ 249, PT_SC, ucp_Cyrillic },
|
||||
{ 258, PT_SC, ucp_Deseret },
|
||||
{ 266, PT_SC, ucp_Devanagari },
|
||||
{ 277, PT_SC, ucp_Duployan },
|
||||
{ 286, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 307, PT_SC, ucp_Elbasan },
|
||||
{ 315, PT_SC, ucp_Ethiopic },
|
||||
{ 324, PT_SC, ucp_Georgian },
|
||||
{ 333, PT_SC, ucp_Glagolitic },
|
||||
{ 344, PT_SC, ucp_Gothic },
|
||||
{ 351, PT_SC, ucp_Grantha },
|
||||
{ 359, PT_SC, ucp_Greek },
|
||||
{ 365, PT_SC, ucp_Gujarati },
|
||||
{ 374, PT_SC, ucp_Gurmukhi },
|
||||
{ 383, PT_SC, ucp_Han },
|
||||
{ 387, PT_SC, ucp_Hangul },
|
||||
{ 394, PT_SC, ucp_Hanunoo },
|
||||
{ 402, PT_SC, ucp_Hatran },
|
||||
{ 409, PT_SC, ucp_Hebrew },
|
||||
{ 416, PT_SC, ucp_Hiragana },
|
||||
{ 425, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 442, PT_SC, ucp_Inherited },
|
||||
{ 452, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 474, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 497, PT_SC, ucp_Javanese },
|
||||
{ 506, PT_SC, ucp_Kaithi },
|
||||
{ 513, PT_SC, ucp_Kannada },
|
||||
{ 521, PT_SC, ucp_Katakana },
|
||||
{ 530, PT_SC, ucp_Kayah_Li },
|
||||
{ 539, PT_SC, ucp_Kharoshthi },
|
||||
{ 550, PT_SC, ucp_Khmer },
|
||||
{ 556, PT_SC, ucp_Khojki },
|
||||
{ 563, PT_SC, ucp_Khudawadi },
|
||||
{ 573, PT_GC, ucp_L },
|
||||
{ 575, PT_LAMP, 0 },
|
||||
{ 578, PT_SC, ucp_Lao },
|
||||
{ 582, PT_SC, ucp_Latin },
|
||||
{ 588, PT_SC, ucp_Lepcha },
|
||||
{ 595, PT_SC, ucp_Limbu },
|
||||
{ 601, PT_SC, ucp_Linear_A },
|
||||
{ 610, PT_SC, ucp_Linear_B },
|
||||
{ 619, PT_SC, ucp_Lisu },
|
||||
{ 624, PT_PC, ucp_Ll },
|
||||
{ 627, PT_PC, ucp_Lm },
|
||||
{ 630, PT_PC, ucp_Lo },
|
||||
{ 633, PT_PC, ucp_Lt },
|
||||
{ 636, PT_PC, ucp_Lu },
|
||||
{ 639, PT_SC, ucp_Lycian },
|
||||
{ 646, PT_SC, ucp_Lydian },
|
||||
{ 653, PT_GC, ucp_M },
|
||||
{ 655, PT_SC, ucp_Mahajani },
|
||||
{ 664, PT_SC, ucp_Malayalam },
|
||||
{ 674, PT_SC, ucp_Mandaic },
|
||||
{ 682, PT_SC, ucp_Manichaean },
|
||||
{ 693, PT_PC, ucp_Mc },
|
||||
{ 696, PT_PC, ucp_Me },
|
||||
{ 699, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 712, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 726, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 743, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 764, PT_SC, ucp_Miao },
|
||||
{ 769, PT_PC, ucp_Mn },
|
||||
{ 772, PT_SC, ucp_Modi },
|
||||
{ 777, PT_SC, ucp_Mongolian },
|
||||
{ 787, PT_SC, ucp_Mro },
|
||||
{ 791, PT_SC, ucp_Multani },
|
||||
{ 799, PT_SC, ucp_Myanmar },
|
||||
{ 807, PT_GC, ucp_N },
|
||||
{ 809, PT_SC, ucp_Nabataean },
|
||||
{ 819, PT_PC, ucp_Nd },
|
||||
{ 822, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 834, PT_SC, ucp_Nko },
|
||||
{ 838, PT_PC, ucp_Nl },
|
||||
{ 841, PT_PC, ucp_No },
|
||||
{ 844, PT_SC, ucp_Ogham },
|
||||
{ 850, PT_SC, ucp_Ol_Chiki },
|
||||
{ 859, PT_SC, ucp_Old_Hungarian },
|
||||
{ 873, PT_SC, ucp_Old_Italic },
|
||||
{ 884, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 902, PT_SC, ucp_Old_Permic },
|
||||
{ 913, PT_SC, ucp_Old_Persian },
|
||||
{ 925, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 943, PT_SC, ucp_Old_Turkic },
|
||||
{ 954, PT_SC, ucp_Oriya },
|
||||
{ 960, PT_SC, ucp_Osmanya },
|
||||
{ 968, PT_GC, ucp_P },
|
||||
{ 970, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 983, PT_SC, ucp_Palmyrene },
|
||||
{ 993, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1005, PT_PC, ucp_Pc },
|
||||
{ 1008, PT_PC, ucp_Pd },
|
||||
{ 1011, PT_PC, ucp_Pe },
|
||||
{ 1014, PT_PC, ucp_Pf },
|
||||
{ 1017, PT_SC, ucp_Phags_Pa },
|
||||
{ 1026, PT_SC, ucp_Phoenician },
|
||||
{ 1037, PT_PC, ucp_Pi },
|
||||
{ 1040, PT_PC, ucp_Po },
|
||||
{ 1043, PT_PC, ucp_Ps },
|
||||
{ 1046, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1062, PT_SC, ucp_Rejang },
|
||||
{ 1069, PT_SC, ucp_Runic },
|
||||
{ 1075, PT_GC, ucp_S },
|
||||
{ 1077, PT_SC, ucp_Samaritan },
|
||||
{ 1087, PT_SC, ucp_Saurashtra },
|
||||
{ 1098, PT_PC, ucp_Sc },
|
||||
{ 1101, PT_SC, ucp_Sharada },
|
||||
{ 1109, PT_SC, ucp_Shavian },
|
||||
{ 1117, PT_SC, ucp_Siddham },
|
||||
{ 1125, PT_SC, ucp_SignWriting },
|
||||
{ 1137, PT_SC, ucp_Sinhala },
|
||||
{ 1145, PT_PC, ucp_Sk },
|
||||
{ 1148, PT_PC, ucp_Sm },
|
||||
{ 1151, PT_PC, ucp_So },
|
||||
{ 1154, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1167, PT_SC, ucp_Sundanese },
|
||||
{ 1177, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1190, PT_SC, ucp_Syriac },
|
||||
{ 1197, PT_SC, ucp_Tagalog },
|
||||
{ 1205, PT_SC, ucp_Tagbanwa },
|
||||
{ 1214, PT_SC, ucp_Tai_Le },
|
||||
{ 1221, PT_SC, ucp_Tai_Tham },
|
||||
{ 1230, PT_SC, ucp_Tai_Viet },
|
||||
{ 1239, PT_SC, ucp_Takri },
|
||||
{ 1245, PT_SC, ucp_Tamil },
|
||||
{ 1251, PT_SC, ucp_Telugu },
|
||||
{ 1258, PT_SC, ucp_Thaana },
|
||||
{ 1265, PT_SC, ucp_Thai },
|
||||
{ 1270, PT_SC, ucp_Tibetan },
|
||||
{ 1278, PT_SC, ucp_Tifinagh },
|
||||
{ 1287, PT_SC, ucp_Tirhuta },
|
||||
{ 1295, PT_SC, ucp_Ugaritic },
|
||||
{ 1304, PT_SC, ucp_Vai },
|
||||
{ 1308, PT_SC, ucp_Warang_Citi },
|
||||
{ 1320, PT_ALNUM, 0 },
|
||||
{ 1324, PT_PXSPACE, 0 },
|
||||
{ 1328, PT_SPACE, 0 },
|
||||
{ 1332, PT_UCNC, 0 },
|
||||
{ 1336, PT_WORD, 0 },
|
||||
{ 1340, PT_SC, ucp_Yi },
|
||||
{ 1343, PT_GC, ucp_Z },
|
||||
{ 1345, PT_PC, ucp_Zl },
|
||||
{ 1348, PT_PC, ucp_Zp },
|
||||
{ 1351, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
File diff suppressed because it is too large
Load diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -253,7 +253,14 @@ enum {
|
|||
ucp_Pau_Cin_Hau,
|
||||
ucp_Siddham,
|
||||
ucp_Tirhuta,
|
||||
ucp_Warang_Citi
|
||||
ucp_Warang_Citi,
|
||||
/* New for Unicode 8.0.0: */
|
||||
ucp_Ahom,
|
||||
ucp_Anatolian_Hieroglyphs,
|
||||
ucp_Hatran,
|
||||
ucp_Multani,
|
||||
ucp_Old_Hungarian,
|
||||
ucp_SignWriting
|
||||
};
|
||||
|
||||
#endif
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -131,11 +131,13 @@ PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
|||
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
*/
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
register uint32_t ab, d;
|
||||
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
|
@ -324,9 +326,10 @@ PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
|
|||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
*/
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if ((c & 0xf800) != 0xd800)
|
||||
{
|
||||
|
@ -368,7 +371,7 @@ PCRE2_ERROR_UTF32_ERR1 Surrogate character
|
|||
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
|
||||
*/
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
for (p = string; length > 0; length--, p++)
|
||||
{
|
||||
c = *p;
|
||||
if ((c & 0xfffff800u) != 0xd800u)
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -247,7 +247,7 @@ while ((t = *data++) != XCL_END)
|
|||
|
||||
case PT_PXPUNCT:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
||||
(c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
|
@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS.
|
|||
The header can be found in the special z/OS distribution, which is available
|
||||
from www.zaconsultants.net or from www.cbttape.org.
|
||||
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -1691,9 +1691,13 @@ while (ptr < endptr)
|
|||
|
||||
if (filenames == FN_NOMATCH_ONLY) return 1;
|
||||
|
||||
/* If all we want is a yes/no answer, we can return immediately. */
|
||||
|
||||
if (quiet) return 0;
|
||||
|
||||
/* Just count if just counting is wanted. */
|
||||
|
||||
if (count_only) count++;
|
||||
else if (count_only) count++;
|
||||
|
||||
/* When handling a binary file and binary-files==binary, the "binary"
|
||||
variable will be set true (it's false in all other cases). In this
|
||||
|
@ -1705,8 +1709,8 @@ while (ptr < endptr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* If all we want is a file name, there is no need to scan any more lines
|
||||
in the file. */
|
||||
/* Likewise, if all we want is a file name, there is no need to scan any
|
||||
more lines in the file. */
|
||||
|
||||
else if (filenames == FN_MATCH_ONLY)
|
||||
{
|
||||
|
@ -1714,10 +1718,6 @@ while (ptr < endptr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Likewise, if all we want is a yes/no answer. */
|
||||
|
||||
else if (quiet) return 0;
|
||||
|
||||
/* The --only-matching option prints just the substring that matched,
|
||||
and/or one or more captured portions of it, as long as these strings are
|
||||
not empty. The --file-offsets and --line-offsets options output offsets for
|
||||
|
@ -2069,7 +2069,7 @@ if (filenames == FN_NOMATCH_ONLY)
|
|||
|
||||
/* Print the match count if wanted */
|
||||
|
||||
if (count_only)
|
||||
if (count_only && !quiet)
|
||||
{
|
||||
if (count > 0 || !omit_zero_count)
|
||||
{
|
||||
|
@ -2421,7 +2421,7 @@ return options;
|
|||
static char *
|
||||
ordin(int n)
|
||||
{
|
||||
static char buffer[8];
|
||||
static char buffer[14];
|
||||
char *p = buffer;
|
||||
sprintf(p, "%d", n);
|
||||
while (*p != 0) p++;
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -106,7 +106,7 @@ static const int eint1[] = {
|
|||
|
||||
static const int eint2[] = {
|
||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
|
||||
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
|
||||
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
|
||||
56, REG_INVARG, /* internal error: unknown newline setting */
|
||||
};
|
||||
|
@ -144,29 +144,23 @@ static const char *const pstring[] = {
|
|||
PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
const char *message, *addmessage;
|
||||
size_t length, addlength;
|
||||
int used;
|
||||
const char *message;
|
||||
|
||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
length = strlen(message) + 1;
|
||||
|
||||
addmessage = " at offset ";
|
||||
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
|
||||
strlen(addmessage) + 6 : 0;
|
||||
|
||||
if (errbuf_size > 0)
|
||||
if (preg != NULL && (int)preg->re_erroffset != -1)
|
||||
{
|
||||
if (addlength > 0 && errbuf_size >= length + addlength)
|
||||
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
||||
else
|
||||
{
|
||||
strncpy(errbuf, message, errbuf_size - 1);
|
||||
errbuf[errbuf_size-1] = 0;
|
||||
}
|
||||
used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message,
|
||||
(int)preg->re_erroffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
used = snprintf(errbuf, errbuf_size, "%s", message);
|
||||
}
|
||||
|
||||
return length + addlength;
|
||||
return used + 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -223,8 +217,13 @@ preg->re_erroffset = erroffset;
|
|||
if (preg->re_pcre2_code == NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
if (errorcode < 0) return REG_BADPAT; /* UTF error */
|
||||
|
||||
/* A negative value is a UTF error; otherwise all error codes are greater
|
||||
than COMPILE_ERROR_BASE, but check, just in case. */
|
||||
|
||||
if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
|
||||
errorcode -= COMPILE_ERROR_BASE;
|
||||
|
||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||
return eint1[errorcode];
|
||||
for (i = 0; i < sizeof(eint2)/(2*sizeof(const int)); i += 2)
|
||||
|
@ -237,6 +236,13 @@ if (preg->re_pcre2_code == NULL)
|
|||
preg->re_nsub = (size_t)re_nsub;
|
||||
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
||||
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
|
||||
if (preg->re_match_data == NULL)
|
||||
{
|
||||
pcre2_code_free(preg->re_pcre2_code);
|
||||
return REG_ESPACE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -279,6 +285,7 @@ start location rather than being passed as a PCRE2 "starting offset". */
|
|||
|
||||
if ((eflags & REG_STARTEND) != 0)
|
||||
{
|
||||
if (pmatch == NULL) return REG_INVARG;
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2014 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue